libvpx: Necessary modifications

- convert ARM assembly (NEON) files,
- add rtcd for run-time CPU features detection,
- modify "system_state.h",
- "arm_cpudetect.c" fixes.
This commit is contained in:
Błażej Szczygieł 2016-09-14 22:10:55 +02:00
parent 5268443fdf
commit 1556d0d377
26 changed files with 5429 additions and 47 deletions

240
thirdparty/libvpx/rtcd/vp8_rtcd_arm.h vendored Normal file
View file

@ -0,0 +1,240 @@
#ifndef VP8_RTCD_H_
#define VP8_RTCD_H_
/*
 * RTCD_EXTERN is the storage-class prefix for the function pointers declared
 * further down in this header. With RTCD_C defined it expands to nothing, so
 * the including translation unit holds the pointer objects themselves;
 * without RTCD_C it expands to `extern` and the pointers are only declared.
 */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* VP8
*/
/*
 * Forward declarations only: the prototypes in this header pass these structs
 * by pointer (vp8_dequantize_b* and vp8_loop_filter_{bh,bv,mbh,mbv}*), so the
 * full definitions are not needed here.
 */
struct blockd;
struct loop_filter_info;
#ifdef __cplusplus
extern "C" {
#endif
/*
 * vp8_bilinear_predict{16x16,4x4,8x4,8x8}.
 *
 * The 16x16, 8x4 and 8x8 names are function pointers; setup_rtcd_internal()
 * in this header binds each to its _c routine and, when HAVE_NEON is set and
 * arm_cpu_caps() reports HAS_NEON, rebinds it to the _neon routine.
 * The 4x4 name has no NEON routine declared here and is a plain macro alias
 * for the C routine, so it is resolved at compile time.
 */
void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
/*
 * vp8_clear_system_state is a compile-time macro alias for the C routine in
 * this header (no run-time selection).
 * The routine takes no arguments; it is declared with an explicit (void)
 * parameter list so that C compilers see a real prototype and reject calls
 * that pass arguments.
 */
void vp8_clear_system_state_c(void);
#define vp8_clear_system_state vp8_clear_system_state_c
/*
 * vp8_copy_mem{16x16,8x4,8x8}.
 *
 * Each public name is a function pointer sharing the signature of the _c and
 * _neon routines declared directly above it. setup_rtcd_internal() in this
 * header binds it to the _c routine, then to the _neon routine when HAVE_NEON
 * is set and HAS_NEON is reported at run time.
 */
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
/*
 * vp8_dc_only_idct_add, vp8_dequant_idct_add, vp8_dequant_idct_add_uv_block,
 * vp8_dequant_idct_add_y_block and vp8_dequantize_b.
 *
 * All five public names are function pointers that setup_rtcd_internal() in
 * this header binds to the _c routine or, with NEON compiled in and detected,
 * to the _neon routine.
 * Note that vp8_dc_only_idct_add receives `input` by value (a single short),
 * whereas the other routines take pointers.
 */
void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
void vp8_dequantize_b_c(struct blockd*, short *dqc);
void vp8_dequantize_b_neon(struct blockd*, short *dqc);
RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc);
/*
 * Loop filter entry points.
 *
 * vp8_loop_filter_{bh,bv,mbh,mbv} take separate y/u/v plane pointers, two
 * strides and a struct loop_filter_info pointer.
 * vp8_loop_filter_simple_{bh,bv,mbh,mbv} take only a y pointer, its stride and
 * a blimit pointer.
 *
 * Every public name is a function pointer bound by setup_rtcd_internal() in
 * this header to a C routine or, with NEON compiled in and detected, to a
 * NEON routine.
 *
 * The routines behind the "simple" pointers do not follow the pointer names:
 *   vp8_loop_filter_simple_bh  <- vp8_loop_filter_bhs_c / _bhs_neon
 *   vp8_loop_filter_simple_bv  <- vp8_loop_filter_bvs_c / _bvs_neon
 *   vp8_loop_filter_simple_mbh <- vp8_loop_filter_simple_horizontal_edge_c
 *                                 / vp8_loop_filter_mbhs_neon
 *   vp8_loop_filter_simple_mbv <- vp8_loop_filter_simple_vertical_edge_c
 *                                 / vp8_loop_filter_mbvs_neon
 */
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bvs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_mbvs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
/*
 * vp8_short_idct4x4llm, vp8_short_inv_walsh4x4 and vp8_short_inv_walsh4x4_1.
 *
 * The first two public names are function pointers bound by
 * setup_rtcd_internal() in this header (_c, or _neon when NEON is compiled in
 * and detected). vp8_short_inv_walsh4x4_1 has no NEON routine declared here
 * and is a compile-time macro alias for its C routine.
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
void vp8_short_inv_walsh4x4_c(short *input, short *output);
void vp8_short_inv_walsh4x4_neon(short *input, short *output);
RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
/*
 * vp8_sixtap_predict{16x16,4x4,8x4,8x8}.
 *
 * The 16x16, 8x4 and 8x8 names are function pointers bound by
 * setup_rtcd_internal() in this header (_c, or _neon when NEON is compiled in
 * and detected). The 4x4 name has no NEON routine declared here and is a
 * compile-time macro alias for the C routine.
 * All variants share the signature used by the vp8_bilinear_predict* family.
 */
void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably the public initialiser that ends up running
 * setup_rtcd_internal() - confirm against the translation unit that defines
 * RTCD_C.
 */
void vp8_rtcd(void);
#ifdef RTCD_C
#include "vpx_ports/arm.h"
/*
 * Binds every run-time dispatched VP8 function pointer declared in this
 * header to an implementation.
 *
 * Each pointer is first bound to its portable C routine. When the build
 * enables NEON (HAVE_NEON) and arm_cpu_caps() reports HAS_NEON, every pointer
 * is then rebound to its NEON routine. The pointers are independent of each
 * other, so the NEON overrides are grouped under a single guard instead of
 * one guard per pointer.
 */
static void setup_rtcd_internal(void)
{
    int flags = arm_cpu_caps();

    /* flags is only read inside the HAVE_NEON section below; this keeps
     * builds without NEON free of unused-variable warnings. */
    (void)flags;

    /* Portable C defaults. */
    vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
    vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
    vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
    vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
    vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
    vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
    vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
    vp8_dequant_idct_add = vp8_dequant_idct_add_c;
    vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
    vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
    vp8_dequantize_b = vp8_dequantize_b_c;
    vp8_loop_filter_bh = vp8_loop_filter_bh_c;
    vp8_loop_filter_bv = vp8_loop_filter_bv_c;
    vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
    vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
    vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
    vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
    vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
    vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
    vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
    vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
    vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
    vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
    vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;

#if HAVE_NEON
    /* NEON overrides, taken only when the CPU reports NEON at run time. */
    if (flags & HAS_NEON) {
        vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon;
        vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon;
        vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon;
        vp8_copy_mem16x16 = vp8_copy_mem16x16_neon;
        vp8_copy_mem8x4 = vp8_copy_mem8x4_neon;
        vp8_copy_mem8x8 = vp8_copy_mem8x8_neon;
        vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon;
        vp8_dequant_idct_add = vp8_dequant_idct_add_neon;
        vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
        vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
        vp8_dequantize_b = vp8_dequantize_b_neon;
        vp8_loop_filter_bh = vp8_loop_filter_bh_neon;
        vp8_loop_filter_bv = vp8_loop_filter_bv_neon;
        vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon;
        vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon;
        vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon;
        vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon;
        vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon;
        vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon;
        vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon;
        vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon;
        vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon;
        vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon;
        vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon;
    }
#endif
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

117
thirdparty/libvpx/rtcd/vp8_rtcd_c.h vendored Normal file
View file

@ -0,0 +1,117 @@
#ifndef VP8_RTCD_H_
#define VP8_RTCD_H_
/*
 * RTCD_EXTERN expands to nothing when RTCD_C is defined and to `extern`
 * otherwise. None of the declarations visible in this C-only header use it:
 * every public name here is a macro alias rather than a function pointer.
 */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* VP8
*/
/*
 * Forward declarations only: the prototypes in this header pass these structs
 * by pointer (vp8_dequantize_b_c and vp8_loop_filter_{bh,bv,mbh,mbv}_c), so
 * the full definitions are not needed here.
 */
struct blockd;
struct loop_filter_info;
#ifdef __cplusplus
extern "C" {
#endif
/*
 * vp8_bilinear_predict{16x16,4x4,8x4,8x8}: each public name is a compile-time
 * macro alias for its _c routine; nothing is selected at run time.
 */
void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c
void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c
/*
 * vp8_clear_system_state is a compile-time macro alias for the C routine.
 * The routine takes no arguments; it is declared with an explicit (void)
 * parameter list so that C compilers see a real prototype and reject calls
 * that pass arguments.
 */
void vp8_clear_system_state_c(void);
#define vp8_clear_system_state vp8_clear_system_state_c
/*
 * vp8_copy_mem{16x16,8x4,8x8}: each public name is a compile-time macro alias
 * for its _c routine.
 */
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
#define vp8_copy_mem16x16 vp8_copy_mem16x16_c
void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
#define vp8_copy_mem8x4 vp8_copy_mem8x4_c
void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
#define vp8_copy_mem8x8 vp8_copy_mem8x8_c
/*
 * vp8_dc_only_idct_add, vp8_dequant_idct_add, vp8_dequant_idct_add_uv_block,
 * vp8_dequant_idct_add_y_block and vp8_dequantize_b: each public name is a
 * compile-time macro alias for its _c routine.
 * Note that vp8_dc_only_idct_add_c receives `input` by value (a single short),
 * whereas the other routines take pointers.
 */
void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
#define vp8_dc_only_idct_add vp8_dc_only_idct_add_c
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
#define vp8_dequant_idct_add vp8_dequant_idct_add_c
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
void vp8_dequantize_b_c(struct blockd*, short *dqc);
#define vp8_dequantize_b vp8_dequantize_b_c
/*
 * Loop filter entry points; each public name is a compile-time macro alias
 * for a C routine.
 *
 * The routines behind the "simple" names do not follow the public names:
 *   vp8_loop_filter_simple_bh  -> vp8_loop_filter_bhs_c
 *   vp8_loop_filter_simple_bv  -> vp8_loop_filter_bvs_c
 *   vp8_loop_filter_simple_mbh -> vp8_loop_filter_simple_horizontal_edge_c
 *   vp8_loop_filter_simple_mbv -> vp8_loop_filter_simple_vertical_edge_c
 */
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bh vp8_loop_filter_bh_c
void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_bv vp8_loop_filter_bv_c
void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_mbh vp8_loop_filter_mbh_c
void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
#define vp8_loop_filter_mbv vp8_loop_filter_mbv_c
void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
#define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c
void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
#define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
#define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
#define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c
/*
 * vp8_short_idct4x4llm, vp8_short_inv_walsh4x4 and vp8_short_inv_walsh4x4_1:
 * each public name is a compile-time macro alias for its _c routine.
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
#define vp8_short_idct4x4llm vp8_short_idct4x4llm_c
void vp8_short_inv_walsh4x4_c(short *input, short *output);
#define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c
void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
/*
 * vp8_sixtap_predict{16x16,4x4,8x4,8x8}: each public name is a compile-time
 * macro alias for its _c routine. All variants share the signature used by
 * the vp8_bilinear_predict*_c routines.
 */
void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c
void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
#define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably the public initialiser that ends up running
 * setup_rtcd_internal() - confirm against the translation unit that defines
 * RTCD_C.
 */
void vp8_rtcd(void);
#ifdef RTCD_C
/*
 * C-only configuration: every public name in this header is a macro alias for
 * its C routine, so there is nothing to bind at run time.
 */
static void setup_rtcd_internal(void) {
    /* intentionally empty */
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

247
thirdparty/libvpx/rtcd/vp8_rtcd_x86.h vendored Normal file
View file

@ -0,0 +1,247 @@
#ifndef VP8_RTCD_H_
#define VP8_RTCD_H_
/*
 * RTCD_EXTERN is the storage-class prefix for the function pointers declared
 * further down in this header. With RTCD_C defined it expands to nothing, so
 * the including translation unit holds the pointer objects themselves;
 * without RTCD_C it expands to `extern` and the pointers are only declared.
 */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* VP8
*/
/*
 * Forward declarations only: the prototypes in this header pass these structs
 * by pointer (vp8_dequantize_b* and vp8_loop_filter_{bh,bv,mbh,mbv}*), so the
 * full definitions are not needed here.
 */
struct blockd;
struct loop_filter_info;
#ifdef __cplusplus
extern "C" {
#endif
/*
 * vp8_bilinear_predict{16x16,4x4,8x4,8x8}.
 *
 * Every public name is a function pointer. Candidate routines:
 *   16x16, 8x8: _c, _mmx, _sse2, _ssse3
 *   4x4, 8x4:   _c, _mmx
 * setup_rtcd_internal() in this header assigns the _c routine first and then
 * tests HAS_MMX, HAS_SSE2 and HAS_SSSE3 in that order, so the last matching
 * check determines the routine left in the pointer.
 */
void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
/*
 * vp8_clear_system_state is a function pointer; setup_rtcd_internal() in this
 * header binds it to vp8_clear_system_state_c, or to vpx_reset_mmx_state when
 * HAS_MMX is reported.
 * Both routines and the pointer take no arguments; they are declared with an
 * explicit (void) parameter list so that C compilers see real prototypes and
 * reject calls that pass arguments.
 */
void vp8_clear_system_state_c(void);
void vpx_reset_mmx_state(void);
RTCD_EXTERN void (*vp8_clear_system_state)(void);
/*
 * vp8_copy_mem{16x16,8x4,8x8}.
 *
 * Every public name is a function pointer. Candidate routines:
 *   16x16:    _c, _mmx, _sse2
 *   8x4, 8x8: _c, _mmx
 * setup_rtcd_internal() in this header assigns the _c routine first and then
 * overrides it for each matching CPU flag, MMX before SSE2.
 */
void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x4_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
void vp8_copy_mem8x8_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
/*
 * vp8_dc_only_idct_add, vp8_dequant_idct_add, vp8_dequant_idct_add_uv_block,
 * vp8_dequant_idct_add_y_block and vp8_dequantize_b.
 *
 * Every public name is a function pointer. Candidate routines declared here:
 *   dc_only_idct_add, dequant_idct_add, dequantize_b: _c, _mmx
 *   dequant_idct_add_uv_block, dequant_idct_add_y_block: _c, _mmx, _sse2
 * Note that vp8_dc_only_idct_add receives `input` by value (a single short),
 * whereas the other routines take pointers.
 */
void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
void vp8_dequantize_b_c(struct blockd*, short *dqc);
void vp8_dequantize_b_mmx(struct blockd*, short *dqc);
RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc);
/*
 * Loop filter entry points.
 *
 * vp8_loop_filter_{bh,bv,mbh,mbv} take separate y/u/v plane pointers, two
 * strides and a struct loop_filter_info pointer.
 * vp8_loop_filter_simple_{bh,bv,mbh,mbv} take only a y pointer, its stride and
 * a blimit pointer.
 *
 * Every public name is a function pointer with _c, _mmx and _sse2 candidate
 * routines declared directly above it.
 *
 * The routines behind the "simple" pointers do not follow the pointer names:
 *   vp8_loop_filter_simple_bh  <- vp8_loop_filter_bhs_{c,mmx,sse2}
 *   vp8_loop_filter_simple_bv  <- vp8_loop_filter_bvs_{c,mmx,sse2}
 *   vp8_loop_filter_simple_mbh <- vp8_loop_filter_simple_horizontal_edge_{c,mmx,sse2}
 *   vp8_loop_filter_simple_mbv <- vp8_loop_filter_simple_vertical_edge_{c,mmx,sse2}
 */
void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bhs_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bvs_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_horizontal_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_vertical_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
/*
 * vp8_short_idct4x4llm, vp8_short_inv_walsh4x4 and vp8_short_inv_walsh4x4_1.
 *
 * The first two public names are function pointers; candidates declared here
 * are _c and _mmx for idct4x4llm, and _c, _mmx and _sse2 for inv_walsh4x4.
 * vp8_short_inv_walsh4x4_1 is a compile-time macro alias for its C routine.
 */
void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
void vp8_short_inv_walsh4x4_c(short *input, short *output);
void vp8_short_inv_walsh4x4_mmx(short *input, short *output);
void vp8_short_inv_walsh4x4_sse2(short *input, short *output);
RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
#define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
/*
 * vp8_sixtap_predict{16x16,4x4,8x4,8x8}.
 *
 * Every public name is a function pointer. Candidate routines declared here:
 *   16x16, 8x4, 8x8: _c, _mmx, _sse2, _ssse3
 *   4x4:             _c, _mmx, _ssse3 (no _sse2 routine)
 * All variants share the signature used by the vp8_bilinear_predict* family.
 */
void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably the public initialiser that ends up running
 * setup_rtcd_internal() - confirm against the translation unit that defines
 * RTCD_C.
 */
void vp8_rtcd(void);
#ifdef RTCD_C
#include "vpx_ports/x86.h"
static void setup_rtcd_internal(void)
{
int flags = x86_simd_caps();
vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx;
if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c;
if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx;
vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx;
vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx;
if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
vp8_clear_system_state = vp8_clear_system_state_c;
if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state;
vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx;
if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx;
vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx;
vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
vp8_dequant_idct_add = vp8_dequant_idct_add_c;
if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
vp8_dequantize_b = vp8_dequantize_b_c;
if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx;
vp8_loop_filter_bh = vp8_loop_filter_bh_c;
if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
vp8_loop_filter_bv = vp8_loop_filter_bv_c;
if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx;
if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx;
vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx;
if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx;
if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c;
if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx;
if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx;
if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3;
vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx;
if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

54
thirdparty/libvpx/rtcd/vp9_rtcd_arm.h vendored Normal file
View file

@ -0,0 +1,54 @@
#ifndef VP9_RTCD_H_
#define VP9_RTCD_H_
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* VP9
*/
#include "vp9/common/vp9_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/* vp9_iht16x16_256_add: only a C prototype is declared, so the name is aliased at compile time. */
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
/* vp9_iht4x4_16_add: C and NEON prototypes; the pointer is bound in setup_rtcd_internal(). */
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
/* vp9_iht8x8_64_add: C and NEON prototypes; the pointer is bound in setup_rtcd_internal(). */
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
/* Public initialisation entry point; only declared in this header. */
void vp9_rtcd(void);
#ifdef RTCD_C
#include "vpx_ports/arm.h"
/*
 * Bind the run-time dispatched VP9 function pointers for ARM.
 *
 * Both pointers start on their C implementation. When the build enables
 * HAVE_NEON and arm_cpu_caps() reports HAS_NEON, they are switched to the
 * NEON implementations.
 */
static void setup_rtcd_internal(void)
{
    int cpu_caps = arm_cpu_caps();

    /* C implementations first. */
    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;

#if HAVE_NEON
    /* NEON override, compiled in only when HAVE_NEON is non-zero. */
    if (cpu_caps & HAS_NEON) {
        vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon;
        vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon;
    }
#endif
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

41
thirdparty/libvpx/rtcd/vp9_rtcd_c.h vendored Normal file
View file

@ -0,0 +1,41 @@
#ifndef VP9_RTCD_H_
#define VP9_RTCD_H_
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* VP9
*/
#include "vp9/common/vp9_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * C-only variant: each entry point has a single _c prototype and is aliased
 * to it with #define, so no function pointers are declared.
 */
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
#define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_iht4x4_16_add vp9_iht4x4_16_add_c
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
#define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
/* Public initialisation entry point; only declared in this header. */
void vp9_rtcd(void);
#ifdef RTCD_C
/*
 * Intentionally empty: in this C-only variant every entry point is aliased
 * to its _c implementation with #define, so there are no pointers to bind.
 */
static void setup_rtcd_internal(void)
{
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

55
thirdparty/libvpx/rtcd/vp9_rtcd_x86.h vendored Normal file
View file

@ -0,0 +1,55 @@
#ifndef VP9_RTCD_H_
#define VP9_RTCD_H_
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* VP9
*/
#include "vp9/common/vp9_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * x86 variant: each entry point has a C and an SSE2 prototype plus an
 * RTCD_EXTERN function pointer that setup_rtcd_internal() binds.
 */
void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
/* Public initialisation entry point; only declared in this header. */
void vp9_rtcd(void);
#ifdef RTCD_C
#include "vpx_ports/x86.h"
/*
 * Bind the run-time dispatched VP9 function pointers for x86.
 *
 * All three pointers start on their C implementation and are switched to
 * the SSE2 implementations when x86_simd_caps() reports HAS_SSE2.
 */
static void setup_rtcd_internal(void)
{
    const int simd_caps = x86_simd_caps();

    /* C implementations first. */
    vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;

    /* SSE2 provides a variant for every pointer. */
    if (simd_caps & HAS_SSE2) {
        vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
        vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
        vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
    }
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

View file

@ -0,0 +1,678 @@
#ifndef VPX_DSP_RTCD_H_
#define VPX_DSP_RTCD_H_
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* DSP
*/
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * vpx_convolve* family. Every entry has a C prototype, a NEON prototype and
 * an RTCD_EXTERN function pointer, all sharing one signature.
 * NOTE(review): the code that assigns these pointers is outside this part of
 * the file -- presumably a setup_rtcd_internal() further down; confirm.
 */
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
/*
 * vpx_d*_predictor_* family. Most entries have only a C prototype and are
 * aliased to it with #define. The exceptions in this range, which also
 * declare a NEON prototype and an RTCD_EXTERN function pointer, are:
 * d135 4x4 and d45 16x16 / 4x4 / 8x8.
 */
void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
/* d135 4x4 is the only d135 size with a NEON variant. */
void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c
void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c
void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c
void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c
void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c
void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c
void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
/* d45: 16x16, 4x4 and 8x8 have NEON variants; 32x32 is C-only. */
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c
void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c
void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
/*
 * vpx_dc_128 / dc_left / dc / dc_top predictor families, each in 16x16,
 * 32x32, 4x4 and 8x8 sizes. Every entry declares a C prototype, a NEON
 * prototype and an RTCD_EXTERN function pointer.
 */
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/*
 * vpx_h_predictor_*: all four sizes declare C and NEON prototypes plus an
 * RTCD_EXTERN function pointer.
 */
void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* vpx_he_predictor_4x4 has only a C prototype and is aliased to it with #define. */
void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
/*
 * vpx_idct* family. Every entry declares a C prototype, a NEON prototype and
 * an RTCD_EXTERN function pointer.
 */
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
/*
 * The NEON prototype listed for the 135 variant is vpx_idct32x32_1024_add_neon
 * (a harmless repeat of the declaration above); no *_135_add_neon is declared.
 * NOTE(review): presumably the 135 pointer is bound to the 1024 NEON routine
 * where the pointers are assigned -- confirm there.
 */
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
/* Same pattern as the 135 variant: the NEON prototype here is the 1024 one. */
void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
/* vpx_iwht4x4_*: only C prototypes are declared; both names are aliased with #define. */
void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c
void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
/*
 * vpx_lpf_horizontal_4 / _8 and their _dual forms. Every entry declares a C
 * prototype, a NEON prototype and an RTCD_EXTERN function pointer. The _dual
 * forms take two blimit/limit/thresh triples instead of one.
 */
void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
/*
 * vpx_scaled_* entries: only C prototypes are declared.  Each dispatch name
 * is a macro for its _c function, so the binding is fixed at compile time
 * and setup_rtcd_internal() does not touch these names.  All six share one
 * parameter list.
 */
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_2d vpx_scaled_2d_c
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_horiz vpx_scaled_horiz_c
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_vert vpx_scaled_vert_c
/*
 * vpx_tm_predictor_* entries, one per block size (16x16, 32x32, 4x4, 8x8).
 * Each lists a C prototype (_c), a NEON prototype (_neon) and an
 * RTCD_EXTERN function pointer that setup_rtcd_internal() binds to one of
 * the two.
 */
void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/*
 * vpx_v_predictor_* entries, one per block size (16x16, 32x32, 4x4, 8x8).
 * Each lists a C prototype (_c), a NEON prototype (_neon) and an
 * RTCD_EXTERN function pointer that setup_rtcd_internal() binds to one of
 * the two.
 */
void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* vpx_ve_predictor_4x4 has only a C prototype; the name is a macro for it,
 * so there is no pointer and no run-time selection. */
void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably the public initialiser that runs
 * setup_rtcd_internal() before any RTCD_EXTERN pointer is called -- confirm
 * in the translation unit that defines RTCD_C.
 */
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
#include "vpx_ports/arm.h"
/*
 * Point every run-time-dispatched vpx_dsp function pointer at an
 * implementation.
 *
 * Phase 1 installs the portable C version of each routine.  Phase 2 is
 * compiled only when HAVE_NEON is non-zero and, if arm_cpu_caps() reports
 * HAS_NEON, replaces each pointer with its NEON version.  Every pointer ends
 * up with the same value as when each C/NEON pair is assigned back to back.
 *
 * Only compiled in the translation unit that defines RTCD_C.
 */
static void setup_rtcd_internal(void)
{
    int flags = arm_cpu_caps();

    /* flags is read only inside the HAVE_NEON block below; this keeps
     * builds with HAVE_NEON == 0 free of an unused-variable warning. */
    (void)flags;

    /* Phase 1: C fallbacks. */
    vpx_convolve8 = vpx_convolve8_c;
    vpx_convolve8_avg = vpx_convolve8_avg_c;
    vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c;
    vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c;
    vpx_convolve8_horiz = vpx_convolve8_horiz_c;
    vpx_convolve8_vert = vpx_convolve8_vert_c;
    vpx_convolve_avg = vpx_convolve_avg_c;
    vpx_convolve_copy = vpx_convolve_copy_c;

    vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_c;
    vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c;
    vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c;
    vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c;

    vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c;
    vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c;
    vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c;
    vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c;
    vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c;
    vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c;
    vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c;
    vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c;
    vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c;
    vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c;
    vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c;
    vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c;
    vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c;
    vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c;
    vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c;
    vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c;

    vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c;
    vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c;
    vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c;
    vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c;

    vpx_idct16x16_10_add = vpx_idct16x16_10_add_c;
    vpx_idct16x16_1_add = vpx_idct16x16_1_add_c;
    vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
    vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
    vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
    vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
    vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
    vpx_idct4x4_16_add = vpx_idct4x4_16_add_c;
    vpx_idct4x4_1_add = vpx_idct4x4_1_add_c;
    vpx_idct8x8_12_add = vpx_idct8x8_12_add_c;
    vpx_idct8x8_1_add = vpx_idct8x8_1_add_c;
    vpx_idct8x8_64_add = vpx_idct8x8_64_add_c;

    vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c;
    vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c;
    vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c;
    vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c;
    vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c;
    vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c;
    vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c;
    vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c;
    vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c;
    vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c;
    vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c;
    vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c;

    vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c;
    vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c;
    vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c;
    vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c;

    vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c;
    vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c;
    vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c;
    vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c;

#if HAVE_NEON
    /* Phase 2: NEON overrides, only when the CPU reports the capability. */
    if (flags & HAS_NEON) {
        vpx_convolve8 = vpx_convolve8_neon;
        vpx_convolve8_avg = vpx_convolve8_avg_neon;
        vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_neon;
        vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_neon;
        vpx_convolve8_horiz = vpx_convolve8_horiz_neon;
        vpx_convolve8_vert = vpx_convolve8_vert_neon;
        vpx_convolve_avg = vpx_convolve_avg_neon;
        vpx_convolve_copy = vpx_convolve_copy_neon;

        vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_neon;
        vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_neon;
        vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_neon;
        vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_neon;

        vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_neon;
        vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_neon;
        vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_neon;
        vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_neon;
        vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_neon;
        vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_neon;
        vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_neon;
        vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_neon;
        vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_neon;
        vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_neon;
        vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_neon;
        vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_neon;
        vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_neon;
        vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_neon;
        vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_neon;
        vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_neon;

        vpx_h_predictor_16x16 = vpx_h_predictor_16x16_neon;
        vpx_h_predictor_32x32 = vpx_h_predictor_32x32_neon;
        vpx_h_predictor_4x4 = vpx_h_predictor_4x4_neon;
        vpx_h_predictor_8x8 = vpx_h_predictor_8x8_neon;

        vpx_idct16x16_10_add = vpx_idct16x16_10_add_neon;
        vpx_idct16x16_1_add = vpx_idct16x16_1_add_neon;
        vpx_idct16x16_256_add = vpx_idct16x16_256_add_neon;
        vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_neon;
        /* The 135 and 34 pointers are given the 1024 NEON routine, exactly
         * as in the original per-pointer assignments; this is not a typo. */
        vpx_idct32x32_135_add = vpx_idct32x32_1024_add_neon;
        vpx_idct32x32_1_add = vpx_idct32x32_1_add_neon;
        vpx_idct32x32_34_add = vpx_idct32x32_1024_add_neon;
        vpx_idct4x4_16_add = vpx_idct4x4_16_add_neon;
        vpx_idct4x4_1_add = vpx_idct4x4_1_add_neon;
        vpx_idct8x8_12_add = vpx_idct8x8_12_add_neon;
        vpx_idct8x8_1_add = vpx_idct8x8_1_add_neon;
        vpx_idct8x8_64_add = vpx_idct8x8_64_add_neon;

        vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_neon;
        vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_neon;
        vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_neon;
        vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_neon;
        vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_neon;
        vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_neon;
        vpx_lpf_vertical_16 = vpx_lpf_vertical_16_neon;
        vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_neon;
        vpx_lpf_vertical_4 = vpx_lpf_vertical_4_neon;
        vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_neon;
        vpx_lpf_vertical_8 = vpx_lpf_vertical_8_neon;
        vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_neon;

        vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon;
        vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_neon;
        vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_neon;
        vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_neon;

        vpx_v_predictor_16x16 = vpx_v_predictor_16x16_neon;
        vpx_v_predictor_32x32 = vpx_v_predictor_32x32_neon;
        vpx_v_predictor_4x4 = vpx_v_predictor_4x4_neon;
        vpx_v_predictor_8x8 = vpx_v_predictor_8x8_neon;
    }
#endif
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

355
thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h vendored Normal file
View file

@ -0,0 +1,355 @@
#ifndef VPX_DSP_RTCD_H_
#define VPX_DSP_RTCD_H_
/*
 * RTCD_EXTERN expands to nothing when RTCD_C is defined and to `extern`
 * otherwise, so a file-scope object declared with it is defined only in the
 * translation unit that defines RTCD_C before including this header.
 */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* DSP
*/
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * vpx_convolve* entries: each dispatch name is a macro for the _c function
 * declared on the line before it, so the implementation is fixed at compile
 * time.  All eight share one parameter list.
 */
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve8 vpx_convolve8_c
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve8_avg vpx_convolve8_avg_c
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve8_horiz vpx_convolve8_horiz_c
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve8_vert vpx_convolve8_vert_c
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve_avg vpx_convolve_avg_c
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_convolve_copy vpx_convolve_copy_c
/*
 * vpx_d117/d135/d153_predictor_* entries, four block sizes each (16x16,
 * 32x32, 4x4, 8x8).  Each dispatch name is a macro for the _c function
 * declared on the line before it; nothing is selected at run time.
 */
void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c
void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c
void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c
void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c
void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c
/*
 * vpx_d207 and vpx_d207e predictor entries, four block sizes each.  Each
 * dispatch name is a macro for the _c function declared on the line before
 * it; nothing is selected at run time.
 */
void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c
void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c
void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
/*
 * vpx_d45 and vpx_d45e predictor entries, four block sizes each.  Each
 * dispatch name is a macro for the _c function declared on the line before
 * it; nothing is selected at run time.
 */
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c
void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
/*
 * vpx_d63 and vpx_d63e predictor entries (four block sizes each) plus the
 * 4x4-only vpx_d63f entry.  Each dispatch name is a macro for the _c
 * function declared on the line before it; nothing is selected at run time.
 */
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c
void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
/*
 * vpx_dc_128 / dc_left / dc / dc_top predictor entries, four block sizes
 * each.  Each dispatch name is a macro for the _c function declared on the
 * line before it; nothing is selected at run time.
 */
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c
void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c
void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c
void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c
void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c
void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c
void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c
void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c
void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c
void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c
void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c
void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c
void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c
void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c
void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c
/*
 * vpx_h_predictor_* entries (four block sizes) plus the 4x4-only
 * vpx_he_predictor entry.  Each dispatch name is a macro for the _c function
 * declared on the line before it; nothing is selected at run time.
 */
void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c
void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c
void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
/*
 * vpx_idct*_add entries (presumably inverse-DCT kernels, going by the names
 * -- confirm against the implementations).  Each dispatch name is a macro
 * for the _c function declared on the line before it; nothing is selected at
 * run time.
 */
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_1_add vpx_idct16x16_1_add_c
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct16x16_256_add vpx_idct16x16_256_add_c
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct32x32_135_add vpx_idct32x32_135_add_c
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct32x32_1_add vpx_idct32x32_1_add_c
void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct32x32_34_add vpx_idct32x32_34_add_c
void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct4x4_16_add vpx_idct4x4_16_add_c
void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct4x4_1_add vpx_idct4x4_1_add_c
void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct8x8_12_add vpx_idct8x8_12_add_c
void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct8x8_1_add vpx_idct8x8_1_add_c
void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_idct8x8_64_add vpx_idct8x8_64_add_c
void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c
void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
/* vpx_lpf_* entries: each declares only a _c implementation and #defines the
 * public name to it. Two signatures appear:
 *   - (s, pitch, blimit, limit, thresh) for the single forms, the edge_8 /
 *     edge_16 forms and vpx_lpf_vertical_16_dual;
 *   - (s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1) for the
 *     other *_dual forms. */
void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c
void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c
void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c
void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define vpx_lpf_horizontal_8_dual vpx_lpf_horizontal_8_dual_c
void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_horizontal_edge_16 vpx_lpf_horizontal_edge_16_c
void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_horizontal_edge_8 vpx_lpf_horizontal_edge_8_c
void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c
void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c
void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
#define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c
void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
#define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c
/* vpx_scaled_* entries (2d, avg_2d, avg_horiz, avg_vert, horiz, vert): all six
 * share one signature (src, src_stride, dst, dst_stride, filter_x, x_step_q4,
 * filter_y, y_step_q4, w, h). Only a _c implementation is declared for each and
 * the public name is #defined to it. */
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_2d vpx_scaled_2d_c
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_horiz vpx_scaled_horiz_c
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_vert vpx_scaled_vert_c
/* vpx_tm_predictor and vpx_v_predictor (16x16, 32x32, 4x4, 8x8) plus
 * vpx_ve_predictor_4x4: same (dst, y_stride, above, left) signature throughout.
 * Only _c implementations are declared, and each public name is a #define alias
 * for its _c symbol. */
void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c
void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c
void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c
void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c
void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c
void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c
void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c
void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c
void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
/* Public initialisation entry point for this header; it is only declared here
 * and its definition is not part of this file section.
 * NOTE(review): presumably it invokes setup_rtcd_internal() from the translation
 * unit that defines RTCD_C -- confirm in that .c file. */
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
/*
 * Per-header dispatch setup hook, compiled only when RTCD_C is defined.
 *
 * It is deliberately a no-op: the entries visible in this part of the header
 * bind every public name to its _c implementation with a #define, so there
 * are no function pointers to initialise here.
 */
static void setup_rtcd_internal(void)
{
    /* Nothing to set up. */
    return;
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

View file

@ -0,0 +1,614 @@
#ifndef VPX_DSP_RTCD_H_
#define VPX_DSP_RTCD_H_
/* RTCD_EXTERN controls who owns the function pointers declared below as
 * `RTCD_EXTERN void (*name)(...)`:
 *   - with RTCD_C defined it expands to nothing, so those declarations become
 *     the (tentative) definitions of the pointers in that translation unit;
 *   - otherwise it expands to `extern`, giving plain declarations.
 * NOTE(review): presumably exactly one .c file defines RTCD_C before including
 * this header -- confirm, otherwise the pointers would be defined repeatedly. */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif
/*
* DSP
*/
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/* vpx_convolve* entries. Each entry lists the prototypes of its candidate
 * implementations and then declares the public name as a function pointer via
 * RTCD_EXTERN; every candidate and pointer shares the signature (src,
 * src_stride, dst, dst_stride, filter_x, x_step_q4, filter_y, y_step_q4, w, h).
 * Candidates declared here:
 *   - vpx_convolve8, vpx_convolve8_horiz, vpx_convolve8_vert: _c, _sse2,
 *     _ssse3, _avx2
 *   - vpx_convolve8_avg, vpx_convolve8_avg_horiz, vpx_convolve8_avg_vert: _c,
 *     _sse2, _ssse3
 *   - vpx_convolve_avg, vpx_convolve_copy: _c, _sse2
 * The code that assigns the pointers is not in this file section. */
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
/* vpx_d117_predictor and vpx_d135_predictor (16x16, 32x32, 4x4, 8x8): only _c
 * implementations are declared, so each public name is a compile-time #define
 * alias rather than an RTCD_EXTERN function pointer. */
void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c
void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
/* vpx_d153_predictor and vpx_d207_predictor (16x16, 32x32, 4x4, 8x8): each
 * entry declares a _c candidate plus one suffixed candidate, then the public
 * name as an RTCD_EXTERN function pointer. The suffixed candidate is _ssse3 for
 * every size except vpx_d207_predictor_4x4, which declares _sse2 instead. */
void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d207_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d207_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d207_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d207_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* vpx_d207e_predictor (16x16, 32x32, 4x4, 8x8): _c only; each public name is a
 * #define alias for its _c symbol. */
void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
/* vpx_d45_predictor (16x16, 32x32, 4x4, 8x8): public names are RTCD_EXTERN
 * function pointers. Besides _c, the 16x16 and 32x32 entries declare an _ssse3
 * candidate while the 4x4 and 8x8 entries declare an _sse2 candidate. */
void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d45_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* vpx_d45e_predictor (16x16, 32x32, 4x4, 8x8): _c only; each public name is a
 * #define alias for its _c symbol. */
void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
/* vpx_d63_predictor (16x16, 32x32, 4x4, 8x8): each entry declares _c and _ssse3
 * candidates followed by the public name as an RTCD_EXTERN function pointer. */
void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d63_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d63_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* vpx_d63e_predictor (16x16, 32x32, 4x4, 8x8) and vpx_d63f_predictor_4x4: _c
 * only; each public name is a #define alias for its _c symbol. */
void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
/* vpx_dc_128_predictor, vpx_dc_left_predictor, vpx_dc_predictor and
 * vpx_dc_top_predictor, each in 16x16, 32x32, 4x4 and 8x8: every entry declares
 * _c and _sse2 candidates with the (dst, y_stride, above, left) signature, then
 * the public name as an RTCD_EXTERN function pointer. */
void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* vpx_h_predictor (16x16, 32x32, 4x4, 8x8): _c and _sse2 candidates with the
 * public name as an RTCD_EXTERN function pointer. vpx_he_predictor_4x4 at the
 * end declares only _c and is a plain #define alias. */
void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
/* vpx_idct* and vpx_iwht* "_add" entries, all with the signature
 * (const tran_low_t *input, uint8_t *dest, int dest_stride). Each entry declares
 * a _c candidate and an _sse2 candidate, then the public name as an RTCD_EXTERN
 * function pointer. Exception: vpx_iwht4x4_1_add (last entry) declares only _c
 * and is a #define alias. */
void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
/* NOTE(review): the next line repeats the vpx_idct32x32_1024_add_sse2 prototype
 * instead of declaring a vpx_idct32x32_135_add_sse2; no 135-specific SSE2 symbol
 * is declared in this file section. Presumably the 135 entry is meant to reuse
 * the 1024 SSE2 routine -- confirm against the code that assigns
 * vpx_idct32x32_135_add. The repeated declaration itself is valid C. */
void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
RTCD_EXTERN void (*vpx_iwht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
#define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
/*
 * Loop-filter entries. Every group has a C (_c) and an SSE2 (_sse2)
 * implementation plus the function pointer that setup_rtcd_internal() below
 * assigns at run time. The two horizontal "edge" filters additionally declare
 * an AVX2 implementation. The *_dual entries take two sets of
 * blimit/limit/thresh pointers, except vpx_lpf_vertical_16_dual, which takes
 * a single set.
 */
void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
/* The two edge filters below are the only loop filters with an AVX2 variant. */
void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
/*
 * Scaled-convolve entries. Only vpx_scaled_2d is dispatched at run time
 * (C or SSSE3, chosen in setup_rtcd_internal() below). The avg/horiz/vert
 * variants declare a C implementation only and are bound to it with a
 * #define, so they involve no function pointer.
 */
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
RTCD_EXTERN void (*vpx_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_horiz vpx_scaled_horiz_c
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
#define vpx_scaled_vert vpx_scaled_vert_c
/*
 * TM and V predictor entries for the 16x16, 32x32, 4x4 and 8x8 sizes. Each
 * has a C (_c) and an SSE2 (_sse2) implementation plus the function pointer
 * that setup_rtcd_internal() below assigns at run time.
 */
void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_tm_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
void vpx_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
/* Only a C implementation is declared, so the name is bound to it at compile time. */
void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
#define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
/*
 * Defined outside this header. NOTE(review): presumably the public
 * initialiser that ends up calling setup_rtcd_internal() - confirm against
 * the translation unit that defines RTCD_C.
 */
void vpx_dsp_rtcd(void);
#ifdef RTCD_C
#include "vpx_ports/x86.h"
/*
 * Point every run-time-dispatched vpx_dsp function pointer at an
 * implementation, based on the capability flags from x86_simd_caps().
 *
 * For each pointer the candidates are tried from the most to the least
 * preferred instruction set (AVX2, then SSSE3, then SSE2) and the C
 * implementation is the fallback when none of the relevant flags is set.
 * Only the variants declared for a given function are considered.
 */
static void setup_rtcd_internal(void)
{
    const int caps = x86_simd_caps();
    const int has_sse2 = caps & HAS_SSE2;
    const int has_ssse3 = caps & HAS_SSSE3;
    const int has_avx2 = caps & HAS_AVX2;

    /* Convolution. */
    vpx_convolve8 = has_avx2 ? vpx_convolve8_avx2
                  : has_ssse3 ? vpx_convolve8_ssse3
                  : has_sse2 ? vpx_convolve8_sse2
                  : vpx_convolve8_c;
    vpx_convolve8_avg = has_ssse3 ? vpx_convolve8_avg_ssse3
                      : has_sse2 ? vpx_convolve8_avg_sse2
                      : vpx_convolve8_avg_c;
    vpx_convolve8_avg_horiz = has_ssse3 ? vpx_convolve8_avg_horiz_ssse3
                            : has_sse2 ? vpx_convolve8_avg_horiz_sse2
                            : vpx_convolve8_avg_horiz_c;
    vpx_convolve8_avg_vert = has_ssse3 ? vpx_convolve8_avg_vert_ssse3
                           : has_sse2 ? vpx_convolve8_avg_vert_sse2
                           : vpx_convolve8_avg_vert_c;
    vpx_convolve8_horiz = has_avx2 ? vpx_convolve8_horiz_avx2
                        : has_ssse3 ? vpx_convolve8_horiz_ssse3
                        : has_sse2 ? vpx_convolve8_horiz_sse2
                        : vpx_convolve8_horiz_c;
    vpx_convolve8_vert = has_avx2 ? vpx_convolve8_vert_avx2
                       : has_ssse3 ? vpx_convolve8_vert_ssse3
                       : has_sse2 ? vpx_convolve8_vert_sse2
                       : vpx_convolve8_vert_c;
    vpx_convolve_avg = has_sse2 ? vpx_convolve_avg_sse2 : vpx_convolve_avg_c;
    vpx_convolve_copy = has_sse2 ? vpx_convolve_copy_sse2 : vpx_convolve_copy_c;

    /* Directional predictors (note: a few sizes use SSE2 rather than SSSE3). */
    vpx_d153_predictor_16x16 = has_ssse3 ? vpx_d153_predictor_16x16_ssse3 : vpx_d153_predictor_16x16_c;
    vpx_d153_predictor_32x32 = has_ssse3 ? vpx_d153_predictor_32x32_ssse3 : vpx_d153_predictor_32x32_c;
    vpx_d153_predictor_4x4 = has_ssse3 ? vpx_d153_predictor_4x4_ssse3 : vpx_d153_predictor_4x4_c;
    vpx_d153_predictor_8x8 = has_ssse3 ? vpx_d153_predictor_8x8_ssse3 : vpx_d153_predictor_8x8_c;
    vpx_d207_predictor_16x16 = has_ssse3 ? vpx_d207_predictor_16x16_ssse3 : vpx_d207_predictor_16x16_c;
    vpx_d207_predictor_32x32 = has_ssse3 ? vpx_d207_predictor_32x32_ssse3 : vpx_d207_predictor_32x32_c;
    vpx_d207_predictor_4x4 = has_sse2 ? vpx_d207_predictor_4x4_sse2 : vpx_d207_predictor_4x4_c;
    vpx_d207_predictor_8x8 = has_ssse3 ? vpx_d207_predictor_8x8_ssse3 : vpx_d207_predictor_8x8_c;
    vpx_d45_predictor_16x16 = has_ssse3 ? vpx_d45_predictor_16x16_ssse3 : vpx_d45_predictor_16x16_c;
    vpx_d45_predictor_32x32 = has_ssse3 ? vpx_d45_predictor_32x32_ssse3 : vpx_d45_predictor_32x32_c;
    vpx_d45_predictor_4x4 = has_sse2 ? vpx_d45_predictor_4x4_sse2 : vpx_d45_predictor_4x4_c;
    vpx_d45_predictor_8x8 = has_sse2 ? vpx_d45_predictor_8x8_sse2 : vpx_d45_predictor_8x8_c;
    vpx_d63_predictor_16x16 = has_ssse3 ? vpx_d63_predictor_16x16_ssse3 : vpx_d63_predictor_16x16_c;
    vpx_d63_predictor_32x32 = has_ssse3 ? vpx_d63_predictor_32x32_ssse3 : vpx_d63_predictor_32x32_c;
    vpx_d63_predictor_4x4 = has_ssse3 ? vpx_d63_predictor_4x4_ssse3 : vpx_d63_predictor_4x4_c;
    vpx_d63_predictor_8x8 = has_ssse3 ? vpx_d63_predictor_8x8_ssse3 : vpx_d63_predictor_8x8_c;

    /* DC predictors. */
    vpx_dc_128_predictor_16x16 = has_sse2 ? vpx_dc_128_predictor_16x16_sse2 : vpx_dc_128_predictor_16x16_c;
    vpx_dc_128_predictor_32x32 = has_sse2 ? vpx_dc_128_predictor_32x32_sse2 : vpx_dc_128_predictor_32x32_c;
    vpx_dc_128_predictor_4x4 = has_sse2 ? vpx_dc_128_predictor_4x4_sse2 : vpx_dc_128_predictor_4x4_c;
    vpx_dc_128_predictor_8x8 = has_sse2 ? vpx_dc_128_predictor_8x8_sse2 : vpx_dc_128_predictor_8x8_c;
    vpx_dc_left_predictor_16x16 = has_sse2 ? vpx_dc_left_predictor_16x16_sse2 : vpx_dc_left_predictor_16x16_c;
    vpx_dc_left_predictor_32x32 = has_sse2 ? vpx_dc_left_predictor_32x32_sse2 : vpx_dc_left_predictor_32x32_c;
    vpx_dc_left_predictor_4x4 = has_sse2 ? vpx_dc_left_predictor_4x4_sse2 : vpx_dc_left_predictor_4x4_c;
    vpx_dc_left_predictor_8x8 = has_sse2 ? vpx_dc_left_predictor_8x8_sse2 : vpx_dc_left_predictor_8x8_c;
    vpx_dc_predictor_16x16 = has_sse2 ? vpx_dc_predictor_16x16_sse2 : vpx_dc_predictor_16x16_c;
    vpx_dc_predictor_32x32 = has_sse2 ? vpx_dc_predictor_32x32_sse2 : vpx_dc_predictor_32x32_c;
    vpx_dc_predictor_4x4 = has_sse2 ? vpx_dc_predictor_4x4_sse2 : vpx_dc_predictor_4x4_c;
    vpx_dc_predictor_8x8 = has_sse2 ? vpx_dc_predictor_8x8_sse2 : vpx_dc_predictor_8x8_c;
    vpx_dc_top_predictor_16x16 = has_sse2 ? vpx_dc_top_predictor_16x16_sse2 : vpx_dc_top_predictor_16x16_c;
    vpx_dc_top_predictor_32x32 = has_sse2 ? vpx_dc_top_predictor_32x32_sse2 : vpx_dc_top_predictor_32x32_c;
    vpx_dc_top_predictor_4x4 = has_sse2 ? vpx_dc_top_predictor_4x4_sse2 : vpx_dc_top_predictor_4x4_c;
    vpx_dc_top_predictor_8x8 = has_sse2 ? vpx_dc_top_predictor_8x8_sse2 : vpx_dc_top_predictor_8x8_c;

    /* H predictors. */
    vpx_h_predictor_16x16 = has_sse2 ? vpx_h_predictor_16x16_sse2 : vpx_h_predictor_16x16_c;
    vpx_h_predictor_32x32 = has_sse2 ? vpx_h_predictor_32x32_sse2 : vpx_h_predictor_32x32_c;
    vpx_h_predictor_4x4 = has_sse2 ? vpx_h_predictor_4x4_sse2 : vpx_h_predictor_4x4_c;
    vpx_h_predictor_8x8 = has_sse2 ? vpx_h_predictor_8x8_sse2 : vpx_h_predictor_8x8_c;

    /* Inverse transforms. */
    vpx_idct16x16_10_add = has_sse2 ? vpx_idct16x16_10_add_sse2 : vpx_idct16x16_10_add_c;
    vpx_idct16x16_1_add = has_sse2 ? vpx_idct16x16_1_add_sse2 : vpx_idct16x16_1_add_c;
    vpx_idct16x16_256_add = has_sse2 ? vpx_idct16x16_256_add_sse2 : vpx_idct16x16_256_add_c;
    vpx_idct32x32_1024_add = has_sse2 ? vpx_idct32x32_1024_add_sse2 : vpx_idct32x32_1024_add_c;
    /* The 135 variant uses the 1024 SSE2 routine as its SSE2 implementation. */
    vpx_idct32x32_135_add = has_sse2 ? vpx_idct32x32_1024_add_sse2 : vpx_idct32x32_135_add_c;
    vpx_idct32x32_1_add = has_sse2 ? vpx_idct32x32_1_add_sse2 : vpx_idct32x32_1_add_c;
    vpx_idct32x32_34_add = has_sse2 ? vpx_idct32x32_34_add_sse2 : vpx_idct32x32_34_add_c;
    vpx_idct4x4_16_add = has_sse2 ? vpx_idct4x4_16_add_sse2 : vpx_idct4x4_16_add_c;
    vpx_idct4x4_1_add = has_sse2 ? vpx_idct4x4_1_add_sse2 : vpx_idct4x4_1_add_c;
    vpx_idct8x8_12_add = has_sse2 ? vpx_idct8x8_12_add_sse2 : vpx_idct8x8_12_add_c;
    vpx_idct8x8_1_add = has_sse2 ? vpx_idct8x8_1_add_sse2 : vpx_idct8x8_1_add_c;
    vpx_idct8x8_64_add = has_sse2 ? vpx_idct8x8_64_add_sse2 : vpx_idct8x8_64_add_c;
    vpx_iwht4x4_16_add = has_sse2 ? vpx_iwht4x4_16_add_sse2 : vpx_iwht4x4_16_add_c;

    /* Loop filters. */
    vpx_lpf_horizontal_4 = has_sse2 ? vpx_lpf_horizontal_4_sse2 : vpx_lpf_horizontal_4_c;
    vpx_lpf_horizontal_4_dual = has_sse2 ? vpx_lpf_horizontal_4_dual_sse2 : vpx_lpf_horizontal_4_dual_c;
    vpx_lpf_horizontal_8 = has_sse2 ? vpx_lpf_horizontal_8_sse2 : vpx_lpf_horizontal_8_c;
    vpx_lpf_horizontal_8_dual = has_sse2 ? vpx_lpf_horizontal_8_dual_sse2 : vpx_lpf_horizontal_8_dual_c;
    vpx_lpf_horizontal_edge_16 = has_avx2 ? vpx_lpf_horizontal_edge_16_avx2
                               : has_sse2 ? vpx_lpf_horizontal_edge_16_sse2
                               : vpx_lpf_horizontal_edge_16_c;
    vpx_lpf_horizontal_edge_8 = has_avx2 ? vpx_lpf_horizontal_edge_8_avx2
                              : has_sse2 ? vpx_lpf_horizontal_edge_8_sse2
                              : vpx_lpf_horizontal_edge_8_c;
    vpx_lpf_vertical_16 = has_sse2 ? vpx_lpf_vertical_16_sse2 : vpx_lpf_vertical_16_c;
    vpx_lpf_vertical_16_dual = has_sse2 ? vpx_lpf_vertical_16_dual_sse2 : vpx_lpf_vertical_16_dual_c;
    vpx_lpf_vertical_4 = has_sse2 ? vpx_lpf_vertical_4_sse2 : vpx_lpf_vertical_4_c;
    vpx_lpf_vertical_4_dual = has_sse2 ? vpx_lpf_vertical_4_dual_sse2 : vpx_lpf_vertical_4_dual_c;
    vpx_lpf_vertical_8 = has_sse2 ? vpx_lpf_vertical_8_sse2 : vpx_lpf_vertical_8_c;
    vpx_lpf_vertical_8_dual = has_sse2 ? vpx_lpf_vertical_8_dual_sse2 : vpx_lpf_vertical_8_dual_c;

    /* Scaled convolution. */
    vpx_scaled_2d = has_ssse3 ? vpx_scaled_2d_ssse3 : vpx_scaled_2d_c;

    /* TM and V predictors. */
    vpx_tm_predictor_16x16 = has_sse2 ? vpx_tm_predictor_16x16_sse2 : vpx_tm_predictor_16x16_c;
    vpx_tm_predictor_32x32 = has_sse2 ? vpx_tm_predictor_32x32_sse2 : vpx_tm_predictor_32x32_c;
    vpx_tm_predictor_4x4 = has_sse2 ? vpx_tm_predictor_4x4_sse2 : vpx_tm_predictor_4x4_c;
    vpx_tm_predictor_8x8 = has_sse2 ? vpx_tm_predictor_8x8_sse2 : vpx_tm_predictor_8x8_c;
    vpx_v_predictor_16x16 = has_sse2 ? vpx_v_predictor_16x16_sse2 : vpx_v_predictor_16x16_c;
    vpx_v_predictor_32x32 = has_sse2 ? vpx_v_predictor_32x32_sse2 : vpx_v_predictor_32x32_c;
    vpx_v_predictor_4x4 = has_sse2 ? vpx_v_predictor_4x4_sse2 : vpx_v_predictor_4x4_c;
    vpx_v_predictor_8x8 = has_sse2 ? vpx_v_predictor_8x8_sse2 : vpx_v_predictor_8x8_c;
}
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif

9
thirdparty/libvpx/vp8_rtcd.h vendored Normal file
View file

@ -0,0 +1,9 @@
/*
 * Dispatch header for the VP8 run-time CPU detection (RTCD) declarations.
 * Pulls in the x86 variant when WEBM_X86ASM is defined and the target is
 * x86 or x86-64, the ARM variant when WEBM_ARMASM is defined and the target
 * is ARM, and the plain C variant otherwise. The ARCH_* macros tested here
 * come from vpx_config.h, which is why it is included first.
 */
#include "vpx_config.h"
#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
#include "rtcd/vp8_rtcd_x86.h"
#elif defined(WEBM_ARMASM) && ARCH_ARM
#include "rtcd/vp8_rtcd_arm.h"
#else
#include "rtcd/vp8_rtcd_c.h"
#endif

9
thirdparty/libvpx/vp9_rtcd.h vendored Normal file
View file

@ -0,0 +1,9 @@
/*
 * Dispatch header for the VP9 run-time CPU detection (RTCD) declarations.
 * Pulls in the x86 variant when WEBM_X86ASM is defined and the target is
 * x86 or x86-64, the ARM variant when WEBM_ARMASM is defined and the target
 * is ARM, and the plain C variant otherwise. The ARCH_* macros tested here
 * come from vpx_config.h, which is why it is included first.
 */
#include "vpx_config.h"
#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
#include "rtcd/vp9_rtcd_x86.h"
#elif defined(WEBM_ARMASM) && ARCH_ARM
#include "rtcd/vp9_rtcd_arm.h"
#else
#include "rtcd/vp9_rtcd_c.h"
#endif

65
thirdparty/libvpx/vpx_config.asm vendored Normal file
View file

@ -0,0 +1,65 @@
; Assembler-side build configuration: the same switches as vpx_config.h,
; expressed as `equ` constants. Keep the values identical in both files.
;
; The architecture is selected by X86_32 / X86_64, which must be defined by
; whoever assembles this file (presumably the build script - confirm). If
; neither is defined, ARCH_X86 and ARCH_X86_64 are left undefined.
%ifdef X86_32
ARCH_X86 equ 1
ARCH_X86_64 equ 0
%elifdef X86_64
ARCH_X86 equ 0
ARCH_X86_64 equ 1
%endif
HAVE_VPX_PORTS equ 1
CONFIG_DEPENDENCY_TRACKING equ 0
CONFIG_EXTERNAL_BUILD equ 0
CONFIG_INSTALL_DOCS equ 0
CONFIG_INSTALL_BINS equ 0
CONFIG_INSTALL_LIBS equ 0
CONFIG_INSTALL_SRCS equ 0
CONFIG_USE_X86INC equ 1
CONFIG_DEBUG equ 0
CONFIG_GPROF equ 0
CONFIG_GCOV equ 0
CONFIG_RVCT equ 0
CONFIG_PIC equ 1 ;TODO: autodetect
CONFIG_CODEC_SRCS equ 0
CONFIG_DEBUG_LIBS equ 0
CONFIG_DEQUANT_TOKENS equ 0
CONFIG_DC_RECON equ 0
CONFIG_RUNTIME_CPU_DETECT equ 1
CONFIG_POSTPROC equ 0
CONFIG_VP9_POSTPROC equ 0
CONFIG_MULTITHREAD equ 1
CONFIG_INTERNAL_STATS equ 0
; Decoder-only build: both encoders are disabled.
CONFIG_VP8_ENCODER equ 0
CONFIG_VP8_DECODER equ 1
CONFIG_VP9_ENCODER equ 0
CONFIG_VP9_DECODER equ 1
CONFIG_VP8 equ 1
CONFIG_VP9 equ 1
CONFIG_ENCODERS equ 0
CONFIG_DECODERS equ 1
CONFIG_STATIC_MSVCRT equ 0
CONFIG_SPATIAL_RESAMPLING equ 0
CONFIG_REALTIME_ONLY equ 0
CONFIG_ONTHEFLY_BITPACKING equ 0
CONFIG_ERROR_CONCEALMENT equ 0
CONFIG_SHARED equ 0
CONFIG_STATIC equ 0
CONFIG_SMALL equ 0
CONFIG_POSTPROC_VISUALIZER equ 0
CONFIG_OS_SUPPORT equ 1
CONFIG_UNIT_TESTS equ 0
CONFIG_WEBM_IO equ 0
CONFIG_LIBYUV equ 0
CONFIG_DECODE_PERF_TESTS equ 0
CONFIG_ENCODE_PERF_TESTS equ 0
CONFIG_MULTI_RES_ENCODING equ 0
CONFIG_TEMPORAL_DENOISING equ 0 ; kept equal to the value in vpx_config.h
CONFIG_VP9_TEMPORAL_DENOISING equ 0
CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
CONFIG_VP9_HIGHBITDEPTH equ 0
CONFIG_BETTER_HW_COMPATIBILITY equ 0
CONFIG_EXPERIMENTAL equ 0
CONFIG_SIZE_LIMIT equ 0
CONFIG_SPATIAL_SVC equ 0
CONFIG_FP_MB_STATS equ 0
CONFIG_EMULATE_HARDWARE equ 0
CONFIG_MISC_FIXES equ 0

124
thirdparty/libvpx/vpx_config.h vendored Normal file
View file

@ -0,0 +1,124 @@
/* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */
/* */
/* Use of this source code is governed by a BSD-style license */
/* that can be found in the LICENSE file in the root of the source */
/* tree. An additional intellectual property rights grant can be found */
/* in the file PATENTS. All contributing project authors may */
/* be found in the AUTHORS file in the root of the source tree. */
/* This file automatically generated by configure. Do not edit! */
#ifndef VPX_CONFIG_H
#define VPX_CONFIG_H
#define RESTRICT
#define INLINE inline
#define HAVE_MIPS32 0
#define HAVE_MEDIA 0
/*
 * Target architecture detection from compiler-predefined macros. Exactly one
 * branch is taken and defines ARCH_X86, ARCH_X86_64, ARCH_ARM, HAVE_NEON and
 * HAVE_NEON_ASM. HAVE_MMX, HAVE_SSE2, HAVE_SSSE3 and HAVE_AVX2 are defined
 * only in the two x86 branches; on every other target they stay undefined,
 * which still evaluates to 0 inside #if but is not usable in ordinary C
 * expressions.
 */
#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#define ARCH_X86 1
#define ARCH_X86_64 0
#define ARCH_ARM 0
#define HAVE_NEON 0
#define HAVE_NEON_ASM 0
#define HAVE_MMX 1
#define HAVE_SSE2 1
#define HAVE_SSSE3 1
#define HAVE_AVX2 1
#elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
#define ARCH_X86 0
#define ARCH_X86_64 1
#define ARCH_ARM 0
#define HAVE_NEON 0
#define HAVE_NEON_ASM 0
#define HAVE_MMX 1
#define HAVE_SSE2 1
#define HAVE_SSSE3 1
#define HAVE_AVX2 1
/*
 * NOTE(review): __aarch64__ is handled exactly like 32-bit ARM here,
 * including HAVE_NEON_ASM 1 - confirm the NEON assembly sources are usable
 * on AArch64 targets.
 */
#elif defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) || defined(__aarch64__)
#define ARCH_X86 0
#define ARCH_X86_64 0
#define ARCH_ARM 1
#define HAVE_NEON 1
#define HAVE_NEON_ASM 1
#else
/* Unknown architecture: no SIMD paths are enabled. */
#define ARCH_X86 0
#define ARCH_X86_64 0
#define ARCH_ARM 0
#define HAVE_NEON 0
#define HAVE_NEON_ASM 0
#endif
/* Endianness is hard-coded to little-endian for every target. */
#define CONFIG_BIG_ENDIAN 0 //TODO: Autodetect
/* <pthread.h> and <unistd.h> are reported as available everywhere except on Windows. */
#ifdef _WIN32
#define HAVE_PTHREAD_H 0
#define HAVE_UNISTD_H 0
#else
#define HAVE_PTHREAD_H 1
#define HAVE_UNISTD_H 1
#endif
/*
 * Build-configuration switches, identical for every target. This is a
 * decoder-only configuration: the VP8 and VP9 decoders are enabled and both
 * encoders are disabled. Run-time CPU detection and multithreading are
 * enabled.
 */
#define HAVE_VPX_PORTS 1
#define CONFIG_DEPENDENCY_TRACKING 0
#define CONFIG_EXTERNAL_BUILD 0
#define CONFIG_INSTALL_DOCS 0
#define CONFIG_INSTALL_BINS 0
#define CONFIG_INSTALL_LIBS 0
#define CONFIG_INSTALL_SRCS 0
#define CONFIG_DEBUG 0
#define CONFIG_GPROF 0
#define CONFIG_GCOV 0
#define CONFIG_RVCT 0
#define CONFIG_CODEC_SRCS 0
#define CONFIG_DEBUG_LIBS 0
#define CONFIG_DEQUANT_TOKENS 0
#define CONFIG_DC_RECON 0
#define CONFIG_RUNTIME_CPU_DETECT 1
#define CONFIG_POSTPROC 0
#define CONFIG_VP9_POSTPROC 0
#define CONFIG_MULTITHREAD 1
#define CONFIG_INTERNAL_STATS 0
#define CONFIG_VP8_ENCODER 0
#define CONFIG_VP8_DECODER 1
#define CONFIG_VP9_ENCODER 0
#define CONFIG_VP9_DECODER 1
#define CONFIG_VP8 1
#define CONFIG_VP9 1
#define CONFIG_ENCODERS 0
#define CONFIG_DECODERS 1
#define CONFIG_STATIC_MSVCRT 0
#define CONFIG_SPATIAL_RESAMPLING 0
#define CONFIG_REALTIME_ONLY 0
#define CONFIG_ONTHEFLY_BITPACKING 0
#define CONFIG_ERROR_CONCEALMENT 0
#define CONFIG_SHARED 0
#define CONFIG_STATIC 0
#define CONFIG_SMALL 0
#define CONFIG_POSTPROC_VISUALIZER 0
#define CONFIG_OS_SUPPORT 1
#define CONFIG_UNIT_TESTS 0
#define CONFIG_WEBM_IO 0
#define CONFIG_LIBYUV 0
#define CONFIG_DECODE_PERF_TESTS 0
#define CONFIG_ENCODE_PERF_TESTS 0
#define CONFIG_MULTI_RES_ENCODING 0
#define CONFIG_TEMPORAL_DENOISING 0
#define CONFIG_VP9_TEMPORAL_DENOISING 0
#define CONFIG_COEFFICIENT_RANGE_CHECKING 0
#define CONFIG_VP9_HIGHBITDEPTH 0
#define CONFIG_BETTER_HW_COMPATIBILITY 0
#define CONFIG_EXPERIMENTAL 0
#define CONFIG_SIZE_LIMIT 0
#define CONFIG_SPATIAL_SVC 0
#define CONFIG_FP_MB_STATS 0
#define CONFIG_EMULATE_HARDWARE 0
#define CONFIG_MISC_FIXES 0
#endif /* VPX_CONFIG_H */

View file

@ -1,3 +1,5 @@
; This file was created from a .asm file
; using the ads2armasm_ms.pl script.
; ;
; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
; ;
@ -20,11 +22,10 @@
EXPORT |vpx_tm_predictor_8x8_neon| EXPORT |vpx_tm_predictor_8x8_neon|
EXPORT |vpx_tm_predictor_16x16_neon| EXPORT |vpx_tm_predictor_16x16_neon|
EXPORT |vpx_tm_predictor_32x32_neon| EXPORT |vpx_tm_predictor_32x32_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2 AREA |.text|, CODE, READONLY, ALIGN=2
;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -42,6 +43,7 @@
vst1.32 {d0[0]}, [r0], r1 vst1.32 {d0[0]}, [r0], r1
bx lr bx lr
ENDP ; |vpx_v_predictor_4x4_neon| ENDP ; |vpx_v_predictor_4x4_neon|
ALIGN 4
;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -63,6 +65,7 @@
vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1
bx lr bx lr
ENDP ; |vpx_v_predictor_8x8_neon| ENDP ; |vpx_v_predictor_8x8_neon|
ALIGN 4
;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -92,6 +95,7 @@
vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1
bx lr bx lr
ENDP ; |vpx_v_predictor_16x16_neon| ENDP ; |vpx_v_predictor_16x16_neon|
ALIGN 4
;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -125,6 +129,7 @@ loop_v
bgt loop_v bgt loop_v
bx lr bx lr
ENDP ; |vpx_v_predictor_32x32_neon| ENDP ; |vpx_v_predictor_32x32_neon|
ALIGN 4
;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -146,6 +151,7 @@ loop_v
vst1.32 {d0[0]}, [r0], r1 vst1.32 {d0[0]}, [r0], r1
bx lr bx lr
ENDP ; |vpx_h_predictor_4x4_neon| ENDP ; |vpx_h_predictor_4x4_neon|
ALIGN 4
;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -175,6 +181,7 @@ loop_v
vst1.64 {d0}, [r0], r1 vst1.64 {d0}, [r0], r1
bx lr bx lr
ENDP ; |vpx_h_predictor_8x8_neon| ENDP ; |vpx_h_predictor_8x8_neon|
ALIGN 4
;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -220,6 +227,7 @@ loop_v
vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1
bx lr bx lr
ENDP ; |vpx_h_predictor_16x16_neon| ENDP ; |vpx_h_predictor_16x16_neon|
ALIGN 4
;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, ;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -286,6 +294,7 @@ loop_h
bgt loop_h bgt loop_h
bx lr bx lr
ENDP ; |vpx_h_predictor_32x32_neon| ENDP ; |vpx_h_predictor_32x32_neon|
ALIGN 4
;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, ;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -332,6 +341,7 @@ loop_h
vst1.32 {d1[0]}, [r0], r1 vst1.32 {d1[0]}, [r0], r1
bx lr bx lr
ENDP ; |vpx_tm_predictor_4x4_neon| ENDP ; |vpx_tm_predictor_4x4_neon|
ALIGN 4
;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, ;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -404,6 +414,7 @@ loop_h
bx lr bx lr
ENDP ; |vpx_tm_predictor_8x8_neon| ENDP ; |vpx_tm_predictor_8x8_neon|
ALIGN 4
;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, ;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -497,6 +508,7 @@ loop_16x16_neon
bx lr bx lr
ENDP ; |vpx_tm_predictor_16x16_neon| ENDP ; |vpx_tm_predictor_16x16_neon|
ALIGN 4
;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, ;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
; const uint8_t *above, ; const uint8_t *above,
@ -626,5 +638,6 @@ loop_32x32_neon
bx lr bx lr
ENDP ; |vpx_tm_predictor_32x32_neon| ENDP ; |vpx_tm_predictor_32x32_neon|
ALIGN 4
END END

View file

@ -1,3 +1,5 @@
; This file was created from a .asm file
; using the ads2armasm_ms.pl script.
; ;
; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; Copyright (c) 2013 The WebM project authors. All Rights Reserved.
; ;
@ -11,9 +13,8 @@
EXPORT |vpx_lpf_horizontal_edge_8_neon| EXPORT |vpx_lpf_horizontal_edge_8_neon|
EXPORT |vpx_lpf_horizontal_edge_16_neon| EXPORT |vpx_lpf_horizontal_edge_16_neon|
EXPORT |vpx_lpf_vertical_16_neon| EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2 AREA |.text|, CODE, READONLY, ALIGN=2
; void mb_lpf_horizontal_edge(uint8_t *s, int p, ; void mb_lpf_horizontal_edge(uint8_t *s, int p,
; const uint8_t *blimit, ; const uint8_t *blimit,
@ -117,6 +118,7 @@ h_next
pop {r4-r8, pc} pop {r4-r8, pc}
ENDP ; |mb_lpf_horizontal_edge| ENDP ; |mb_lpf_horizontal_edge|
ALIGN 4
; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, ; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
; const uint8_t *blimit, ; const uint8_t *blimit,
@ -131,6 +133,7 @@ h_next
mov r12, #1 mov r12, #1
b mb_lpf_horizontal_edge b mb_lpf_horizontal_edge
ENDP ; |vpx_lpf_horizontal_edge_8_neon| ENDP ; |vpx_lpf_horizontal_edge_8_neon|
ALIGN 4
; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, ; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
; const uint8_t *blimit, ; const uint8_t *blimit,
@ -145,6 +148,7 @@ h_next
mov r12, #2 mov r12, #2
b mb_lpf_horizontal_edge b mb_lpf_horizontal_edge
ENDP ; |vpx_lpf_horizontal_edge_16_neon| ENDP ; |vpx_lpf_horizontal_edge_16_neon|
ALIGN 4
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit, ; const uint8_t *blimit,
@ -309,6 +313,7 @@ v_end
pop {r4-r8, pc} pop {r4-r8, pc}
ENDP ; |vpx_lpf_vertical_16_neon| ENDP ; |vpx_lpf_vertical_16_neon|
ALIGN 4
; void vpx_wide_mbfilter_neon(); ; void vpx_wide_mbfilter_neon();
; This is a helper function for the loopfilters. The invidual functions do the ; This is a helper function for the loopfilters. The invidual functions do the
@ -631,5 +636,6 @@ v_end
bx lr bx lr
ENDP ; |vpx_wide_mbfilter_neon| ENDP ; |vpx_wide_mbfilter_neon|
ALIGN 4
END END

View file

@ -1,3 +1,5 @@
; This file was created from a .asm file
; using the ads2armasm_ms.pl script.
; ;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
; ;
@ -12,11 +14,10 @@
EXPORT |vpx_push_neon| EXPORT |vpx_push_neon|
EXPORT |vpx_pop_neon| EXPORT |vpx_pop_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2 AREA |.text|, CODE, READONLY, ALIGN=2
|vpx_push_neon| PROC |vpx_push_neon| PROC
vst1.i64 {d8, d9, d10, d11}, [r0]! vst1.i64 {d8, d9, d10, d11}, [r0]!
@ -24,6 +25,7 @@
bx lr bx lr
ENDP ENDP
ALIGN 4
|vpx_pop_neon| PROC |vpx_pop_neon| PROC
vld1.i64 {d8, d9, d10, d11}, [r0]! vld1.i64 {d8, d9, d10, d11}, [r0]!
@ -31,6 +33,7 @@
bx lr bx lr
ENDP ENDP
ALIGN 4
END END

View file

@ -0,0 +1,658 @@
@ This file was created from a .asm file
@ using the ads2gas.pl script.
.equ DO1STROUNDING, 0
@
@ Copyright (c) 2014 The WebM project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
.global vpx_v_predictor_4x4_neon
.type vpx_v_predictor_4x4_neon, function
.global vpx_v_predictor_8x8_neon
.type vpx_v_predictor_8x8_neon, function
.global vpx_v_predictor_16x16_neon
.type vpx_v_predictor_16x16_neon, function
.global vpx_v_predictor_32x32_neon
.type vpx_v_predictor_32x32_neon, function
.global vpx_h_predictor_4x4_neon
.type vpx_h_predictor_4x4_neon, function
.global vpx_h_predictor_8x8_neon
.type vpx_h_predictor_8x8_neon, function
.global vpx_h_predictor_16x16_neon
.type vpx_h_predictor_16x16_neon, function
.global vpx_h_predictor_32x32_neon
.type vpx_h_predictor_32x32_neon, function
.global vpx_tm_predictor_4x4_neon
.type vpx_tm_predictor_4x4_neon, function
.global vpx_tm_predictor_8x8_neon
.type vpx_tm_predictor_8x8_neon, function
.global vpx_tm_predictor_16x16_neon
.type vpx_tm_predictor_16x16_neon, function
.global vpx_tm_predictor_32x32_neon
.type vpx_tm_predictor_32x32_neon, function
.arm
.eabi_attribute 24, 1 @Tag_ABI_align_needed
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
.text
.p2align 2
@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Vertical prediction for a 4x4 block: the 4 bytes at above[0..3] are
@ copied into each of the 4 destination rows. r3 (left) is not read.
_vpx_v_predictor_4x4_neon:
vpx_v_predictor_4x4_neon: @ PROC
vld1.32 {d0[0]}, [r2] @ d0[0] = above[0..3] as one 32-bit lane
vst1.32 {d0[0]}, [r0], r1 @ row 0, then dst += y_stride
vst1.32 {d0[0]}, [r0], r1 @ row 1
vst1.32 {d0[0]}, [r0], r1 @ row 2
vst1.32 {d0[0]}, [r0], r1 @ row 3
bx lr
.size vpx_v_predictor_4x4_neon, .-vpx_v_predictor_4x4_neon @ ENDP @ |vpx_v_predictor_4x4_neon|
@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Vertical prediction for an 8x8 block: the 8 bytes at above[0..7] are
@ copied into each of the 8 destination rows. r3 (left) is not read.
_vpx_v_predictor_8x8_neon:
vpx_v_predictor_8x8_neon: @ PROC
vld1.8 {d0}, [r2] @ d0 = above[0..7]
vst1.8 {d0}, [r0], r1 @ row 0, then dst += y_stride
vst1.8 {d0}, [r0], r1 @ row 1
vst1.8 {d0}, [r0], r1 @ row 2
vst1.8 {d0}, [r0], r1 @ row 3
vst1.8 {d0}, [r0], r1 @ row 4
vst1.8 {d0}, [r0], r1 @ row 5
vst1.8 {d0}, [r0], r1 @ row 6
vst1.8 {d0}, [r0], r1 @ row 7
bx lr
.size vpx_v_predictor_8x8_neon, .-vpx_v_predictor_8x8_neon @ ENDP @ |vpx_v_predictor_8x8_neon|
@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Vertical prediction for a 16x16 block: the 16 bytes at above[0..15] are
@ copied into each of the 16 destination rows (fully unrolled).
@ r3 (left) is not read.
_vpx_v_predictor_16x16_neon:
vpx_v_predictor_16x16_neon: @ PROC
vld1.8 {q0}, [r2] @ q0 = above[0..15]
vst1.8 {q0}, [r0], r1 @ row 0, then dst += y_stride
vst1.8 {q0}, [r0], r1 @ row 1
vst1.8 {q0}, [r0], r1 @ row 2
vst1.8 {q0}, [r0], r1 @ row 3
vst1.8 {q0}, [r0], r1 @ row 4
vst1.8 {q0}, [r0], r1 @ row 5
vst1.8 {q0}, [r0], r1 @ row 6
vst1.8 {q0}, [r0], r1 @ row 7
vst1.8 {q0}, [r0], r1 @ row 8
vst1.8 {q0}, [r0], r1 @ row 9
vst1.8 {q0}, [r0], r1 @ row 10
vst1.8 {q0}, [r0], r1 @ row 11
vst1.8 {q0}, [r0], r1 @ row 12
vst1.8 {q0}, [r0], r1 @ row 13
vst1.8 {q0}, [r0], r1 @ row 14
vst1.8 {q0}, [r0], r1 @ row 15
bx lr
.size vpx_v_predictor_16x16_neon, .-vpx_v_predictor_16x16_neon @ ENDP @ |vpx_v_predictor_16x16_neon|
@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Vertical prediction for a 32x32 block: the 32 bytes at above[0..31] are
@ copied into each of the 32 destination rows. The loop body stores 16 rows
@ and runs twice. r3 (left) is not read.
_vpx_v_predictor_32x32_neon:
vpx_v_predictor_32x32_neon: @ PROC
vld1.8 {q0, q1}, [r2] @ q0:q1 = above[0..31]
mov r2, #2 @ r2 is reused as the loop counter (above is already loaded)
loop_v:
vst1.8 {q0, q1}, [r0], r1 @ one 32-byte row, then dst += y_stride
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
subs r2, r2, #1 @ 16 rows done; decrement pass counter
bgt loop_v @ second pass writes rows 16..31
bx lr
.size vpx_v_predictor_32x32_neon, .-vpx_v_predictor_32x32_neon @ ENDP @ |vpx_v_predictor_32x32_neon|
@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Horizontal prediction for a 4x4 block: row i of dst is filled with the
@ single byte left[i]. r2 (above) is not read.
_vpx_h_predictor_4x4_neon:
vpx_h_predictor_4x4_neon: @ PROC
vld1.32 {d1[0]}, [r3] @ d1 bytes 0..3 = left[0..3]
vdup.8 d0, d1[0] @ broadcast left[0] to every byte of d0
vst1.32 {d0[0]}, [r0], r1 @ row 0 (4 bytes), then dst += y_stride
vdup.8 d0, d1[1] @ left[1]
vst1.32 {d0[0]}, [r0], r1 @ row 1
vdup.8 d0, d1[2] @ left[2]
vst1.32 {d0[0]}, [r0], r1 @ row 2
vdup.8 d0, d1[3] @ left[3]
vst1.32 {d0[0]}, [r0], r1 @ row 3
bx lr
.size vpx_h_predictor_4x4_neon, .-vpx_h_predictor_4x4_neon @ ENDP @ |vpx_h_predictor_4x4_neon|
@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Horizontal prediction for an 8x8 block: row i of dst is filled with the
@ single byte left[i]. r2 (above) is not read.
_vpx_h_predictor_8x8_neon:
vpx_h_predictor_8x8_neon: @ PROC
vld1.64 {d1}, [r3] @ d1 = left[0..7]
vdup.8 d0, d1[0] @ broadcast left[0] to every byte of d0
vst1.64 {d0}, [r0], r1 @ row 0 (8 bytes), then dst += y_stride
vdup.8 d0, d1[1] @ left[1]
vst1.64 {d0}, [r0], r1 @ row 1
vdup.8 d0, d1[2] @ left[2]
vst1.64 {d0}, [r0], r1 @ row 2
vdup.8 d0, d1[3] @ left[3]
vst1.64 {d0}, [r0], r1 @ row 3
vdup.8 d0, d1[4] @ left[4]
vst1.64 {d0}, [r0], r1 @ row 4
vdup.8 d0, d1[5] @ left[5]
vst1.64 {d0}, [r0], r1 @ row 5
vdup.8 d0, d1[6] @ left[6]
vst1.64 {d0}, [r0], r1 @ row 6
vdup.8 d0, d1[7] @ left[7]
vst1.64 {d0}, [r0], r1 @ row 7
bx lr
.size vpx_h_predictor_8x8_neon, .-vpx_h_predictor_8x8_neon @ ENDP @ |vpx_h_predictor_8x8_neon|
@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Horizontal prediction for a 16x16 block: row i of dst is filled with the
@ single byte left[i]. left[0..7] sit in d2 and left[8..15] in d3 (the two
@ halves of q1). r2 (above) is not read.
_vpx_h_predictor_16x16_neon:
vpx_h_predictor_16x16_neon: @ PROC
vld1.8 {q1}, [r3] @ q1 (d2:d3) = left[0..15]
vdup.8 q0, d2[0] @ broadcast left[0] to all 16 bytes of q0
vst1.8 {q0}, [r0], r1 @ row 0, then dst += y_stride
vdup.8 q0, d2[1] @ left[1]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[2] @ left[2]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[3] @ left[3]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[4] @ left[4]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[5] @ left[5]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[6] @ left[6]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[7] @ left[7]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[0] @ left[8]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[1] @ left[9]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[2] @ left[10]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[3] @ left[11]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[4] @ left[12]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[5] @ left[13]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[6] @ left[14]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[7] @ left[15]
vst1.8 {q0}, [r0], r1
bx lr
.size vpx_h_predictor_16x16_neon, .-vpx_h_predictor_16x16_neon @ ENDP @ |vpx_h_predictor_16x16_neon|
@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ Horizontal prediction for a 32x32 block: row i of dst is filled with the
@ single byte left[i]. Each 32-byte row is written as two 16-byte stores:
@ the first advances dst by 16 ("[r0]!"), the second by (y_stride - 16), so
@ the net advance per row is y_stride. Each loop pass consumes 16 left
@ pixels and writes 16 rows; the loop runs twice. The incoming r2 (above)
@ is not read.
_vpx_h_predictor_32x32_neon:
vpx_h_predictor_32x32_neon: @ PROC
sub r1, r1, #16 @ r1 = y_stride - 16 (see header comment)
mov r2, #2 @ r2 is reused as the loop counter: 2 passes of 16 rows
loop_h:
vld1.8 {q1}, [r3]! @ q1 (d2:d3) = next 16 left pixels; left += 16
vdup.8 q0, d2[0] @ broadcast this row's left pixel
vst1.8 {q0}, [r0]! @ columns 0..15; dst += 16
vst1.8 {q0}, [r0], r1 @ columns 16..31; dst += y_stride - 16
vdup.8 q0, d2[1]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[2]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[3]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[4]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[5]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[6]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[7]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[0] @ second half of the 16 loaded left pixels
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[1]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[2]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[3]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[4]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[5]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[6]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[7]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
subs r2, r2, #1 @ 16 rows done; decrement pass counter
bgt loop_h @ second pass handles left[16..31]
bx lr
.size vpx_h_predictor_32x32_neon, .-vpx_h_predictor_32x32_neon @ ENDP @ |vpx_h_predictor_32x32_neon|
@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ TM prediction for a 4x4 block:
@ dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ The arithmetic is done in 16-bit lanes and narrowed with unsigned
@ saturation. Reads above[-1], so the byte before `above` must be readable.
_vpx_tm_predictor_4x4_neon:
vpx_tm_predictor_4x4_neon: @ PROC
@ Load ytop_left = above[-1];
sub r12, r2, #1
vld1.u8 {d0[]}, [r12] @ replicate above[-1] into every byte of d0
@ Load above 4 pixels
vld1.32 {d2[0]}, [r2] @ only the low 32-bit lane of d2 is loaded
@ Compute above - ytop_left
vsubl.u8 q3, d2, d0 @ widening subtract; only the low 4 lanes matter
@ Load left row by row and compute left + (above - ytop_left)
@ 1st row and 2nd row
vld1.u8 {d2[]}, [r3]! @ replicate left[0]; left++
vld1.u8 {d4[]}, [r3]! @ replicate left[1]; left++
vmovl.u8 q1, d2 @ widen to 16 bits
vmovl.u8 q2, d4
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
vqmovun.s16 d0, q1 @ narrow with saturation to 0..255
vqmovun.s16 d1, q2
vst1.32 {d0[0]}, [r0], r1 @ store 4 bytes of row 0; dst += y_stride
vst1.32 {d1[0]}, [r0], r1 @ row 1
@ 3rd row and 4th row
vld1.u8 {d2[]}, [r3]! @ replicate left[2]; left++
vld1.u8 {d4[]}, [r3] @ replicate left[3] (no writeback: last left pixel)
vmovl.u8 q1, d2
vmovl.u8 q2, d4
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
vqmovun.s16 d0, q1
vqmovun.s16 d1, q2
vst1.32 {d0[0]}, [r0], r1 @ row 2
vst1.32 {d1[0]}, [r0], r1 @ row 3
bx lr
.size vpx_tm_predictor_4x4_neon, .-vpx_tm_predictor_4x4_neon @ ENDP @ |vpx_tm_predictor_4x4_neon|
@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ TM prediction for an 8x8 block:
@ dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ All 8 left pixels are loaded and widened once (q10 = d20:d21); each row
@ then broadcasts one 16-bit lane of q10 and adds it to q3 = above - top_left.
@ Reads above[-1], so the byte before `above` must be readable.
_vpx_tm_predictor_8x8_neon:
vpx_tm_predictor_8x8_neon: @ PROC
@ Load ytop_left = above[-1];
sub r12, r2, #1
vld1.8 {d0[]}, [r12] @ replicate above[-1] into every byte of d0
@ preload 8 left
vld1.8 {d30}, [r3]
@ Load above 8 pixels
vld1.64 {d2}, [r2]
vmovl.u8 q10, d30 @ q10 = left[0..7] widened to 16 bits
@ Compute above - ytop_left
vsubl.u8 q3, d2, d0
@ Broadcast left[r] per row and compute left + (above - ytop_left)
@ 1st row and 2nd row
vdup.16 q0, d20[0] @ left[0]
vdup.16 q1, d20[1] @ left[1]
vadd.s16 q0, q3, q0
vadd.s16 q1, q3, q1
@ 3rd row and 4th row
vdup.16 q8, d20[2] @ left[2]
vdup.16 q9, d20[3] @ left[3]
vadd.s16 q8, q3, q8
vadd.s16 q9, q3, q9
vqmovun.s16 d0, q0 @ narrow rows 0..3 with saturation to 0..255
vqmovun.s16 d1, q1
vqmovun.s16 d2, q8
vqmovun.s16 d3, q9
vst1.64 {d0}, [r0], r1 @ row 0; dst += y_stride
vst1.64 {d1}, [r0], r1 @ row 1
vst1.64 {d2}, [r0], r1 @ row 2
vst1.64 {d3}, [r0], r1 @ row 3
@ 5th row and 6th row
vdup.16 q0, d21[0] @ left[4]
vdup.16 q1, d21[1] @ left[5]
vadd.s16 q0, q3, q0
vadd.s16 q1, q3, q1
@ 7th row and 8th row
vdup.16 q8, d21[2] @ left[6]
vdup.16 q9, d21[3] @ left[7]
vadd.s16 q8, q3, q8
vadd.s16 q9, q3, q9
vqmovun.s16 d0, q0 @ narrow rows 4..7
vqmovun.s16 d1, q1
vqmovun.s16 d2, q8
vqmovun.s16 d3, q9
vst1.64 {d0}, [r0], r1 @ row 4
vst1.64 {d1}, [r0], r1 @ row 5
vst1.64 {d2}, [r0], r1 @ row 6
vst1.64 {d3}, [r0], r1 @ row 7
bx lr
.size vpx_tm_predictor_8x8_neon, .-vpx_tm_predictor_8x8_neon @ ENDP @ |vpx_tm_predictor_8x8_neon|
@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ TM prediction for a 16x16 block:
@ dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ q2/q3 hold (above - top_left) for columns 0..7 / 8..15 as 16-bit lanes.
@ q10 holds the 8 widened left pixels for the current loop pass.
@ Reads above[-1], so the byte before `above` must be readable.
@ NOTE(review): the preload at the bottom of the loop also runs on the final
@ pass, so 24 bytes are read from `left` in total although only 16 are used.
@ Confirm that callers keep left[16..23] readable.
_vpx_tm_predictor_16x16_neon:
vpx_tm_predictor_16x16_neon: @ PROC
@ Load ytop_left = above[-1];
sub r12, r2, #1
vld1.8 {d0[]}, [r12] @ replicate above[-1] into every byte of d0
@ Load above 16 pixels
vld1.8 {q1}, [r2]
@ preload 8 left pixels into d18
vld1.8 {d18}, [r3]!
@ Compute above - ytop_left
vsubl.u8 q2, d2, d0 @ columns 0..7
vsubl.u8 q3, d3, d0 @ columns 8..15
vmovl.u8 q10, d18 @ q10 (d20:d21) = 8 left pixels widened to 16 bits
@ Broadcast left[r] per row and compute left + (above - ytop_left)
@ Process 8 rows in each single loop and loop 2 times to process 16 rows.
mov r2, #2 @ r2 is reused as the loop counter
loop_16x16_neon:
@ Process two rows.
vdup.16 q0, d20[0] @ left pixel of the first row of this pass
vdup.16 q8, d20[1] @ left pixel of the second row
vadd.s16 q1, q0, q2 @ first row, columns 0..7
vadd.s16 q0, q0, q3 @ first row, columns 8..15
vadd.s16 q11, q8, q2 @ second row, columns 0..7
vadd.s16 q8, q8, q3 @ second row, columns 8..15
vqmovun.s16 d2, q1 @ narrow with saturation to 0..255
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vdup.16 q0, d20[2] @ preload next 2 rows data
vdup.16 q8, d20[3]
vst1.64 {d2,d3}, [r0], r1 @ store 16-byte row; dst += y_stride
vst1.64 {d22,d23}, [r0], r1
@ Process two rows.
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vdup.16 q0, d21[0] @ preload next 2 rows data
vdup.16 q8, d21[1]
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@ Process two rows.
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vdup.16 q0, d21[2] @ preload next 2 rows data
vdup.16 q8, d21[3]
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@ Process two rows.
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vld1.8 {d18}, [r3]! @ preload next 8 left pixels into d18 (unused after the last pass)
vmovl.u8 q10, d18
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
subs r2, r2, #1
bgt loop_16x16_neon
bx lr
.size vpx_tm_predictor_16x16_neon, .-vpx_tm_predictor_16x16_neon @ ENDP @ |vpx_tm_predictor_16x16_neon|
@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
@
@ TM prediction for a 32x32 block:
@ dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ q8..q11 hold (above - top_left) for columns 0..7, 8..15, 16..23, 24..31
@ as 16-bit lanes. q3 (d6:d7) holds the 8 widened left pixels for the
@ current loop pass. Reads above[-1], so the byte before `above` must be
@ readable.
@ NOTE(review): the preload near the bottom of the loop also runs on the
@ final pass, so 40 bytes are read from `left` in total although only 32
@ are used. Confirm that callers keep left[32..39] readable.
_vpx_tm_predictor_32x32_neon:
vpx_tm_predictor_32x32_neon: @ PROC
@ Load ytop_left = above[-1];
sub r12, r2, #1
vld1.8 {d0[]}, [r12] @ replicate above[-1] into every byte of d0
@ Load above 32 pixels
vld1.8 {q1}, [r2]! @ above[0..15]; above += 16
vld1.8 {q2}, [r2] @ above[16..31]
@ preload 8 left pixels
vld1.8 {d26}, [r3]!
@ Compute above - ytop_left
vsubl.u8 q8, d2, d0 @ columns 0..7
vsubl.u8 q9, d3, d0 @ columns 8..15
vsubl.u8 q10, d4, d0 @ columns 16..23
vsubl.u8 q11, d5, d0 @ columns 24..31
vmovl.u8 q3, d26 @ q3 (d6:d7) = 8 left pixels widened to 16 bits
@ Broadcast left[r] per row and compute left + (above - ytop_left)
@ Process 8 rows in each single loop and loop 4 times to process 32 rows.
mov r2, #4 @ r2 is reused as the loop counter
loop_32x32_neon:
@ Process two rows.
vdup.16 q0, d6[0] @ left pixel of the first row of this pass
vdup.16 q2, d6[1] @ left pixel of the second row
vadd.s16 q12, q0, q8
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
vqmovun.s16 d0, q12 @ narrow with saturation to 0..255
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8 @ start the second row while the first is narrowed
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1 @ store 32-byte row; dst += y_stride
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vdup.16 q1, d6[2] @ left pixels for the next two rows
vdup.16 q2, d6[3]
vst1.64 {d24-d27}, [r0], r1
@ Process two rows.
vadd.s16 q12, q1, q8
vadd.s16 q13, q1, q9
vadd.s16 q14, q1, q10
vadd.s16 q15, q1, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vdup.16 q0, d7[0] @ left pixels for the next two rows
vdup.16 q2, d7[1]
vst1.64 {d24-d27}, [r0], r1
@ Process two rows.
vadd.s16 q12, q0, q8
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vdup.16 q0, d7[2] @ left pixels for the last two rows of this pass
vdup.16 q2, d7[3]
vst1.64 {d24-d27}, [r0], r1
@ Process two rows.
vadd.s16 q12, q0, q8
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vld1.8 {d0}, [r3]! @ preload 8 left pixels (unused after the last pass)
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vmovl.u8 q3, d0 @ q3 = widened left pixels for the next pass
vst1.64 {d24-d27}, [r0], r1
subs r2, r2, #1
bgt loop_32x32_neon
bx lr
.size vpx_tm_predictor_32x32_neon, .-vpx_tm_predictor_32x32_neon @ ENDP @ |vpx_tm_predictor_32x32_neon|
.section .note.GNU-stack,"",%progbits

View file

@ -0,0 +1,647 @@
@ This file was created from a .asm file
@ using the ads2gas.pl script.
.equ DO1STROUNDING, 0
@
@ Copyright (c) 2013 The WebM project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
.global vpx_lpf_horizontal_edge_8_neon
.type vpx_lpf_horizontal_edge_8_neon, function
.global vpx_lpf_horizontal_edge_16_neon
.type vpx_lpf_horizontal_edge_16_neon, function
.global vpx_lpf_vertical_16_neon
.type vpx_lpf_vertical_16_neon, function
.arm
.text
.p2align 2
@ void mb_lpf_horizontal_edge(uint8_t *s, int p,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh,
@ int count)
@ r0 uint8_t *s,
@ r1 int p, /* pitch */
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh,
@ r12 int count
@
@ Internal worker for the two horizontal-edge entry points in this file,
@ which set r12 and branch here. It is not declared .global. For each of
@ `count` groups of 8 columns it loads the 16 rows around the edge (8 above
@ s, 8 starting at s), calls vpx_wide_mbfilter_neon, and stores only the
@ rows that the selected filter can have changed. The helper reports the
@ path taken in r7: bit 0 = only the narrow filter ran, bit 1 = only the
@ mbfilter ran, neither = the wide filter ran.
@ Every pixel load/store uses the ":64" qualifier, which tells the hardware
@ the address is 8-byte aligned.
@ r12 must be non-zero on entry: the loop decrements it and exits on zero.
_mb_lpf_horizontal_edge:
mb_lpf_horizontal_edge: @ PROC
push {r4-r8, lr} @ 6 registers = 24 bytes
vpush {d8-d15} @ 64 bytes; d8-d15 are overwritten by the q0..q7 loads below
ldr r4, [sp, #88] @ load thresh (first stack argument, now 24 + 64 bytes up)
h_count:
vld1.8 {d16[]}, [r2] @ load *blimit
vld1.8 {d17[]}, [r3] @ load *limit
vld1.8 {d18[]}, [r4] @ load *thresh
sub r8, r0, r1, lsl #3 @ r8 = s - 8 * pitch (row p7)
vld1.u8 {d0}, [r8,:64], r1 @ p7
vld1.u8 {d1}, [r8,:64], r1 @ p6
vld1.u8 {d2}, [r8,:64], r1 @ p5
vld1.u8 {d3}, [r8,:64], r1 @ p4
vld1.u8 {d4}, [r8,:64], r1 @ p3
vld1.u8 {d5}, [r8,:64], r1 @ p2
vld1.u8 {d6}, [r8,:64], r1 @ p1
vld1.u8 {d7}, [r8,:64], r1 @ p0
vld1.u8 {d8}, [r8,:64], r1 @ q0
vld1.u8 {d9}, [r8,:64], r1 @ q1
vld1.u8 {d10}, [r8,:64], r1 @ q2
vld1.u8 {d11}, [r8,:64], r1 @ q3
vld1.u8 {d12}, [r8,:64], r1 @ q4
vld1.u8 {d13}, [r8,:64], r1 @ q5
vld1.u8 {d14}, [r8,:64], r1 @ q6
vld1.u8 {d15}, [r8,:64], r1 @ q7
bl vpx_wide_mbfilter_neon @ results in d-registers, path flags in r7
tst r7, #1
beq h_mbfilter
@ flat && mask were not set for any of the channels. Just store the values
@ from filter.
sub r8, r0, r1, lsl #1 @ r8 = s - 2 * pitch (row p1)
vst1.u8 {d25}, [r8,:64], r1 @ store op1
vst1.u8 {d24}, [r8,:64], r1 @ store op0
vst1.u8 {d23}, [r8,:64], r1 @ store oq0
vst1.u8 {d26}, [r8,:64], r1 @ store oq1
b h_next
h_mbfilter:
tst r7, #2
beq h_wide_mbfilter
@ flat2 was not set for any of the channels. Just store the values from
@ mbfilter.
sub r8, r0, r1, lsl #1
sub r8, r8, r1 @ r8 = s - 3 * pitch (row p2)
vst1.u8 {d18}, [r8,:64], r1 @ store op2
vst1.u8 {d19}, [r8,:64], r1 @ store op1
vst1.u8 {d20}, [r8,:64], r1 @ store op0
vst1.u8 {d21}, [r8,:64], r1 @ store oq0
vst1.u8 {d22}, [r8,:64], r1 @ store oq1
vst1.u8 {d23}, [r8,:64], r1 @ store oq2
b h_next
h_wide_mbfilter:
sub r8, r0, r1, lsl #3
add r8, r8, r1 @ r8 = s - 7 * pitch (row p6; p7 is never modified)
vst1.u8 {d16}, [r8,:64], r1 @ store op6
vst1.u8 {d24}, [r8,:64], r1 @ store op5
vst1.u8 {d25}, [r8,:64], r1 @ store op4
vst1.u8 {d26}, [r8,:64], r1 @ store op3
vst1.u8 {d27}, [r8,:64], r1 @ store op2
vst1.u8 {d18}, [r8,:64], r1 @ store op1
vst1.u8 {d19}, [r8,:64], r1 @ store op0
vst1.u8 {d20}, [r8,:64], r1 @ store oq0
vst1.u8 {d21}, [r8,:64], r1 @ store oq1
vst1.u8 {d22}, [r8,:64], r1 @ store oq2
vst1.u8 {d23}, [r8,:64], r1 @ store oq3
vst1.u8 {d1}, [r8,:64], r1 @ store oq4
vst1.u8 {d2}, [r8,:64], r1 @ store oq5
vst1.u8 {d3}, [r8,:64], r1 @ store oq6
h_next:
add r0, r0, #8 @ advance s to the next group of 8 columns
subs r12, r12, #1 @ one group done
bne h_count
vpop {d8-d15}
pop {r4-r8, pc} @ restore and return (saved lr is popped into pc)
.size mb_lpf_horizontal_edge, .-mb_lpf_horizontal_edge @ ENDP @ |mb_lpf_horizontal_edge|
@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh)
@ r0 uint8_t *s,
@ r1 int pitch,
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh
@
@ Filters one 8-column group of a horizontal edge: sets count = 1 in r12
@ and tail-branches to mb_lpf_horizontal_edge. Argument registers and the
@ stack are left untouched, and the worker returns straight to our caller.
_vpx_lpf_horizontal_edge_8_neon:
vpx_lpf_horizontal_edge_8_neon: @ PROC
mov r12, #1 @ count = 1 group of 8 columns
b mb_lpf_horizontal_edge @ plain branch, not bl: lr still points at our caller
.size vpx_lpf_horizontal_edge_8_neon, .-vpx_lpf_horizontal_edge_8_neon @ ENDP @ |vpx_lpf_horizontal_edge_8_neon|
@ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh)
@ r0 uint8_t *s,
@ r1 int pitch,
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh
@
@ Filters two adjacent 8-column groups (16 columns) of a horizontal edge:
@ sets count = 2 in r12 and tail-branches to mb_lpf_horizontal_edge.
@ Argument registers and the stack are left untouched, and the worker
@ returns straight to our caller.
_vpx_lpf_horizontal_edge_16_neon:
vpx_lpf_horizontal_edge_16_neon: @ PROC
mov r12, #2 @ count = 2 groups of 8 columns
b mb_lpf_horizontal_edge @ plain branch, not bl: lr still points at our caller
.size vpx_lpf_horizontal_edge_16_neon, .-vpx_lpf_horizontal_edge_16_neon @ ENDP @ |vpx_lpf_horizontal_edge_16_neon|
@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh)
@ r0 uint8_t *s,
@ r1 int p, /* pitch */
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh,
@
@ Wide loop filter across a vertical edge, 8 rows tall. For each row the 8
@ bytes left of s and the 8 bytes starting at s are loaded. The two 8x8
@ byte blocks are then transposed so that d0..d15 hold p7..q7, the layout
@ vpx_wide_mbfilter_neon expects. After the helper returns, r7 selects how
@ many columns are written back: bit 0 = narrow filter only (4 columns),
@ bit 1 = mbfilter only (6 columns), neither = wide filter (all 16 columns,
@ transposed back first).
@ The 8-byte row loads/stores use the ":64" qualifier, which tells the
@ hardware that s and s - 8 are 8-byte aligned on every row.
_vpx_lpf_vertical_16_neon:
vpx_lpf_vertical_16_neon: @ PROC
push {r4-r8, lr} @ 6 registers = 24 bytes
vpush {d8-d15} @ 64 bytes; d8-d15 are overwritten by the loads below
ldr r4, [sp, #88] @ load thresh (first stack argument, now 24 + 64 bytes up)
vld1.8 {d16[]}, [r2] @ load *blimit
vld1.8 {d17[]}, [r3] @ load *limit
vld1.8 {d18[]}, [r4] @ load *thresh
sub r8, r0, #8 @ r8 = s - 8: left half of each row
vld1.8 {d0}, [r8,:64], r1 @ row 0, left 8 bytes
vld1.8 {d8}, [r0,:64], r1 @ row 0, right 8 bytes
vld1.8 {d1}, [r8,:64], r1
vld1.8 {d9}, [r0,:64], r1
vld1.8 {d2}, [r8,:64], r1
vld1.8 {d10}, [r0,:64], r1
vld1.8 {d3}, [r8,:64], r1
vld1.8 {d11}, [r0,:64], r1
vld1.8 {d4}, [r8,:64], r1
vld1.8 {d12}, [r0,:64], r1
vld1.8 {d5}, [r8,:64], r1
vld1.8 {d13}, [r0,:64], r1
vld1.8 {d6}, [r8,:64], r1
vld1.8 {d14}, [r0,:64], r1
vld1.8 {d7}, [r8,:64], r1
vld1.8 {d15}, [r0,:64], r1
sub r0, r0, r1, lsl #3 @ rewind r0 by 8 rows back to s
@ Transpose both 8x8 byte blocks (32-bit, then 16-bit, then 8-bit swaps).
vtrn.32 q0, q2
vtrn.32 q1, q3
vtrn.32 q4, q6
vtrn.32 q5, q7
vtrn.16 q0, q1
vtrn.16 q2, q3
vtrn.16 q4, q5
vtrn.16 q6, q7
vtrn.8 d0, d1
vtrn.8 d2, d3
vtrn.8 d4, d5
vtrn.8 d6, d7
vtrn.8 d8, d9
vtrn.8 d10, d11
vtrn.8 d12, d13
vtrn.8 d14, d15
bl vpx_wide_mbfilter_neon @ results in d-registers, path flags in r7
tst r7, #1
beq v_mbfilter
@ flat && mask were not set for any of the channels. Just store the values
@ from filter.
sub r8, r0, #2 @ r8 = s - 2: column of p1
vswp d23, d25 @ reorder to d23..d26 = op1, op0, oq0, oq1
vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1 @ 4 bytes per row
vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
b v_end
v_mbfilter:
tst r7, #2
beq v_wide_mbfilter
@ flat2 was not set for any of the channels. Just store the values from
@ mbfilter.
sub r8, r0, #3 @ r8 = s - 3: column of p2
vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 @ op2, op1, op0 at s - 3
vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 @ oq0, oq1, oq2 at s
vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1
vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1
vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1
vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1
vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1
vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1
vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1
vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1
vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1
vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1
vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1
vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1
vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1
vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1
b v_end
v_wide_mbfilter:
sub r8, r0, #8 @ r8 = s - 8: left half of each row
@ Transpose the left-half output registers back into row order.
vtrn.32 d0, d26
vtrn.32 d16, d27
vtrn.32 d24, d18
vtrn.32 d25, d19
vtrn.16 d0, d24
vtrn.16 d16, d25
vtrn.16 d26, d18
vtrn.16 d27, d19
vtrn.8 d0, d16
vtrn.8 d24, d25
vtrn.8 d26, d27
vtrn.8 d18, d19
@ Transpose the right-half output registers back into row order.
vtrn.32 d20, d1
vtrn.32 d21, d2
vtrn.32 d22, d3
vtrn.32 d23, d15
vtrn.16 d20, d22
vtrn.16 d21, d23
vtrn.16 d1, d3
vtrn.16 d2, d15
vtrn.8 d20, d21
vtrn.8 d22, d23
vtrn.8 d1, d2
vtrn.8 d3, d15
vst1.8 {d0}, [r8,:64], r1 @ row 0, left 8 bytes
vst1.8 {d20}, [r0,:64], r1 @ row 0, right 8 bytes
vst1.8 {d16}, [r8,:64], r1
vst1.8 {d21}, [r0,:64], r1
vst1.8 {d24}, [r8,:64], r1
vst1.8 {d22}, [r0,:64], r1
vst1.8 {d25}, [r8,:64], r1
vst1.8 {d23}, [r0,:64], r1
vst1.8 {d26}, [r8,:64], r1
vst1.8 {d1}, [r0,:64], r1
vst1.8 {d27}, [r8,:64], r1
vst1.8 {d2}, [r0,:64], r1
vst1.8 {d18}, [r8,:64], r1
vst1.8 {d3}, [r0,:64], r1
vst1.8 {d19}, [r8,:64], r1
vst1.8 {d15}, [r0,:64], r1
v_end:
vpop {d8-d15}
pop {r4-r8, pc} @ restore and return (saved lr is popped into pc)
.size vpx_lpf_vertical_16_neon, .-vpx_lpf_vertical_16_neon @ ENDP @ |vpx_lpf_vertical_16_neon|
@ void vpx_wide_mbfilter_neon();
@ This is a helper function for the loopfilters. The individual functions do the
@ necessary load, transpose (if necessary) and store.
@
@ r0-r3 PRESERVE
@ d16 blimit
@ d17 limit
@ d18 thresh
@ d0 p7
@ d1 p6
@ d2 p5
@ d3 p4
@ d4 p3
@ d5 p2
@ d6 p1
@ d7 p0
@ d8 q0
@ d9 q1
@ d10 q2
@ d11 q3
@ d12 q4
@ d13 q5
@ d14 q6
@ d15 q7
_vpx_wide_mbfilter_neon:
vpx_wide_mbfilter_neon: @ PROC
mov r7, #0
@ filter_mask
vabd.u8 d19, d4, d5 @ abs(p3 - p2)
vabd.u8 d20, d5, d6 @ abs(p2 - p1)
vabd.u8 d21, d6, d7 @ abs(p1 - p0)
vabd.u8 d22, d9, d8 @ abs(q1 - q0)
vabd.u8 d23, d10, d9 @ abs(q2 - q1)
vabd.u8 d24, d11, d10 @ abs(q3 - q2)
@ only compare the largest value to limit
vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1))
vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0))
vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2))
vmax.u8 d19, d19, d20
vabd.u8 d24, d7, d8 @ abs(p0 - q0)
vmax.u8 d19, d19, d23
vabd.u8 d23, d6, d9 @ a = abs(p1 - q1)
vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2
@ abs () > limit
vcge.u8 d19, d17, d19
@ flatmask4
vabd.u8 d25, d7, d5 @ abs(p0 - p2)
vabd.u8 d26, d8, d10 @ abs(q0 - q2)
vabd.u8 d27, d4, d7 @ abs(p3 - p0)
vabd.u8 d28, d11, d8 @ abs(q3 - q0)
@ only compare the largest value to thresh
vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2))
vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0))
vmax.u8 d25, d25, d26
vmax.u8 d20, d20, d25
vshr.u8 d23, d23, #1 @ a = a / 2
vqadd.u8 d24, d24, d23 @ a = b + a
vmov.u8 d30, #1
vcge.u8 d24, d16, d24 @ (a > blimit * 2 + limit) * -1
vcge.u8 d20, d30, d20 @ flat
vand d19, d19, d24 @ mask
@ hevmask
vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1
vorr d21, d21, d22 @ hev
vand d16, d20, d19 @ flat && mask
vmov r5, r6, d16
@ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
vabd.u8 d22, d3, d7 @ abs(p4 - p0)
vabd.u8 d23, d12, d8 @ abs(q4 - q0)
vabd.u8 d24, d7, d2 @ abs(p0 - p5)
vabd.u8 d25, d8, d13 @ abs(q0 - q5)
vabd.u8 d26, d1, d7 @ abs(p6 - p0)
vabd.u8 d27, d14, d8 @ abs(q6 - q0)
vabd.u8 d28, d0, d7 @ abs(p7 - p0)
vabd.u8 d29, d15, d8 @ abs(q7 - q0)
@ only compare the largest value to thresh
vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0))
vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5))
vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0))
vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0))
vmax.u8 d26, d22, d23
vmax.u8 d27, d24, d25
vmax.u8 d23, d26, d27
vcge.u8 d18, d30, d23 @ flat2
vmov.u8 d22, #0x80
orrs r5, r5, r6 @ Check for 0
orreq r7, r7, #1 @ Only do filter branch
vand d17, d18, d16 @ flat2 && flat && mask
vmov r5, r6, d17
@ mbfilter() function
@ filter() function
@ convert to signed
veor d23, d8, d22 @ qs0
veor d24, d7, d22 @ ps0
veor d25, d6, d22 @ ps1
veor d26, d9, d22 @ qs1
vmov.u8 d27, #3
vsub.s8 d28, d23, d24 @ ( qs0 - ps0)
vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0)
vand d29, d29, d21 @ filter &= hev
vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0)
vmov.u8 d29, #4
@ filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d28, q15
vand d28, d28, d19 @ filter &= mask
vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3)
vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4)
vshr.s8 d30, d30, #3 @ filter2 >>= 3
vshr.s8 d29, d29, #3 @ filter1 >>= 3
vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2)
vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1)
@ outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
vbic d29, d29, d21 @ filter &= ~hev
vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter)
veor d24, d24, d22 @ *f_op0 = u^0x80
veor d23, d23, d22 @ *f_oq0 = u^0x80
veor d25, d25, d22 @ *f_op1 = u^0x80
veor d26, d26, d22 @ *f_oq1 = u^0x80
tst r7, #1
bxne lr
orrs r5, r5, r6 @ Check for 0
orreq r7, r7, #2 @ Only do mbfilter branch
@ mbfilter flat && mask branch
@ TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
@ and using vibt on the q's?
vmov.u8 d29, #2
vaddl.u8 q15, d7, d8 @ op2 = p0 + q0
vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3
vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2
vaddl.u8 q10, d4, d5
vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2
vaddl.u8 q14, d6, d9
vqrshrn.u16 d18, q15, #3 @ r_op2
vsub.i16 q15, q10
vaddl.u8 q10, d4, d6
vadd.i16 q15, q14
vaddl.u8 q14, d7, d10
vqrshrn.u16 d19, q15, #3 @ r_op1
vsub.i16 q15, q10
vadd.i16 q15, q14
vaddl.u8 q14, d8, d11
vqrshrn.u16 d20, q15, #3 @ r_op0
vsubw.u8 q15, d4 @ oq0 = op0 - p3
vsubw.u8 q15, d7 @ oq0 -= p0
vadd.i16 q15, q14
vaddl.u8 q14, d9, d11
vqrshrn.u16 d21, q15, #3 @ r_oq0
vsubw.u8 q15, d5 @ oq1 = oq0 - p2
vsubw.u8 q15, d8 @ oq1 -= q0
vadd.i16 q15, q14
vaddl.u8 q14, d10, d11
vqrshrn.u16 d22, q15, #3 @ r_oq1
vsubw.u8 q15, d6 @ oq2 = oq0 - p1
vsubw.u8 q15, d9 @ oq2 -= q1
vadd.i16 q15, q14
vqrshrn.u16 d27, q15, #3 @ r_oq2
@ Filter does not set op2 or oq2, so use p2 and q2.
vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask)
vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask)
vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask)
vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask)
vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask)
vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask)
vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask)
tst r7, #2
bxne lr
@ wide_mbfilter flat2 && flat && mask branch
vmov.u8 d16, #7
vaddl.u8 q15, d7, d8 @ op6 = p0 + q0
vaddl.u8 q12, d2, d3
vaddl.u8 q13, d4, d5
vaddl.u8 q14, d1, d6
vmlal.u8 q15, d0, d16 @ op6 += p7 * 3
vadd.i16 q12, q13
vadd.i16 q15, q14
vaddl.u8 q14, d2, d9
vadd.i16 q15, q12
vaddl.u8 q12, d0, d1
vaddw.u8 q15, d1
vaddl.u8 q13, d0, d2
vadd.i16 q14, q15, q14
vqrshrn.u16 d16, q15, #4 @ w_op6
vsub.i16 q15, q14, q12
vaddl.u8 q14, d3, d10
vqrshrn.u16 d24, q15, #4 @ w_op5
vsub.i16 q15, q13
vaddl.u8 q13, d0, d3
vadd.i16 q15, q14
vaddl.u8 q14, d4, d11
vqrshrn.u16 d25, q15, #4 @ w_op4
vadd.i16 q15, q14
vaddl.u8 q14, d0, d4
vsub.i16 q15, q13
vsub.i16 q14, q15, q14
vqrshrn.u16 d26, q15, #4 @ w_op3
vaddw.u8 q15, q14, d5 @ op2 += p2
vaddl.u8 q14, d0, d5
vaddw.u8 q15, d12 @ op2 += q4
vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m)
vqrshrn.u16 d27, q15, #4 @ w_op2
vsub.i16 q15, q14
vaddl.u8 q14, d0, d6
vaddw.u8 q15, d6 @ op1 += p1
vaddw.u8 q15, d13 @ op1 += q5
vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m)
vqrshrn.u16 d18, q15, #4 @ w_op1
vsub.i16 q15, q14
vaddl.u8 q14, d0, d7
vaddw.u8 q15, d7 @ op0 += p0
vaddw.u8 q15, d14 @ op0 += q6
vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m)
vqrshrn.u16 d19, q15, #4 @ w_op0
vsub.i16 q15, q14
vaddl.u8 q14, d1, d8
vaddw.u8 q15, d8 @ oq0 += q0
vaddw.u8 q15, d15 @ oq0 += q7
vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m)
vqrshrn.u16 d20, q15, #4 @ w_oq0
vsub.i16 q15, q14
vaddl.u8 q14, d2, d9
vaddw.u8 q15, d9 @ oq1 += q1
vaddl.u8 q4, d10, d15
vaddw.u8 q15, d15 @ oq1 += q7
vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m)
vqrshrn.u16 d21, q15, #4 @ w_oq1
vsub.i16 q15, q14
vaddl.u8 q14, d3, d10
vadd.i16 q15, q4
vaddl.u8 q4, d11, d15
vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m)
vqrshrn.u16 d22, q15, #4 @ w_oq2
vsub.i16 q15, q14
vaddl.u8 q14, d4, d11
vadd.i16 q15, q4
vaddl.u8 q4, d12, d15
vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m)
vqrshrn.u16 d23, q15, #4 @ w_oq3
vsub.i16 q15, q14
vaddl.u8 q14, d5, d12
vadd.i16 q15, q4
vaddl.u8 q4, d13, d15
vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m)
vqrshrn.u16 d1, q15, #4 @ w_oq4
vsub.i16 q15, q14
vaddl.u8 q14, d6, d13
vadd.i16 q15, q4
vaddl.u8 q4, d14, d15
vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m)
vqrshrn.u16 d2, q15, #4 @ w_oq5
vsub.i16 q15, q14
vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m)
vadd.i16 q15, q4
vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m)
vqrshrn.u16 d3, q15, #4 @ w_oq6
vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m)
vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m)
vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m)
bx lr
.size vpx_wide_mbfilter_neon, .-vpx_wide_mbfilter_neon @ ENDP @ |vpx_wide_mbfilter_neon|
.section .note.GNU-stack,"",%progbits

View file

@ -0,0 +1,44 @@
@ This file was created from a .asm file
@ using the ads2gas.pl script.
.equ DO1STROUNDING, 0
@
@ Copyright (c) 2010 The WebM project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
.global vpx_push_neon
.type vpx_push_neon, function
.global vpx_pop_neon
.type vpx_pop_neon, function
.arm
.eabi_attribute 24, 1 @Tag_ABI_align_needed
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
.text
.p2align 2
@ vpx_push_neon: store NEON registers d8-d15 to the buffer addressed by r0.
@ r0 in: destination buffer; 64 bytes are written
@ r0 out: advanced by 64, because both stores use post-increment writeback
_vpx_push_neon:
vpx_push_neon: @ PROC
vst1.i64 {d8, d9, d10, d11}, [r0]! @ write d8-d11 (32 bytes), r0 += 32
vst1.i64 {d12, d13, d14, d15}, [r0]! @ write d12-d15 (32 bytes), r0 += 32
bx lr
.size vpx_push_neon, .-vpx_push_neon @ ENDP
@ vpx_pop_neon: reload NEON registers d8-d15 from the buffer addressed by r0.
@ r0 in: source buffer; 64 bytes are read
@ r0 out: advanced by 64, because both loads use post-increment writeback
_vpx_pop_neon:
vpx_pop_neon: @ PROC
vld1.i64 {d8, d9, d10, d11}, [r0]! @ read d8-d11 (32 bytes), r0 += 32
vld1.i64 {d12, d13, d14, d15}, [r0]! @ read d12-d15 (32 bytes), r0 += 32
bx lr
.size vpx_pop_neon, .-vpx_pop_neon @ ENDP
.section .note.GNU-stack,"",%progbits

View file

@ -0,0 +1,660 @@
@ This file was created from a .asm file
@ using the ads2gas_apple.pl script.
.set WIDE_REFERENCE, 0
.set ARCHITECTURE, 5
.set DO1STROUNDING, 0
@
@ Copyright (c) 2014 The WebM project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
.globl _vpx_v_predictor_4x4_neon
.globl vpx_v_predictor_4x4_neon
.globl _vpx_v_predictor_8x8_neon
.globl vpx_v_predictor_8x8_neon
.globl _vpx_v_predictor_16x16_neon
.globl vpx_v_predictor_16x16_neon
.globl _vpx_v_predictor_32x32_neon
.globl vpx_v_predictor_32x32_neon
.globl _vpx_h_predictor_4x4_neon
.globl vpx_h_predictor_4x4_neon
.globl _vpx_h_predictor_8x8_neon
.globl vpx_h_predictor_8x8_neon
.globl _vpx_h_predictor_16x16_neon
.globl vpx_h_predictor_16x16_neon
.globl _vpx_h_predictor_32x32_neon
.globl vpx_h_predictor_32x32_neon
.globl _vpx_tm_predictor_4x4_neon
.globl vpx_tm_predictor_4x4_neon
.globl _vpx_tm_predictor_8x8_neon
.globl vpx_tm_predictor_8x8_neon
.globl _vpx_tm_predictor_16x16_neon
.globl vpx_tm_predictor_16x16_neon
.globl _vpx_tm_predictor_32x32_neon
.globl vpx_tm_predictor_32x32_neon
@ ARM
@
@ PRESERVE8
.text
.p2align 2
@ Vertical 4x4 predictor: copies the 4 bytes at above[0..3] into each of the
@ 4 destination rows. r3 (left) is not read.
@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_v_predictor_4x4_neon:
vpx_v_predictor_4x4_neon: @
vld1.32 {d0[0]}, [r2] @ d0[0] = above[0..3]
vst1.32 {d0[0]}, [r0], r1 @ row 0, dst += y_stride
vst1.32 {d0[0]}, [r0], r1 @ row 1
vst1.32 {d0[0]}, [r0], r1 @ row 2
vst1.32 {d0[0]}, [r0], r1 @ row 3
bx lr
@ @ |vpx_v_predictor_4x4_neon|
@ Vertical 8x8 predictor: copies the 8 bytes at above[0..7] into each of the
@ 8 destination rows. r3 (left) is not read.
@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_v_predictor_8x8_neon:
vpx_v_predictor_8x8_neon: @
vld1.8 {d0}, [r2] @ d0 = above[0..7]
@ Eight unrolled row stores; each advances dst by y_stride.
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
vst1.8 {d0}, [r0], r1
bx lr
@ @ |vpx_v_predictor_8x8_neon|
@ Vertical 16x16 predictor: copies the 16 bytes at above[0..15] into each of
@ the 16 destination rows. r3 (left) is not read.
@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_v_predictor_16x16_neon:
vpx_v_predictor_16x16_neon: @
vld1.8 {q0}, [r2] @ q0 = above[0..15]
@ Sixteen unrolled row stores; each advances dst by y_stride.
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
vst1.8 {q0}, [r0], r1
bx lr
@ @ |vpx_v_predictor_16x16_neon|
@ Vertical 32x32 predictor: copies the 32 bytes at above[0..31] into each of
@ the 32 destination rows (2 loop iterations of 16 unrolled stores).
@ r3 (left) is not read.
@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_v_predictor_32x32_neon:
vpx_v_predictor_32x32_neon: @
vld1.8 {q0, q1}, [r2] @ q0:q1 = above[0..31]
mov r2, #2 @ r2 is reused as the loop counter (above is already loaded)
loop_v:
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
vst1.8 {q0, q1}, [r0], r1
subs r2, r2, #1
bgt loop_v @ repeat until 32 rows have been written
bx lr
@ @ |vpx_v_predictor_32x32_neon|
@ Horizontal 4x4 predictor: row i of dst is filled with 4 copies of left[i].
@ r2 (above) is not read.
@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_h_predictor_4x4_neon:
vpx_h_predictor_4x4_neon: @
vld1.32 {d1[0]}, [r3] @ d1[0] = left[0..3]
vdup.8 d0, d1[0] @ broadcast left[0]
vst1.32 {d0[0]}, [r0], r1 @ row 0 (4 bytes), dst += y_stride
vdup.8 d0, d1[1]
vst1.32 {d0[0]}, [r0], r1
vdup.8 d0, d1[2]
vst1.32 {d0[0]}, [r0], r1
vdup.8 d0, d1[3]
vst1.32 {d0[0]}, [r0], r1
bx lr
@ @ |vpx_h_predictor_4x4_neon|
@ Horizontal 8x8 predictor: row i of dst is filled with 8 copies of left[i].
@ r2 (above) is not read.
@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_h_predictor_8x8_neon:
vpx_h_predictor_8x8_neon: @
vld1.64 {d1}, [r3] @ d1 = left[0..7]
@ For each row: broadcast one left byte across d0, then store 8 bytes.
vdup.8 d0, d1[0]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[1]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[2]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[3]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[4]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[5]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[6]
vst1.64 {d0}, [r0], r1
vdup.8 d0, d1[7]
vst1.64 {d0}, [r0], r1
bx lr
@ @ |vpx_h_predictor_8x8_neon|
@ Horizontal 16x16 predictor: row i of dst is filled with 16 copies of
@ left[i]. r2 (above) is not read.
@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_h_predictor_16x16_neon:
vpx_h_predictor_16x16_neon: @
vld1.8 {q1}, [r3] @ q1 (d2:d3) = left[0..15]
@ Rows 0-7 take their byte from d2, rows 8-15 from d3.
vdup.8 q0, d2[0]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[1]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[2]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[3]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[4]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[5]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[6]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[7]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[0]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[1]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[2]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[3]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[4]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[5]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[6]
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[7]
vst1.8 {q0}, [r0], r1
bx lr
@ @ |vpx_h_predictor_16x16_neon|
@ Horizontal 32x32 predictor: row i of dst is filled with 32 copies of
@ left[i]. Each row is written as two 16-byte stores; the loop runs twice,
@ consuming 16 left pixels and producing 16 rows per iteration.
@ r2 (above) is not read; it is reused as the loop counter.
@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_h_predictor_32x32_neon:
vpx_h_predictor_32x32_neon: @
sub r1, r1, #16 @ first store of a row advances dst by 16, second by y_stride - 16
mov r2, #2 @ loop counter
loop_h:
vld1.8 {q1}, [r3]! @ q1 (d2:d3) = next 16 left pixels, left += 16
vdup.8 q0, d2[0]
vst1.8 {q0}, [r0]! @ left half of the row, dst += 16
vst1.8 {q0}, [r0], r1 @ right half of the row, dst moves to the next row
vdup.8 q0, d2[1]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[2]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[3]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[4]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[5]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[6]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d2[7]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[0]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[1]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[2]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[3]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[4]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[5]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[6]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
vdup.8 q0, d3[7]
vst1.8 {q0}, [r0]!
vst1.8 {q0}, [r0], r1
subs r2, r2, #1
bgt loop_h
bx lr
@ @ |vpx_h_predictor_32x32_neon|
@ TM 4x4 predictor: dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ The difference (above - top_left) is computed once in 16-bit lanes and
@ added to each broadcast left pixel; vqmovun.s16 saturates back to 8 bits.
@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_tm_predictor_4x4_neon:
vpx_tm_predictor_4x4_neon: @
@ Load ytop_left = above[-1] @
sub r12, r2, #1
vld1.u8 {d0[]}, [r12] @ broadcast above[-1] to every byte lane of d0
@ Load above 4 pixels
vld1.32 {d2[0]}, [r2] @ only the low 4 byte lanes of d2 are loaded here
@ Compute above - ytop_left
vsubl.u8 q3, d2, d0 @ 16-bit lanes; only the low four feed the stores below
@ Load left row by row and compute left + (above - ytop_left)
@ 1st row and 2nd row
vld1.u8 {d2[]}, [r3]! @ broadcast left[0], left++
vld1.u8 {d4[]}, [r3]! @ broadcast left[1], left++
vmovl.u8 q1, d2 @ widen to 16 bits
vmovl.u8 q2, d4
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
vqmovun.s16 d0, q1 @ saturate to unsigned 8 bits
vqmovun.s16 d1, q2
vst1.32 {d0[0]}, [r0], r1 @ store 4 pixels, dst += y_stride
vst1.32 {d1[0]}, [r0], r1
@ 3rd row and 4th row
vld1.u8 {d2[]}, [r3]!
vld1.u8 {d4[]}, [r3] @ last left pixel: no writeback needed
vmovl.u8 q1, d2
vmovl.u8 q2, d4
vadd.s16 q1, q1, q3
vadd.s16 q2, q2, q3
vqmovun.s16 d0, q1
vqmovun.s16 d1, q2
vst1.32 {d0[0]}, [r0], r1
vst1.32 {d1[0]}, [r0], r1
bx lr
@ @ |vpx_tm_predictor_4x4_neon|
@ TM 8x8 predictor: dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ All 8 left pixels are loaded and widened up front (q10); each row then
@ broadcasts one 16-bit left value and adds the precomputed
@ (above - top_left) vector held in q3.
@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_tm_predictor_8x8_neon:
vpx_tm_predictor_8x8_neon: @
@ Load ytop_left = above[-1] @
sub r12, r2, #1
vld1.8 {d0[]}, [r12] @ broadcast above[-1] to every byte lane of d0
@ preload 8 left
vld1.8 {d30}, [r3]
@ Load above 8 pixels
vld1.64 {d2}, [r2]
vmovl.u8 q10, d30 @ q10 = left[0..7] widened: d20 = rows 0-3, d21 = rows 4-7
@ Compute above - ytop_left
vsubl.u8 q3, d2, d0
@ Broadcast each left pixel and compute left + (above - ytop_left)
@ 1st row and 2nd row
vdup.16 q0, d20[0]
vdup.16 q1, d20[1]
vadd.s16 q0, q3, q0
vadd.s16 q1, q3, q1
@ 3rd row and 4th row
vdup.16 q8, d20[2]
vdup.16 q9, d20[3]
vadd.s16 q8, q3, q8
vadd.s16 q9, q3, q9
vqmovun.s16 d0, q0 @ saturate each row to unsigned 8 bits
vqmovun.s16 d1, q1
vqmovun.s16 d2, q8
vqmovun.s16 d3, q9
vst1.64 {d0}, [r0], r1
vst1.64 {d1}, [r0], r1
vst1.64 {d2}, [r0], r1
vst1.64 {d3}, [r0], r1
@ 5th row and 6th row
vdup.16 q0, d21[0]
vdup.16 q1, d21[1]
vadd.s16 q0, q3, q0
vadd.s16 q1, q3, q1
@ 7th row and 8th row
vdup.16 q8, d21[2]
vdup.16 q9, d21[3]
vadd.s16 q8, q3, q8
vadd.s16 q9, q3, q9
vqmovun.s16 d0, q0
vqmovun.s16 d1, q1
vqmovun.s16 d2, q8
vqmovun.s16 d3, q9
vst1.64 {d0}, [r0], r1
vst1.64 {d1}, [r0], r1
vst1.64 {d2}, [r0], r1
vst1.64 {d3}, [r0], r1
bx lr
@ @ |vpx_tm_predictor_8x8_neon|
@ TM 16x16 predictor: dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ (above - top_left) is kept as two 8-lane 16-bit vectors (q2 = columns 0-7,
@ q3 = columns 8-15). The loop runs twice; each iteration consumes 8 widened
@ left pixels from q10 and writes 8 rows.
@ NOTE(review): the preload at the bottom of the loop also executes on the
@ final iteration, so it reads 8 bytes beyond the 16 left pixels that are
@ actually used (left[16..23]); the loaded value is discarded. Confirm that
@ every caller's left buffer makes that read safe.
@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_tm_predictor_16x16_neon:
vpx_tm_predictor_16x16_neon: @
@ Load ytop_left = above[-1] @
sub r12, r2, #1
vld1.8 {d0[]}, [r12] @ broadcast above[-1] to every byte lane of d0
@ Load above 16 pixels
vld1.8 {q1}, [r2]
@ preload 8 left pixels into d18, left += 8
vld1.8 {d18}, [r3]!
@ Compute above - ytop_left
vsubl.u8 q2, d2, d0 @ columns 0-7
vsubl.u8 q3, d3, d0 @ columns 8-15
vmovl.u8 q10, d18 @ q10 = 8 left pixels widened to 16 bits
@ Broadcast each left pixel and compute left + (above - ytop_left)
@ Process 8 rows in each single loop and loop 2 times to process 16 rows.
mov r2, #2 @ r2 is reused as the loop counter (above is already loaded)
loop_16x16_neon:
@ Process two rows.
vdup.16 q0, d20[0]
vdup.16 q8, d20[1]
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vdup.16 q0, d20[2] @ preload next 2 rows data
vdup.16 q8, d20[3]
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@ Process two rows.
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vdup.16 q0, d21[0] @ preload next 2 rows data
vdup.16 q8, d21[1]
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@ Process two rows.
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vdup.16 q0, d21[2] @ preload next 2 rows data
vdup.16 q8, d21[3]
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
@ Process two rows.
vadd.s16 q1, q0, q2
vadd.s16 q0, q0, q3
vadd.s16 q11, q8, q2
vadd.s16 q8, q8, q3
vqmovun.s16 d2, q1
vqmovun.s16 d3, q0
vqmovun.s16 d22, q11
vqmovun.s16 d23, q8
vld1.8 {d18}, [r3]! @ preload next 8 left pixels into d18 (also runs on the last pass)
vmovl.u8 q10, d18
vst1.64 {d2,d3}, [r0], r1
vst1.64 {d22,d23}, [r0], r1
subs r2, r2, #1
bgt loop_16x16_neon
bx lr
@ @ |vpx_tm_predictor_16x16_neon|
@ TM 32x32 predictor: dst[r][c] = saturate_u8(left[r] + above[c] - above[-1]).
@ (above - top_left) is kept as four 8-lane 16-bit vectors (q8-q11, columns
@ 0-7, 8-15, 16-23, 24-31). The loop runs 4 times; each iteration consumes
@ 8 widened left pixels from q3 and writes 8 rows of 32 bytes.
@ NOTE(review): the preload near the bottom of the loop also executes on the
@ final iteration, so it reads 8 bytes beyond the 32 left pixels that are
@ actually used (left[32..39]); the loaded value is discarded. Confirm that
@ every caller's left buffer makes that read safe.
@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
@ const uint8_t *above,
@ const uint8_t *left)
@ r0 uint8_t *dst
@ r1 ptrdiff_t y_stride
@ r2 const uint8_t *above
@ r3 const uint8_t *left
_vpx_tm_predictor_32x32_neon:
vpx_tm_predictor_32x32_neon: @
@ Load ytop_left = above[-1] @
sub r12, r2, #1
vld1.8 {d0[]}, [r12] @ broadcast above[-1] to every byte lane of d0
@ Load above 32 pixels
vld1.8 {q1}, [r2]! @ above[0..15], above += 16
vld1.8 {q2}, [r2] @ above[16..31]
@ preload 8 left pixels
vld1.8 {d26}, [r3]!
@ Compute above - ytop_left
vsubl.u8 q8, d2, d0
vsubl.u8 q9, d3, d0
vsubl.u8 q10, d4, d0
vsubl.u8 q11, d5, d0
vmovl.u8 q3, d26 @ q3 (d6:d7) = 8 left pixels widened to 16 bits
@ Broadcast each left pixel and compute left + (above - ytop_left)
@ Process 8 rows in each single loop and loop 4 times to process 32 rows.
mov r2, #4 @ r2 is reused as the loop counter (above is already loaded)
loop_32x32_neon:
@ Process two rows.
vdup.16 q0, d6[0]
vdup.16 q2, d6[1]
vadd.s16 q12, q0, q8
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vdup.16 q1, d6[2]
vdup.16 q2, d6[3]
vst1.64 {d24-d27}, [r0], r1
@ Process two rows.
vadd.s16 q12, q1, q8
vadd.s16 q13, q1, q9
vadd.s16 q14, q1, q10
vadd.s16 q15, q1, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vdup.16 q0, d7[0]
vdup.16 q2, d7[1]
vst1.64 {d24-d27}, [r0], r1
@ Process two rows.
vadd.s16 q12, q0, q8
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vdup.16 q0, d7[2]
vdup.16 q2, d7[3]
vst1.64 {d24-d27}, [r0], r1
@ Process two rows.
vadd.s16 q12, q0, q8
vadd.s16 q13, q0, q9
vadd.s16 q14, q0, q10
vadd.s16 q15, q0, q11
vqmovun.s16 d0, q12
vqmovun.s16 d1, q13
vadd.s16 q12, q2, q8
vadd.s16 q13, q2, q9
vqmovun.s16 d2, q14
vqmovun.s16 d3, q15
vadd.s16 q14, q2, q10
vadd.s16 q15, q2, q11
vst1.64 {d0-d3}, [r0], r1
vqmovun.s16 d24, q12
vqmovun.s16 d25, q13
vld1.8 {d0}, [r3]! @ preload 8 left pixels (also runs on the last pass)
vqmovun.s16 d26, q14
vqmovun.s16 d27, q15
vmovl.u8 q3, d0
vst1.64 {d24-d27}, [r0], r1
subs r2, r2, #1
bgt loop_32x32_neon
bx lr
@ @ |vpx_tm_predictor_32x32_neon|

View file

@ -0,0 +1,649 @@
@ This file was created from a .asm file
@ using the ads2gas_apple.pl script.
.set WIDE_REFERENCE, 0
.set ARCHITECTURE, 5
.set DO1STROUNDING, 0
@
@ Copyright (c) 2013 The WebM project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
.globl _vpx_lpf_horizontal_edge_8_neon
.globl vpx_lpf_horizontal_edge_8_neon
.globl _vpx_lpf_horizontal_edge_16_neon
.globl vpx_lpf_horizontal_edge_16_neon
.globl _vpx_lpf_vertical_16_neon
.globl vpx_lpf_vertical_16_neon
@ ARM
.text
.p2align 2
@ Shared body of the horizontal-edge wide loop filter. It is not exported
@ (no .globl); the two public entry points in this file set r12 and branch
@ here.
@ For each group of 8 columns it loads the 16 rows p7..q7 around the edge,
@ calls vpx_wide_mbfilter_neon, and stores only the rows that the filter
@ reports as modified (selected by the flag bits returned in r7).
@ void mb_lpf_horizontal_edge(uint8_t *s, int p,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh,
@ int count)
@ r0 uint8_t *s,
@ r1 int p, /* pitch */
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh,
@ r12 int count (number of 8-column groups; passed in r12, not on the stack)
_mb_lpf_horizontal_edge:
mb_lpf_horizontal_edge: @
push {r4-r8, lr}
vpush {d8-d15}
@ 6 core registers (24 bytes) + d8-d15 (64 bytes) were pushed, so the
@ caller's first stack argument is now at sp + 88.
ldr r4, [sp, #88] @ load thresh
h_count:
vld1.8 {d16[]}, [r2] @ load *blimit
vld1.8 {d17[]}, [r3] @ load *limit
vld1.8 {d18[]}, [r4] @ load *thresh
sub r8, r0, r1, lsl #3 @ r8 = s - 8 * p (row p7)
@ The :64 qualifier asserts that every row address is 8-byte aligned.
vld1.u8 {d0}, [r8,:64], r1 @ p7
vld1.u8 {d1}, [r8,:64], r1 @ p6
vld1.u8 {d2}, [r8,:64], r1 @ p5
vld1.u8 {d3}, [r8,:64], r1 @ p4
vld1.u8 {d4}, [r8,:64], r1 @ p3
vld1.u8 {d5}, [r8,:64], r1 @ p2
vld1.u8 {d6}, [r8,:64], r1 @ p1
vld1.u8 {d7}, [r8,:64], r1 @ p0
vld1.u8 {d8}, [r8,:64], r1 @ q0
vld1.u8 {d9}, [r8,:64], r1 @ q1
vld1.u8 {d10}, [r8,:64], r1 @ q2
vld1.u8 {d11}, [r8,:64], r1 @ q3
vld1.u8 {d12}, [r8,:64], r1 @ q4
vld1.u8 {d13}, [r8,:64], r1 @ q5
vld1.u8 {d14}, [r8,:64], r1 @ q6
vld1.u8 {d15}, [r8,:64], r1 @ q7
bl vpx_wide_mbfilter_neon
tst r7, #1 @ bit 0 set: only the 4-tap filter outputs are valid
beq h_mbfilter
@ flat && mask were not set for any of the channels. Just store the values
@ from filter.
sub r8, r0, r1, lsl #1 @ r8 = s - 2 * p (row p1)
vst1.u8 {d25}, [r8,:64], r1 @ store op1
vst1.u8 {d24}, [r8,:64], r1 @ store op0
vst1.u8 {d23}, [r8,:64], r1 @ store oq0
vst1.u8 {d26}, [r8,:64], r1 @ store oq1
b h_next
h_mbfilter:
tst r7, #2 @ bit 1 set: wide filter was not applied to any lane
beq h_wide_mbfilter
@ flat2 was not set for any of the channels. Just store the values from
@ mbfilter.
sub r8, r0, r1, lsl #1
sub r8, r8, r1 @ r8 = s - 3 * p (row p2)
vst1.u8 {d18}, [r8,:64], r1 @ store op2
vst1.u8 {d19}, [r8,:64], r1 @ store op1
vst1.u8 {d20}, [r8,:64], r1 @ store op0
vst1.u8 {d21}, [r8,:64], r1 @ store oq0
vst1.u8 {d22}, [r8,:64], r1 @ store oq1
vst1.u8 {d23}, [r8,:64], r1 @ store oq2
b h_next
h_wide_mbfilter:
sub r8, r0, r1, lsl #3
add r8, r8, r1 @ r8 = s - 7 * p (row p6)
vst1.u8 {d16}, [r8,:64], r1 @ store op6
vst1.u8 {d24}, [r8,:64], r1 @ store op5
vst1.u8 {d25}, [r8,:64], r1 @ store op4
vst1.u8 {d26}, [r8,:64], r1 @ store op3
vst1.u8 {d27}, [r8,:64], r1 @ store op2
vst1.u8 {d18}, [r8,:64], r1 @ store op1
vst1.u8 {d19}, [r8,:64], r1 @ store op0
vst1.u8 {d20}, [r8,:64], r1 @ store oq0
vst1.u8 {d21}, [r8,:64], r1 @ store oq1
vst1.u8 {d22}, [r8,:64], r1 @ store oq2
vst1.u8 {d23}, [r8,:64], r1 @ store oq3
vst1.u8 {d1}, [r8,:64], r1 @ store oq4
vst1.u8 {d2}, [r8,:64], r1 @ store oq5
vst1.u8 {d3}, [r8,:64], r1 @ store oq6
h_next:
add r0, r0, #8 @ advance to the next 8 columns
subs r12, r12, #1
bne h_count
vpop {d8-d15}
pop {r4-r8, pc}
@ @ |mb_lpf_horizontal_edge|
@ Entry point that filters one 8-column group: sets the group count in r12
@ to 1 and branches (without linking) to mb_lpf_horizontal_edge. The stack is
@ untouched, so the shared body finds thresh where this function's caller
@ placed it.
@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh)
@ r0 uint8_t *s,
@ r1 int pitch,
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh
_vpx_lpf_horizontal_edge_8_neon:
vpx_lpf_horizontal_edge_8_neon: @
mov r12, #1 @ count = 1 group of 8 columns
b mb_lpf_horizontal_edge
@ @ |vpx_lpf_horizontal_edge_8_neon|
@ Entry point that filters two 8-column groups (16 columns): sets the group
@ count in r12 to 2 and branches (without linking) to mb_lpf_horizontal_edge.
@ The stack is untouched, so the shared body finds thresh where this
@ function's caller placed it.
@ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh)
@ r0 uint8_t *s,
@ r1 int pitch,
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh
_vpx_lpf_horizontal_edge_16_neon:
vpx_lpf_horizontal_edge_16_neon: @
mov r12, #2 @ count = 2 groups of 8 columns
b mb_lpf_horizontal_edge
@ @ |vpx_lpf_horizontal_edge_16_neon|
@ Wide loop filter across a vertical edge, 8 rows tall.
@ Loads 8 rows of 16 bytes centred on s (8 bytes either side), transposes
@ them so that d0..d15 hold the columns p7..q7 expected by
@ vpx_wide_mbfilter_neon, runs the filter, then stores only the columns that
@ the filter reports as modified (selected by the flag bits returned in r7).
@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
@ const uint8_t *blimit,
@ const uint8_t *limit,
@ const uint8_t *thresh)
@ r0 uint8_t *s,
@ r1 int p, /* pitch */
@ r2 const uint8_t *blimit,
@ r3 const uint8_t *limit,
@ sp const uint8_t *thresh,
_vpx_lpf_vertical_16_neon:
vpx_lpf_vertical_16_neon: @
push {r4-r8, lr}
vpush {d8-d15}
@ 6 core registers (24 bytes) + d8-d15 (64 bytes) were pushed, so the
@ caller's first stack argument is now at sp + 88.
ldr r4, [sp, #88] @ load thresh
vld1.8 {d16[]}, [r2] @ load *blimit
vld1.8 {d17[]}, [r3] @ load *limit
vld1.8 {d18[]}, [r4] @ load *thresh
sub r8, r0, #8 @ r8 = s - 8: start of the left 8 bytes of each row
@ Interleaved row loads: d0-d7 take the left halves via r8, d8-d15 the right
@ halves via r0. The :64 qualifier asserts 8-byte-aligned addresses.
vld1.8 {d0}, [r8,:64], r1
vld1.8 {d8}, [r0,:64], r1
vld1.8 {d1}, [r8,:64], r1
vld1.8 {d9}, [r0,:64], r1
vld1.8 {d2}, [r8,:64], r1
vld1.8 {d10}, [r0,:64], r1
vld1.8 {d3}, [r8,:64], r1
vld1.8 {d11}, [r0,:64], r1
vld1.8 {d4}, [r8,:64], r1
vld1.8 {d12}, [r0,:64], r1
vld1.8 {d5}, [r8,:64], r1
vld1.8 {d13}, [r0,:64], r1
vld1.8 {d6}, [r8,:64], r1
vld1.8 {d14}, [r0,:64], r1
vld1.8 {d7}, [r8,:64], r1
vld1.8 {d15}, [r0,:64], r1
sub r0, r0, r1, lsl #3 @ rewind r0 by the 8 rows just read
@ Transpose both 8x8 byte blocks (32-bit, then 16-bit, then 8-bit swaps).
vtrn.32 q0, q2
vtrn.32 q1, q3
vtrn.32 q4, q6
vtrn.32 q5, q7
vtrn.16 q0, q1
vtrn.16 q2, q3
vtrn.16 q4, q5
vtrn.16 q6, q7
vtrn.8 d0, d1
vtrn.8 d2, d3
vtrn.8 d4, d5
vtrn.8 d6, d7
vtrn.8 d8, d9
vtrn.8 d10, d11
vtrn.8 d12, d13
vtrn.8 d14, d15
bl vpx_wide_mbfilter_neon
tst r7, #1 @ bit 0 set: only the 4-tap filter outputs are valid
beq v_mbfilter
@ flat && mask were not set for any of the channels. Just store the values
@ from filter.
sub r8, r0, #2 @ r8 = s - 2 (column p1)
vswp d23, d25 @ reorder so d23..d26 = op1, op0, oq0, oq1
@ Each vst4.8 writes one row: 4 interleaved bytes taken from lane i.
vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
b v_end
v_mbfilter:
tst r7, #2 @ bit 1 set: wide filter was not applied to any lane
beq v_wide_mbfilter
@ flat2 was not set for any of the channels. Just store the values from
@ mbfilter.
sub r8, r0, #3 @ r8 = s - 3 (column p2)
@ Per row: op2, op1, op0 go to s - 3 via r8; oq0, oq1, oq2 go to s via r0.
vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1
vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1
vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1
vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1
vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1
vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1
vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1
vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1
vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1
vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1
vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1
vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1
vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1
vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1
vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1
vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1
b v_end
v_wide_mbfilter:
sub r8, r0, #8 @ r8 = s - 8 (column p7)
@ Transpose the left block back to rows. Column order going in:
@ d0 (p7), d16 (op6), d24 (op5), d25 (op4), d26 (op3), d27 (op2),
@ d18 (op1), d19 (op0).
vtrn.32 d0, d26
vtrn.32 d16, d27
vtrn.32 d24, d18
vtrn.32 d25, d19
vtrn.16 d0, d24
vtrn.16 d16, d25
vtrn.16 d26, d18
vtrn.16 d27, d19
vtrn.8 d0, d16
vtrn.8 d24, d25
vtrn.8 d26, d27
vtrn.8 d18, d19
@ Transpose the right block back to rows. Column order going in:
@ d20 (oq0), d21 (oq1), d22 (oq2), d23 (oq3), d1 (oq4), d2 (oq5),
@ d3 (oq6), d15 (q7).
vtrn.32 d20, d1
vtrn.32 d21, d2
vtrn.32 d22, d3
vtrn.32 d23, d15
vtrn.16 d20, d22
vtrn.16 d21, d23
vtrn.16 d1, d3
vtrn.16 d2, d15
vtrn.8 d20, d21
vtrn.8 d22, d23
vtrn.8 d1, d2
vtrn.8 d3, d15
@ Write the 8 rows back: left 8 bytes via r8, right 8 bytes via r0.
vst1.8 {d0}, [r8,:64], r1
vst1.8 {d20}, [r0,:64], r1
vst1.8 {d16}, [r8,:64], r1
vst1.8 {d21}, [r0,:64], r1
vst1.8 {d24}, [r8,:64], r1
vst1.8 {d22}, [r0,:64], r1
vst1.8 {d25}, [r8,:64], r1
vst1.8 {d23}, [r0,:64], r1
vst1.8 {d26}, [r8,:64], r1
vst1.8 {d1}, [r0,:64], r1
vst1.8 {d27}, [r8,:64], r1
vst1.8 {d2}, [r0,:64], r1
vst1.8 {d18}, [r8,:64], r1
vst1.8 {d3}, [r0,:64], r1
vst1.8 {d19}, [r8,:64], r1
vst1.8 {d15}, [r0,:64], r1
v_end:
vpop {d8-d15}
pop {r4-r8, pc}
@ @ |vpx_lpf_vertical_16_neon|
@ void vpx_wide_mbfilter_neon() @
@ This is a helper function for the loopfilters. The individual functions do
@ the necessary load, transpose (if necessary) and store.
@
@ Inputs:
@ r0-r3 PRESERVE
@ d16 blimit
@ d17 limit
@ d18 thresh
@ d0 p7
@ d1 p6
@ d2 p5
@ d3 p4
@ d4 p3
@ d5 p2
@ d6 p1
@ d7 p0
@ d8 q0
@ d9 q1
@ d10 q2
@ d11 q3
@ d12 q4
@ d13 q5
@ d14 q6
@ d15 q7
@
@ Outputs (r7 tells the caller which set is valid):
@ r7 bit 0 set: (flat && mask) is zero in every lane; only the 4-tap filter
@ ran. d25 = op1, d24 = op0, d23 = oq0, d26 = oq1.
@ r7 bit 1 set: (flat2 && flat && mask) is zero in every lane; mbfilter
@ results are in d18 = op2, d19 = op1, d20 = op0, d21 = oq0,
@ d22 = oq1, d23 = oq2.
@ r7 == 0: wide results are in d16 = op6, d24 = op5, d25 = op4,
@ d26 = op3, d27 = op2, d18 = op1, d19 = op0, d20 = oq0,
@ d21 = oq1, d22 = oq2, d23 = oq3, d1 = oq4, d2 = oq5,
@ d3 = oq6.
@
@ Clobbers r5, r6, r7 and d16-d31; the wide path also overwrites d1-d3 and
@ q4 (d8, d9).
_vpx_wide_mbfilter_neon:
vpx_wide_mbfilter_neon: @
mov r7, #0
@ filter_mask
vabd.u8 d19, d4, d5 @ abs(p3 - p2)
vabd.u8 d20, d5, d6 @ abs(p2 - p1)
vabd.u8 d21, d6, d7 @ abs(p1 - p0)
vabd.u8 d22, d9, d8 @ abs(q1 - q0)
vabd.u8 d23, d10, d9 @ abs(q2 - q1)
vabd.u8 d24, d11, d10 @ abs(q3 - q2)
@ only compare the largest value to limit
vmax.u8 d19, d19, d20 @ max(abs(p3 - p2), abs(p2 - p1))
vmax.u8 d20, d21, d22 @ max(abs(p1 - p0), abs(q1 - q0))
vmax.u8 d23, d23, d24 @ max(abs(q2 - q1), abs(q3 - q2))
vmax.u8 d19, d19, d20
vabd.u8 d24, d7, d8 @ abs(p0 - q0)
vmax.u8 d19, d19, d23
vabd.u8 d23, d6, d9 @ a = abs(p1 - q1)
vqadd.u8 d24, d24, d24 @ b = abs(p0 - q0) * 2
@ limit >= max(abs(...)), per lane
vcge.u8 d19, d17, d19
@ flatmask4
vabd.u8 d25, d7, d5 @ abs(p0 - p2)
vabd.u8 d26, d8, d10 @ abs(q0 - q2)
vabd.u8 d27, d4, d7 @ abs(p3 - p0)
vabd.u8 d28, d11, d8 @ abs(q3 - q0)
@ only compare the largest value to 1 (d30, set below)
vmax.u8 d25, d25, d26 @ max(abs(p0 - p2), abs(q0 - q2))
vmax.u8 d26, d27, d28 @ max(abs(p3 - p0), abs(q3 - q0))
vmax.u8 d25, d25, d26
vmax.u8 d20, d20, d25
vshr.u8 d23, d23, #1 @ a = a / 2
vqadd.u8 d24, d24, d23 @ a = b + a
vmov.u8 d30, #1
vcge.u8 d24, d16, d24 @ blimit >= abs(p0 - q0) * 2 + abs(p1 - q1) / 2
vcge.u8 d20, d30, d20 @ flat
vand d19, d19, d24 @ mask
@ hevmask
vcgt.u8 d21, d21, d18 @ (abs(p1 - p0) > thresh)*-1
vcgt.u8 d22, d22, d18 @ (abs(q1 - q0) > thresh)*-1
vorr d21, d21, d22 @ hev
vand d16, d20, d19 @ flat && mask
vmov r5, r6, d16 @ copy to core registers for the all-zero test below
@ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
vabd.u8 d22, d3, d7 @ abs(p4 - p0)
vabd.u8 d23, d12, d8 @ abs(q4 - q0)
vabd.u8 d24, d7, d2 @ abs(p0 - p5)
vabd.u8 d25, d8, d13 @ abs(q0 - q5)
vabd.u8 d26, d1, d7 @ abs(p6 - p0)
vabd.u8 d27, d14, d8 @ abs(q6 - q0)
vabd.u8 d28, d0, d7 @ abs(p7 - p0)
vabd.u8 d29, d15, d8 @ abs(q7 - q0)
@ only compare the largest value to 1 (d30)
vmax.u8 d22, d22, d23 @ max(abs(p4 - p0), abs(q4 - q0))
vmax.u8 d23, d24, d25 @ max(abs(p0 - p5), abs(q0 - q5))
vmax.u8 d24, d26, d27 @ max(abs(p6 - p0), abs(q6 - q0))
vmax.u8 d25, d28, d29 @ max(abs(p7 - p0), abs(q7 - q0))
vmax.u8 d26, d22, d23
vmax.u8 d27, d24, d25
vmax.u8 d23, d26, d27
vcge.u8 d18, d30, d23 @ flat2
vmov.u8 d22, #0x80
orrs r5, r5, r6 @ Check for 0
orreq r7, r7, #1 @ Only do filter branch
vand d17, d18, d16 @ flat2 && flat && mask
vmov r5, r6, d17 @ copy to core registers for the second all-zero test
@ mbfilter() function
@ filter() function
@ convert to signed
veor d23, d8, d22 @ qs0
veor d24, d7, d22 @ ps0
veor d25, d6, d22 @ ps1
veor d26, d9, d22 @ qs1
vmov.u8 d27, #3
vsub.s8 d28, d23, d24 @ ( qs0 - ps0)
vqsub.s8 d29, d25, d26 @ filter = clamp(ps1-qs1)
vmull.s8 q15, d28, d27 @ 3 * ( qs0 - ps0)
vand d29, d29, d21 @ filter &= hev
vaddw.s8 q15, q15, d29 @ filter + 3 * (qs0 - ps0)
vmov.u8 d29, #4
@ filter = clamp(filter + 3 * ( qs0 - ps0))
vqmovn.s16 d28, q15
vand d28, d28, d19 @ filter &= mask
vqadd.s8 d30, d28, d27 @ filter2 = clamp(filter+3)
vqadd.s8 d29, d28, d29 @ filter1 = clamp(filter+4)
vshr.s8 d30, d30, #3 @ filter2 >>= 3
vshr.s8 d29, d29, #3 @ filter1 >>= 3
vqadd.s8 d24, d24, d30 @ op0 = clamp(ps0 + filter2)
vqsub.s8 d23, d23, d29 @ oq0 = clamp(qs0 - filter1)
@ outer tap adjustments: ++filter1 >> 1
vrshr.s8 d29, d29, #1
vbic d29, d29, d21 @ filter &= ~hev
vqadd.s8 d25, d25, d29 @ op1 = clamp(ps1 + filter)
vqsub.s8 d26, d26, d29 @ oq1 = clamp(qs1 - filter)
veor d24, d24, d22 @ *f_op0 = u^0x80
veor d23, d23, d22 @ *f_oq0 = u^0x80
veor d25, d25, d22 @ *f_op1 = u^0x80
veor d26, d26, d22 @ *f_oq1 = u^0x80
tst r7, #1
bxne lr @ no lane needs mbfilter: return the 4-tap results
orrs r5, r5, r6 @ Check for 0
orreq r7, r7, #2 @ Only do mbfilter branch
@ mbfilter flat && mask branch
@ TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
@ and using vbit on the q's?
vmov.u8 d29, #2
vaddl.u8 q15, d7, d8 @ op2 = p0 + q0
vmlal.u8 q15, d4, d27 @ op2 = p0 + q0 + p3 * 3
vmlal.u8 q15, d5, d29 @ op2 = p0 + q0 + p3 * 3 + p2 * 2
vaddl.u8 q10, d4, d5
vaddw.u8 q15, d6 @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2
vaddl.u8 q14, d6, d9
vqrshrn.u16 d18, q15, #3 @ r_op2
vsub.i16 q15, q10
vaddl.u8 q10, d4, d6
vadd.i16 q15, q14
vaddl.u8 q14, d7, d10
vqrshrn.u16 d19, q15, #3 @ r_op1
vsub.i16 q15, q10
vadd.i16 q15, q14
vaddl.u8 q14, d8, d11
vqrshrn.u16 d20, q15, #3 @ r_op0
vsubw.u8 q15, d4 @ oq0 = op0 - p3
vsubw.u8 q15, d7 @ oq0 -= p0
vadd.i16 q15, q14
vaddl.u8 q14, d9, d11
vqrshrn.u16 d21, q15, #3 @ r_oq0
vsubw.u8 q15, d5 @ oq1 = oq0 - p2
vsubw.u8 q15, d8 @ oq1 -= q0
vadd.i16 q15, q14
vaddl.u8 q14, d10, d11
vqrshrn.u16 d22, q15, #3 @ r_oq1
vsubw.u8 q15, d6 @ oq2 = oq1 - p1
vsubw.u8 q15, d9 @ oq2 -= q1
vadd.i16 q15, q14
vqrshrn.u16 d27, q15, #3 @ r_oq2
@ Filter does not set op2 or oq2, so use p2 and q2.
vbif d18, d5, d16 @ t_op2 |= p2 & ~(flat & mask)
vbif d19, d25, d16 @ t_op1 |= f_op1 & ~(flat & mask)
vbif d20, d24, d16 @ t_op0 |= f_op0 & ~(flat & mask)
vbif d21, d23, d16 @ t_oq0 |= f_oq0 & ~(flat & mask)
vbif d22, d26, d16 @ t_oq1 |= f_oq1 & ~(flat & mask)
vbit d23, d27, d16 @ t_oq2 |= r_oq2 & (flat & mask)
vbif d23, d10, d16 @ t_oq2 |= q2 & ~(flat & mask)
tst r7, #2
bxne lr @ no lane needs the wide filter: return the mbfilter results
@ wide_mbfilter flat2 && flat && mask branch
@ q15 carries a running 16-bit sum; each output is derived from the
@ previous one by subtracting the taps that leave the window and adding
@ the taps that enter it, then rounding and narrowing with vqrshrn #4.
vmov.u8 d16, #7
vaddl.u8 q15, d7, d8 @ op6 = p0 + q0
vaddl.u8 q12, d2, d3
vaddl.u8 q13, d4, d5
vaddl.u8 q14, d1, d6
vmlal.u8 q15, d0, d16 @ op6 += p7 * 7
vadd.i16 q12, q13
vadd.i16 q15, q14
vaddl.u8 q14, d2, d9
vadd.i16 q15, q12
vaddl.u8 q12, d0, d1
vaddw.u8 q15, d1
vaddl.u8 q13, d0, d2
vadd.i16 q14, q15, q14
vqrshrn.u16 d16, q15, #4 @ w_op6
vsub.i16 q15, q14, q12
vaddl.u8 q14, d3, d10
vqrshrn.u16 d24, q15, #4 @ w_op5
vsub.i16 q15, q13
vaddl.u8 q13, d0, d3
vadd.i16 q15, q14
vaddl.u8 q14, d4, d11
vqrshrn.u16 d25, q15, #4 @ w_op4
vadd.i16 q15, q14
vaddl.u8 q14, d0, d4
vsub.i16 q15, q13
vsub.i16 q14, q15, q14
vqrshrn.u16 d26, q15, #4 @ w_op3
vaddw.u8 q15, q14, d5 @ op2 += p2
vaddl.u8 q14, d0, d5
vaddw.u8 q15, d12 @ op2 += q4
vbif d26, d4, d17 @ op3 |= p3 & ~(f2 & f & m)
vqrshrn.u16 d27, q15, #4 @ w_op2
vsub.i16 q15, q14
vaddl.u8 q14, d0, d6
vaddw.u8 q15, d6 @ op1 += p1
vaddw.u8 q15, d13 @ op1 += q5
vbif d27, d18, d17 @ op2 |= t_op2 & ~(f2 & f & m)
vqrshrn.u16 d18, q15, #4 @ w_op1
vsub.i16 q15, q14
vaddl.u8 q14, d0, d7
vaddw.u8 q15, d7 @ op0 += p0
vaddw.u8 q15, d14 @ op0 += q6
vbif d18, d19, d17 @ op1 |= t_op1 & ~(f2 & f & m)
vqrshrn.u16 d19, q15, #4 @ w_op0
vsub.i16 q15, q14
vaddl.u8 q14, d1, d8
vaddw.u8 q15, d8 @ oq0 += q0
vaddw.u8 q15, d15 @ oq0 += q7
vbif d19, d20, d17 @ op0 |= t_op0 & ~(f2 & f & m)
vqrshrn.u16 d20, q15, #4 @ w_oq0
vsub.i16 q15, q14
vaddl.u8 q14, d2, d9
vaddw.u8 q15, d9 @ oq1 += q1
vaddl.u8 q4, d10, d15 @ q4 (d8, d9) becomes scratch from here on
vaddw.u8 q15, d15 @ oq1 += q7
vbif d20, d21, d17 @ oq0 |= t_oq0 & ~(f2 & f & m)
vqrshrn.u16 d21, q15, #4 @ w_oq1
vsub.i16 q15, q14
vaddl.u8 q14, d3, d10
vadd.i16 q15, q4
vaddl.u8 q4, d11, d15
vbif d21, d22, d17 @ oq1 |= t_oq1 & ~(f2 & f & m)
vqrshrn.u16 d22, q15, #4 @ w_oq2
vsub.i16 q15, q14
vaddl.u8 q14, d4, d11
vadd.i16 q15, q4
vaddl.u8 q4, d12, d15
vbif d22, d23, d17 @ oq2 |= t_oq2 & ~(f2 & f & m)
vqrshrn.u16 d23, q15, #4 @ w_oq3
vsub.i16 q15, q14
vaddl.u8 q14, d5, d12
vadd.i16 q15, q4
vaddl.u8 q4, d13, d15
vbif d16, d1, d17 @ op6 |= p6 & ~(f2 & f & m)
vqrshrn.u16 d1, q15, #4 @ w_oq4
vsub.i16 q15, q14
vaddl.u8 q14, d6, d13
vadd.i16 q15, q4
vaddl.u8 q4, d14, d15
vbif d24, d2, d17 @ op5 |= p5 & ~(f2 & f & m)
vqrshrn.u16 d2, q15, #4 @ w_oq5
vsub.i16 q15, q14
vbif d25, d3, d17 @ op4 |= p4 & ~(f2 & f & m)
vadd.i16 q15, q4
vbif d23, d11, d17 @ oq3 |= q3 & ~(f2 & f & m)
vqrshrn.u16 d3, q15, #4 @ w_oq6
vbif d1, d12, d17 @ oq4 |= q4 & ~(f2 & f & m)
vbif d2, d13, d17 @ oq5 |= q5 & ~(f2 & f & m)
vbif d3, d14, d17 @ oq6 |= q6 & ~(f2 & f & m)
bx lr
@ @ |vpx_wide_mbfilter_neon|

View file

@ -0,0 +1,46 @@
@ This file was created from a .asm file
@ using the ads2gas_apple.pl script.
.set WIDE_REFERENCE, 0
.set ARCHITECTURE, 5
.set DO1STROUNDING, 0
@
@ Copyright (c) 2010 The WebM project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
.globl _vpx_push_neon
.globl vpx_push_neon
.globl _vpx_pop_neon
.globl vpx_pop_neon
@ ARM
@
@ PRESERVE8
.text
.p2align 2
@ vpx_push_neon / _vpx_push_neon: save NEON registers d8-d15 to memory.
@
@ Both the underscore-prefixed and the plain label name the same entry
@ point; each is exported by its own .globl directive.
@
@ In:  r0 = address of the save area. 64 bytes are written
@           (8 doubleword registers x 8 bytes each).
@ Out: r0 has been advanced past the stored data by the post-increment
@      writeback ("!") on each store. No other core register is written.
@
@ NOTE(review): d8-d15 are the registers the ARM procedure call standard
@ designates as callee-saved, which is presumably why exactly this set is
@ stored; presumably paired with vpx_pop_neon on the same buffer -- confirm
@ against the callers.
_vpx_push_neon:
vpx_push_neon: @
vst1.i64 {d8, d9, d10, d11}, [r0]! @ store d8-d11, then r0 += 32
vst1.i64 {d12, d13, d14, d15}, [r0]! @ store d12-d15, then r0 += 32
bx lr @ return to the caller
@
@ vpx_pop_neon / _vpx_pop_neon: reload NEON registers d8-d15 from memory.
@
@ Both the underscore-prefixed and the plain label name the same entry
@ point; each is exported by its own .globl directive.
@
@ In:  r0 = address of the save area. 64 bytes are read
@           (8 doubleword registers x 8 bytes each), in the order
@           d8, d9, ..., d15.
@ Out: d8-d15 are overwritten with the loaded values; r0 has been advanced
@      past the loaded data by the post-increment writeback ("!").
@
@ NOTE(review): presumably r0 must point at a buffer previously filled by
@ vpx_push_neon -- confirm against the callers.
_vpx_pop_neon:
vpx_pop_neon: @
vld1.i64 {d8, d9, d10, d11}, [r0]! @ load d8-d11, then r0 += 32
vld1.i64 {d12, d13, d14, d15}, [r0]! @ load d12-d15, then r0 += 32
bx lr @ return to the caller
@

9
thirdparty/libvpx/vpx_dsp_rtcd.h vendored Normal file
View file

@ -0,0 +1,9 @@
/*
 * Dispatch header for the vpx_dsp run-time CPU detection (RTCD) declarations.
 *
 * Exactly one variant is included, chosen at preprocessing time:
 *   - rtcd/vpx_dsp_rtcd_x86.h when WEBM_X86ASM is defined and ARCH_X86 or
 *     ARCH_X86_64 is non-zero;
 *   - rtcd/vpx_dsp_rtcd_arm.h when WEBM_ARMASM is defined and ARCH_ARM is
 *     non-zero;
 *   - rtcd/vpx_dsp_rtcd_c.h in every other case.
 *
 * vpx_config.h is included first; presumably it is what defines the ARCH_*
 * macros tested below -- confirm.
 *
 * NOTE(review): this file has no include guard of its own and appears to rely
 * on the selected header guarding itself -- confirm. If a guard is ever added
 * here, it must not reuse the macro name used by the included header, or that
 * header's contents would be skipped.
 */
#include "vpx_config.h"
#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
#include "rtcd/vpx_dsp_rtcd_x86.h"
#elif defined(WEBM_ARMASM) && ARCH_ARM
#include "rtcd/vpx_dsp_rtcd_arm.h"
#else
#include "rtcd/vpx_dsp_rtcd_c.h"
#endif  // WEBM_X86ASM / WEBM_ARMASM variant selection

View file

@ -38,26 +38,7 @@ static int arm_cpu_env_mask(void) {
} }
#if !CONFIG_RUNTIME_CPU_DETECT #if !CONFIG_RUNTIME_CPU_DETECT
#error "CONFIG_RUNTIME_CPU_DETECT should be enabled!"
int arm_cpu_caps(void) {
/* This function should actually be a no-op. There is no way to adjust any of
* these because the RTCD tables do not exist: the functions are called
* statically */
int flags;
int mask;
if (!arm_cpu_env_flags(&flags)) {
return flags;
}
mask = arm_cpu_env_mask();
#if HAVE_MEDIA
flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON || HAVE_NEON_ASM
flags |= HAS_NEON;
#endif /* HAVE_NEON || HAVE_NEON_ASM */
return flags & mask;
}
#elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
#define WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN
@ -76,28 +57,28 @@ int arm_cpu_caps(void) {
* All of these instructions should be essentially nops. * All of these instructions should be essentially nops.
*/ */
#if HAVE_MEDIA #if HAVE_MEDIA
if (mask & HAS_MEDIA) if (mask & HAS_MEDIA) {
__try { __try {
/*SHADD8 r3,r3,r3*/ /*SHADD8 r3,r3,r3*/
__emit(0xE6333F93); __emit(0xE6333F93);
flags |= HAS_MEDIA; flags |= HAS_MEDIA;
} __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
/*Ignore exception.*/ /*Ignore exception.*/
}
} }
}
#endif /* HAVE_MEDIA */ #endif /* HAVE_MEDIA */
#if HAVE_NEON || HAVE_NEON_ASM #if HAVE_NEON || HAVE_NEON_ASM
if (mask &HAS_NEON) { if (mask &HAS_NEON) {
__try { __try {
/*VORR q0,q0,q0*/ /*VORR q0,q0,q0*/
__emit(0xF2200150); __emit(0xF2200150);
flags |= HAS_NEON; flags |= HAS_NEON;
} __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
/*Ignore exception.*/ /*Ignore exception.*/
}
} }
}
#endif /* HAVE_NEON || HAVE_NEON_ASM */ #endif /* HAVE_NEON || HAVE_NEON_ASM */
return flags & mask; return flags & mask;
} }
#elif defined(__ANDROID__) /* end _MSC_VER */ #elif defined(__ANDROID__) /* end _MSC_VER */
@ -170,6 +151,20 @@ int arm_cpu_caps(void) {
return flags & mask; return flags & mask;
} }
#else /* end __linux__ */ #else /* end __linux__ */
#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \ int arm_cpu_caps(void) {
"available for your platform. Reconfigure with --disable-runtime-cpu-detect." int flags;
int mask;
if (!arm_cpu_env_flags(&flags)) {
return flags;
}
mask = arm_cpu_env_mask();
#if HAVE_MEDIA
flags |= HAS_MEDIA;
#endif /* HAVE_MEDIA */
#if HAVE_NEON || HAVE_NEON_ASM
flags |= HAS_NEON;
#endif /* HAVE_NEON || HAVE_NEON_ASM */
return flags & mask;
}
#warning "ARM run-time CPU detection is disabled for this platform..."
#endif #endif

View file

@ -13,10 +13,10 @@
#include "./vpx_config.h" #include "./vpx_config.h"
#if ARCH_X86 || ARCH_X86_64 #if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
void vpx_reset_mmx_state(void); void vpx_reset_mmx_state(void);
#define vpx_clear_system_state() vpx_reset_mmx_state() #define vpx_clear_system_state() vpx_reset_mmx_state()
#else #else
#define vpx_clear_system_state() #define vpx_clear_system_state()
#endif // ARCH_X86 || ARCH_X86_64 #endif // defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
#endif // VPX_PORTS_SYSTEM_STATE_H_ #endif // VPX_PORTS_SYSTEM_STATE_H_

44
thirdparty/libvpx/vpx_scale_rtcd.h vendored Normal file
View file

@ -0,0 +1,44 @@
/*
 * Run-time CPU detection (RTCD) declarations for the vpx_scale module.
 *
 * Every entry point below is bound at preprocessing time: the generic name is
 * a macro alias for the corresponding `_c` function. No function pointers are
 * declared in this header, so setup_rtcd_internal() has nothing to initialise.
 */
#ifndef VPX_SCALE_RTCD_H_
#define VPX_SCALE_RTCD_H_
/*
 * RTCD_EXTERN expands to nothing when RTCD_C is defined and to `extern`
 * otherwise. No declaration in this header actually uses it.
 */
#ifdef RTCD_C
#define RTCD_EXTERN
#else
#define RTCD_EXTERN extern
#endif  // RTCD_C
/* Forward declaration: this header only passes pointers to the struct. */
struct yv12_buffer_config;
#ifdef __cplusplus
extern "C" {
#endif
/* vp8_yv12_copy_frame: const source, mutable destination; always the C version. */
void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
#define vp8_yv12_copy_frame vp8_yv12_copy_frame_c
/* vp8_yv12_extend_frame_borders: operates on ybf in place; always the C version. */
void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf);
#define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c
/* vpx_extend_frame_borders: operates on ybf in place; always the C version. */
void vpx_extend_frame_borders_c(struct yv12_buffer_config *ybf);
#define vpx_extend_frame_borders vpx_extend_frame_borders_c
/* vpx_extend_frame_inner_borders: operates on ybf in place; always the C version. */
void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf);
#define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c
/* vpx_yv12_copy_y: const source, mutable destination; always the C version. */
void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
#define vpx_yv12_copy_y vpx_yv12_copy_y_c
/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably the module's one-time RTCD initialisation entry
 * point, implemented by the translation unit that defines RTCD_C -- confirm.
 */
void vpx_scale_rtcd(void);
/*
 * Only a translation unit that defines RTCD_C before including this header
 * gets the (file-local) setup routine below.
 */
#ifdef RTCD_C
static void setup_rtcd_internal(void)
{
// Intentionally empty: only MIPS has something here, but it is not supported.
}
#endif  // RTCD_C
#ifdef __cplusplus
} // extern "C"
#endif
#endif  // VPX_SCALE_RTCD_H_