libvpx: Necessary modifications

- convert ARM assembly (NEON) files, - add rtcd for run-time CPU feature detection, - modify "system_state.h", - "arm_cpudetect.c" fixes.
2016-09-14 22:10:55 +02:00 · 2016-09-14 22:10:55 +02:00 · 1556d0d377
commit 1556d0d377
parent 5268443fdf
26 changed files with 5429 additions and 47 deletions
--- a/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h
+++ b/thirdparty/libvpx/rtcd/vp8_rtcd_arm.h
@ -0,0 +1,240 @@
 #ifndef VP8_RTCD_H_
 #define VP8_RTCD_H_
 /*
  * RTCD_EXTERN decides how the function-pointer declarations in this header
  * are emitted: a file that defines RTCD_C before including it gets them
  * without a storage-class specifier, every other includer sees them as
  * `extern`.
  */
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * VP8
 */
 struct blockd;
 struct loop_filter_info;
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
  * Dispatch declarations. Each operation is declared as a portable `_c`
  * routine, optionally a `_neon` routine with the same signature, and then
  * the public name: either an RTCD_EXTERN function pointer (assigned in
  * setup_rtcd_internal() further down) or, where only the `_c` routine is
  * declared, a macro alias straight to it.
  */
 void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 /* 4x4: no `_neon` routine is declared here, so the name is a compile-time alias. */
 void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
 void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 /* `(void)` makes this a real prototype; in C an empty `()` leaves the parameters unspecified. */
 void vp8_clear_system_state_c(void);
 #define vp8_clear_system_state vp8_clear_system_state_c
 /* vp8_copy_mem{16x16,8x4,8x8}: each chosen at run time between the `_c` and `_neon` routine. */
 void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem16x16_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x4_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x8_neon(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 /* vp8_dc_only_idct_add, vp8_dequant_idct_add*, vp8_dequantize_b: `_c` / `_neon` pairs behind function pointers. */
 void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 void vp8_dc_only_idct_add_neon(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_neon(short *input, short *dq, unsigned char *output, int stride);
 RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 /* Takes the forward-declared struct blockd by pointer; its layout is not needed in this header. */
 void vp8_dequantize_b_c(struct blockd*, short *dqc);
 void vp8_dequantize_b_neon(struct blockd*, short *dqc);
 RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc);
 /* Loop filters taking separate y/u/v pointers plus a struct loop_filter_info. */
 void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbh_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbv_neon(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 /*
  * "Simple" loop filters. The public pointer names (vp8_loop_filter_simple_*)
  * do not follow the implementation names: bh/bv map to the *_bhs_* / *_bvs_*
  * routines, while mbh/mbv pair a *_simple_{horizontal,vertical}_edge_c
  * routine with a *_mbhs_neon / *_mbvs_neon routine.
  */
 void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bvs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_mbhs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_mbvs_neon(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
 /* vp8_short_idct4x4llm and vp8_short_inv_walsh4x4 are function pointers; vp8_short_inv_walsh4x4_1 is a macro alias to its `_c` routine. */
 void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 void vp8_short_idct4x4llm_neon(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 void vp8_short_inv_walsh4x4_c(short *input, short *output);
 void vp8_short_inv_walsh4x4_neon(short *input, short *output);
 RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
 void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
 #define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
 /* vp8_sixtap_predict*: same parameter list as the bilinear predictors; 4x4 is a macro alias, the other sizes are function pointers. */
 void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict16x16_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
 void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x4_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x8_neon(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 /* Declared here; its definition is not part of this header. */
 void vp8_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/arm.h"
 static void setup_rtcd_internal(void)
 {
    int flags = arm_cpu_caps();
    vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_neon;
 #endif
    vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_neon;
 #endif
    vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_neon;
 #endif
    vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_copy_mem16x16 = vp8_copy_mem16x16_neon;
 #endif
    vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_copy_mem8x4 = vp8_copy_mem8x4_neon;
 #endif
    vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_copy_mem8x8 = vp8_copy_mem8x8_neon;
 #endif
    vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_dc_only_idct_add = vp8_dc_only_idct_add_neon;
 #endif
    vp8_dequant_idct_add = vp8_dequant_idct_add_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_dequant_idct_add = vp8_dequant_idct_add_neon;
 #endif
    vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_neon;
 #endif
    vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_neon;
 #endif
    vp8_dequantize_b = vp8_dequantize_b_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_dequantize_b = vp8_dequantize_b_neon;
 #endif
    vp8_loop_filter_bh = vp8_loop_filter_bh_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_bh = vp8_loop_filter_bh_neon;
 #endif
    vp8_loop_filter_bv = vp8_loop_filter_bv_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_bv = vp8_loop_filter_bv_neon;
 #endif
    vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_mbh = vp8_loop_filter_mbh_neon;
 #endif
    vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_mbv = vp8_loop_filter_mbv_neon;
 #endif
    vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_neon;
 #endif
    vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_neon;
 #endif
    vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_simple_mbh = vp8_loop_filter_mbhs_neon;
 #endif
    vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_loop_filter_simple_mbv = vp8_loop_filter_mbvs_neon;
 #endif
    vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_short_idct4x4llm = vp8_short_idct4x4llm_neon;
 #endif
    vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_neon;
 #endif
    vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_neon;
 #endif
    vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_neon;
 #endif
    vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_neon;
 #endif
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vp8_rtcd_c.h
+++ b/thirdparty/libvpx/rtcd/vp8_rtcd_c.h
@ -0,0 +1,117 @@
 #ifndef VP8_RTCD_H_
 #define VP8_RTCD_H_
 /*
  * RTCD_EXTERN is empty when the includer defines RTCD_C and `extern`
  * otherwise. Nothing visible in this header expands it: every dispatch
  * name below is a macro alias.
  */
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * VP8
 */
 struct blockd;
 struct loop_filter_info;
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
  * Dispatch declarations for the plain C configuration: each operation
  * declares only its `_c` routine, and the public name is a macro that
  * expands directly to it.
  */
 void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_bilinear_predict16x16 vp8_bilinear_predict16x16_c
 void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_bilinear_predict4x4 vp8_bilinear_predict4x4_c
 void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_bilinear_predict8x4 vp8_bilinear_predict8x4_c
 void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_bilinear_predict8x8 vp8_bilinear_predict8x8_c
 /* `(void)` makes this a real prototype; in C an empty `()` leaves the parameters unspecified. */
 void vp8_clear_system_state_c(void);
 #define vp8_clear_system_state vp8_clear_system_state_c
 /* vp8_copy_mem{16x16,8x4,8x8}: aliases to the `_c` routines. */
 void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 #define vp8_copy_mem16x16 vp8_copy_mem16x16_c
 void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 #define vp8_copy_mem8x4 vp8_copy_mem8x4_c
 void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 #define vp8_copy_mem8x8 vp8_copy_mem8x8_c
 /* vp8_dc_only_idct_add, vp8_dequant_idct_add*, vp8_dequantize_b. */
 void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 #define vp8_dc_only_idct_add vp8_dc_only_idct_add_c
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 #define vp8_dequant_idct_add vp8_dequant_idct_add_c
 void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
 void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 #define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
 void vp8_dequantize_b_c(struct blockd*, short *dqc);
 #define vp8_dequantize_b vp8_dequantize_b_c
 /* Loop filters taking separate y/u/v pointers plus a struct loop_filter_info. */
 void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 #define vp8_loop_filter_bh vp8_loop_filter_bh_c
 void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 #define vp8_loop_filter_bv vp8_loop_filter_bv_c
 void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 #define vp8_loop_filter_mbh vp8_loop_filter_mbh_c
 void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 #define vp8_loop_filter_mbv vp8_loop_filter_mbv_c
 /*
  * "Simple" loop filters. The public names (vp8_loop_filter_simple_*) differ
  * from the routines they alias: *_bhs_c, *_bvs_c,
  * *_simple_horizontal_edge_c and *_simple_vertical_edge_c.
  */
 void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
 #define vp8_loop_filter_simple_bh vp8_loop_filter_bhs_c
 void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
 #define vp8_loop_filter_simple_bv vp8_loop_filter_bvs_c
 void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
 #define vp8_loop_filter_simple_mbh vp8_loop_filter_simple_horizontal_edge_c
 void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
 #define vp8_loop_filter_simple_mbv vp8_loop_filter_simple_vertical_edge_c
 /* vp8_short_idct4x4llm, vp8_short_inv_walsh4x4 and vp8_short_inv_walsh4x4_1. */
 void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 #define vp8_short_idct4x4llm vp8_short_idct4x4llm_c
 void vp8_short_inv_walsh4x4_c(short *input, short *output);
 #define vp8_short_inv_walsh4x4 vp8_short_inv_walsh4x4_c
 void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
 #define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
 /* vp8_sixtap_predict*: same parameter list as the bilinear predictors. */
 void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_sixtap_predict16x16 vp8_sixtap_predict16x16_c
 void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_sixtap_predict4x4 vp8_sixtap_predict4x4_c
 void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_sixtap_predict8x4 vp8_sixtap_predict8x4_c
 void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 #define vp8_sixtap_predict8x8 vp8_sixtap_predict8x8_c
 /* Declared here; its definition is not part of this header. */
 void vp8_rtcd(void);
 #ifdef RTCD_C
 /*
  * Intentionally empty: every dispatch name in this header is a #define
  * alias to its `_c` routine, so there are no function pointers to assign.
  */
 static void setup_rtcd_internal(void)
 {
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h
+++ b/thirdparty/libvpx/rtcd/vp8_rtcd_x86.h
@ -0,0 +1,247 @@
 #ifndef VP8_RTCD_H_
 #define VP8_RTCD_H_
 /*
  * RTCD_EXTERN decides how the function-pointer declarations in this header
  * are emitted: a file that defines RTCD_C before including it gets them
  * without a storage-class specifier, every other includer sees them as
  * `extern`.
  */
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * VP8
 */
 struct blockd;
 struct loop_filter_info;
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
  * Dispatch declarations for x86. Each operation lists its `_c` routine,
  * then whichever of the `_mmx`, `_sse2` and `_ssse3` routines are declared
  * for it, then an RTCD_EXTERN function pointer under the public name. The
  * pointers are assigned in setup_rtcd_internal() further down.
  */
 void vp8_bilinear_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_bilinear_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_bilinear_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 /*
  * `(void)` gives all three declarations a real prototype; in C an empty
  * `()` leaves the parameter list unspecified. The pointer is set to one of
  * the two routines in setup_rtcd_internal().
  */
 void vp8_clear_system_state_c(void);
 void vpx_reset_mmx_state(void);
 RTCD_EXTERN void (*vp8_clear_system_state)(void);
 /* vp8_copy_mem{16x16,8x4,8x8}: `_c` and `_mmx` for every size, plus `_sse2` for 16x16. */
 void vp8_copy_mem16x16_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem16x16_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem16x16_sse2(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_copy_mem16x16)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x4_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x4_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_copy_mem8x4)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x8_c(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 void vp8_copy_mem8x8_mmx(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_copy_mem8x8)(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch);
 /* vp8_dc_only_idct_add, vp8_dequant_idct_add*, vp8_dequantize_b. */
 void vp8_dc_only_idct_add_c(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 void vp8_dc_only_idct_add_mmx(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_dc_only_idct_add)(short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride);
 void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_mmx(short *input, short *dq, unsigned char *output, int stride);
 RTCD_EXTERN void (*vp8_dequant_idct_add)(short *input, short *dq, unsigned char *output, int stride);
 void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 void vp8_dequant_idct_add_uv_block_mmx(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 RTCD_EXTERN void (*vp8_dequant_idct_add_uv_block)(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs);
 void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 void vp8_dequant_idct_add_y_block_mmx(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 RTCD_EXTERN void (*vp8_dequant_idct_add_y_block)(short *q, short *dq, unsigned char *dst, int stride, char *eobs);
 /* Takes the forward-declared struct blockd by pointer; its layout is not needed in this header. */
 void vp8_dequantize_b_c(struct blockd*, short *dqc);
 void vp8_dequantize_b_mmx(struct blockd*, short *dqc);
 RTCD_EXTERN void (*vp8_dequantize_b)(struct blockd*, short *dqc);
 /* Loop filters taking separate y/u/v pointers plus a struct loop_filter_info. */
 void vp8_loop_filter_bh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_bh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_bv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_bv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbh_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbh_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbh_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_mbh)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbv_c(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbv_mmx(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 void vp8_loop_filter_mbv_sse2(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 RTCD_EXTERN void (*vp8_loop_filter_mbv)(unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi);
 /*
  * "Simple" loop filters. The public pointer names (vp8_loop_filter_simple_*)
  * do not follow the implementation names: bh/bv map to the *_bhs_* / *_bvs_*
  * routines and mbh/mbv to the *_simple_{horizontal,vertical}_edge_* routines.
  */
 void vp8_loop_filter_bhs_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bhs_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bhs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_bh)(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bvs_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bvs_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_bvs_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_bv)(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_horizontal_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_horizontal_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_mbh)(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_vertical_edge_mmx(unsigned char *y, int ystride, const unsigned char *blimit);
 void vp8_loop_filter_simple_vertical_edge_sse2(unsigned char *y, int ystride, const unsigned char *blimit);
 RTCD_EXTERN void (*vp8_loop_filter_simple_mbv)(unsigned char *y, int ystride, const unsigned char *blimit);
 /* vp8_short_idct4x4llm and vp8_short_inv_walsh4x4 are function pointers; vp8_short_inv_walsh4x4_1 is a macro alias to its `_c` routine. */
 void vp8_short_idct4x4llm_c(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 RTCD_EXTERN void (*vp8_short_idct4x4llm)(short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride);
 void vp8_short_inv_walsh4x4_c(short *input, short *output);
 void vp8_short_inv_walsh4x4_mmx(short *input, short *output);
 void vp8_short_inv_walsh4x4_sse2(short *input, short *output);
 RTCD_EXTERN void (*vp8_short_inv_walsh4x4)(short *input, short *output);
 void vp8_short_inv_walsh4x4_1_c(short *input, short *output);
 #define vp8_short_inv_walsh4x4_1 vp8_short_inv_walsh4x4_1_c
 /* vp8_sixtap_predict*: same parameter list as the bilinear predictors; the set of SIMD variants differs per block size. */
 void vp8_sixtap_predict16x16_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict16x16_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict16x16_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict16x16_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict16x16)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict4x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict4x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict4x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict4x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x4_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x4_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x4_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x4_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict8x4)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x8_c(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x8_mmx(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x8_sse2(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 void vp8_sixtap_predict8x8_ssse3(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 RTCD_EXTERN void (*vp8_sixtap_predict8x8)(unsigned char *src, int src_pitch, int xofst, int yofst, unsigned char *dst, int dst_pitch);
 /* Declared here; its definition is not part of this header. */
 void vp8_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/x86.h"
 static void setup_rtcd_internal(void)
 {
    int flags = x86_simd_caps();
    vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_c;
    if (flags & HAS_MMX) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_mmx;
    if (flags & HAS_SSE2) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_sse2;
    if (flags & HAS_SSSE3) vp8_bilinear_predict16x16 = vp8_bilinear_predict16x16_ssse3;
    vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_c;
    if (flags & HAS_MMX) vp8_bilinear_predict4x4 = vp8_bilinear_predict4x4_mmx;
    vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_c;
    if (flags & HAS_MMX) vp8_bilinear_predict8x4 = vp8_bilinear_predict8x4_mmx;
    vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_c;
    if (flags & HAS_MMX) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_mmx;
    if (flags & HAS_SSE2) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_sse2;
    if (flags & HAS_SSSE3) vp8_bilinear_predict8x8 = vp8_bilinear_predict8x8_ssse3;
    vp8_clear_system_state = vp8_clear_system_state_c;
    if (flags & HAS_MMX) vp8_clear_system_state = vpx_reset_mmx_state;
    vp8_copy_mem16x16 = vp8_copy_mem16x16_c;
    if (flags & HAS_MMX) vp8_copy_mem16x16 = vp8_copy_mem16x16_mmx;
    if (flags & HAS_SSE2) vp8_copy_mem16x16 = vp8_copy_mem16x16_sse2;
    vp8_copy_mem8x4 = vp8_copy_mem8x4_c;
    if (flags & HAS_MMX) vp8_copy_mem8x4 = vp8_copy_mem8x4_mmx;
    vp8_copy_mem8x8 = vp8_copy_mem8x8_c;
    if (flags & HAS_MMX) vp8_copy_mem8x8 = vp8_copy_mem8x8_mmx;
    vp8_dc_only_idct_add = vp8_dc_only_idct_add_c;
    if (flags & HAS_MMX) vp8_dc_only_idct_add = vp8_dc_only_idct_add_mmx;
    vp8_dequant_idct_add = vp8_dequant_idct_add_c;
    if (flags & HAS_MMX) vp8_dequant_idct_add = vp8_dequant_idct_add_mmx;
    vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_c;
    if (flags & HAS_MMX) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_mmx;
    if (flags & HAS_SSE2) vp8_dequant_idct_add_uv_block = vp8_dequant_idct_add_uv_block_sse2;
    vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_c;
    if (flags & HAS_MMX) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_mmx;
    if (flags & HAS_SSE2) vp8_dequant_idct_add_y_block = vp8_dequant_idct_add_y_block_sse2;
    vp8_dequantize_b = vp8_dequantize_b_c;
    if (flags & HAS_MMX) vp8_dequantize_b = vp8_dequantize_b_mmx;
    vp8_loop_filter_bh = vp8_loop_filter_bh_c;
    if (flags & HAS_MMX) vp8_loop_filter_bh = vp8_loop_filter_bh_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_bh = vp8_loop_filter_bh_sse2;
    vp8_loop_filter_bv = vp8_loop_filter_bv_c;
    if (flags & HAS_MMX) vp8_loop_filter_bv = vp8_loop_filter_bv_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_bv = vp8_loop_filter_bv_sse2;
    vp8_loop_filter_mbh = vp8_loop_filter_mbh_c;
    if (flags & HAS_MMX) vp8_loop_filter_mbh = vp8_loop_filter_mbh_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_mbh = vp8_loop_filter_mbh_sse2;
    vp8_loop_filter_mbv = vp8_loop_filter_mbv_c;
    if (flags & HAS_MMX) vp8_loop_filter_mbv = vp8_loop_filter_mbv_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_mbv = vp8_loop_filter_mbv_sse2;
    vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_c;
    if (flags & HAS_MMX) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_simple_bh = vp8_loop_filter_bhs_sse2;
    vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_c;
    if (flags & HAS_MMX) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_simple_bv = vp8_loop_filter_bvs_sse2;
    vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_c;
    if (flags & HAS_MMX) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_simple_mbh = vp8_loop_filter_simple_horizontal_edge_sse2;
    vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_c;
    if (flags & HAS_MMX) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_mmx;
    if (flags & HAS_SSE2) vp8_loop_filter_simple_mbv = vp8_loop_filter_simple_vertical_edge_sse2;
    vp8_short_idct4x4llm = vp8_short_idct4x4llm_c;
    if (flags & HAS_MMX) vp8_short_idct4x4llm = vp8_short_idct4x4llm_mmx;
    vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_c;
    if (flags & HAS_MMX) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_mmx;
    if (flags & HAS_SSE2) vp8_short_inv_walsh4x4 = vp8_short_inv_walsh4x4_sse2;
    vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_c;
    if (flags & HAS_MMX) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_mmx;
    if (flags & HAS_SSE2) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_sse2;
    if (flags & HAS_SSSE3) vp8_sixtap_predict16x16 = vp8_sixtap_predict16x16_ssse3;
    vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_c;
    if (flags & HAS_MMX) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_mmx;
    if (flags & HAS_SSSE3) vp8_sixtap_predict4x4 = vp8_sixtap_predict4x4_ssse3;
    vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_c;
    if (flags & HAS_MMX) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_mmx;
    if (flags & HAS_SSE2) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_sse2;
    if (flags & HAS_SSSE3) vp8_sixtap_predict8x4 = vp8_sixtap_predict8x4_ssse3;
    vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_c;
    if (flags & HAS_MMX) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_mmx;
    if (flags & HAS_SSE2) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_sse2;
    if (flags & HAS_SSSE3) vp8_sixtap_predict8x8 = vp8_sixtap_predict8x8_ssse3;
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h
+++ b/thirdparty/libvpx/rtcd/vp9_rtcd_arm.h
@ -0,0 +1,54 @@
 #ifndef VP9_RTCD_H_
 #define VP9_RTCD_H_
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * VP9
 */
 #include "vp9/common/vp9_common.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* 16x16: only a C implementation is declared, so the public name is a
  * compile-time macro alias and no run-time selection takes place. */
 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
 #define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
 /* 4x4: C and NEON implementations. The public name is a function pointer
  * assigned in setup_rtcd_internal(); RTCD_EXTERN makes this line the
  * definition when RTCD_C is defined and an extern declaration otherwise. */
 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 /* 8x8: same arrangement as 4x4 (C and NEON behind a function pointer). */
 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 /* Declared here, defined outside this header.
  * NOTE(review): presumably the entry point that runs setup_rtcd_internal() - confirm. */
 void vp9_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/arm.h"
 /*
  * Binds the VP9 dispatch pointers for ARM builds.
  *
  * Both pointers start out on their portable _c routine. When the build has
  * NEON support compiled in (HAVE_NEON) and arm_cpu_caps() reports HAS_NEON,
  * they are switched to the _neon variants.
  */
 static void setup_rtcd_internal(void)
 {
    int flags = arm_cpu_caps();
    /* flags is read only inside the HAVE_NEON section below; the cast keeps
     * builds with NEON compiled out free of an unused-variable warning. */
    (void)flags;
    /* Portable C defaults. */
    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
 #if HAVE_NEON
    if (flags & HAS_NEON) {
        vp9_iht4x4_16_add = vp9_iht4x4_16_add_neon;
        vp9_iht8x8_64_add = vp9_iht8x8_64_add_neon;
    }
 #endif
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vp9_rtcd_c.h
+++ b/thirdparty/libvpx/rtcd/vp9_rtcd_c.h
@ -0,0 +1,41 @@
 #ifndef VP9_RTCD_H_
 #define VP9_RTCD_H_
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * VP9
 */
 #include "vp9/common/vp9_common.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Generic C build: each public name below is a compile-time macro alias for
  * its _c implementation, so no function pointers are declared. */
 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
 #define vp9_iht16x16_256_add vp9_iht16x16_256_add_c
 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 #define vp9_iht4x4_16_add vp9_iht4x4_16_add_c
 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 #define vp9_iht8x8_64_add vp9_iht8x8_64_add_c
 /* Declared here, defined outside this header.
  * NOTE(review): presumably the entry point that runs setup_rtcd_internal() - confirm. */
 void vp9_rtcd(void);
 #ifdef RTCD_C
 /* Intentionally empty: in this generic C header every VP9 name is bound at
  * compile time through a #define, so there is nothing to select at run time. */
 static void setup_rtcd_internal(void) {}
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h
+++ b/thirdparty/libvpx/rtcd/vp9_rtcd_x86.h
@ -0,0 +1,55 @@
 #ifndef VP9_RTCD_H_
 #define VP9_RTCD_H_
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * VP9
 */
 #include "vp9/common/vp9_common.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* x86 build: each public name is a function pointer with a C and an SSE2
  * implementation behind it, assigned in setup_rtcd_internal(). RTCD_EXTERN
  * makes the pointer line the definition when RTCD_C is defined and an extern
  * declaration otherwise. */
 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
 void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
 RTCD_EXTERN void (*vp9_iht16x16_256_add)(const tran_low_t *input, uint8_t *output, int pitch, int tx_type);
 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 RTCD_EXTERN void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 RTCD_EXTERN void (*vp9_iht8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type);
 /* Declared here, defined outside this header.
  * NOTE(review): presumably the entry point that runs setup_rtcd_internal() - confirm. */
 void vp9_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/x86.h"
 /*
  * Binds the VP9 dispatch pointers for x86 builds: every pointer gets its
  * portable _c routine, then all three are switched to the _sse2 variants
  * when x86_simd_caps() reports HAS_SSE2.
  */
 static void setup_rtcd_internal(void)
 {
    const int caps = x86_simd_caps();
    /* Portable C defaults. */
    vp9_iht16x16_256_add = vp9_iht16x16_256_add_c;
    vp9_iht4x4_16_add = vp9_iht4x4_16_add_c;
    vp9_iht8x8_64_add = vp9_iht8x8_64_add_c;
    if (caps & HAS_SSE2) {
        vp9_iht16x16_256_add = vp9_iht16x16_256_add_sse2;
        vp9_iht4x4_16_add = vp9_iht4x4_16_add_sse2;
        vp9_iht8x8_64_add = vp9_iht8x8_64_add_sse2;
    }
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h
+++ b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_arm.h
@ -0,0 +1,678 @@
 #ifndef VPX_DSP_RTCD_H_
 #define VPX_DSP_RTCD_H_
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * DSP
 */
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
 void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
 void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
 void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
 void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
 void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
 void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d135_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
 void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c
 void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c
 void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c
 void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c
 void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c
 void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c
 void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c
 void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
 void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
 void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
 void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
 void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
 void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c
 void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
 void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
 void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
 void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
 void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
 void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c
 void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c
 void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
 void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
 void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
 void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
 void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
 void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
 void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c
 void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
 /*
  * vpx_lpf_* entry points. Each one is declared as a triple: the C version
  * (_c), the NEON version (_neon) and the dispatch pointer that
  * setup_rtcd_internal() assigns.
  *
  * Two signatures are used:
  *   - single: (s, pitch, blimit, limit, thresh)
  *   - dual:   (s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1)
  * Note that vpx_lpf_vertical_16_dual uses the single signature even though
  * its name ends in _dual.
  */
 void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_4_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_8_neon(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 /*
  * vpx_scaled_* entry points. Only C versions are declared here, so each
  * public name is a compile-time alias for its _c function rather than a
  * dispatch pointer. All six share one signature.
  */
 void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_2d vpx_scaled_2d_c
 void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
 void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
 void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
 void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_horiz vpx_scaled_horiz_c
 void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_vert vpx_scaled_vert_c
 /*
  * vpx_tm_predictor_* and vpx_v_predictor_* entry points. Each one is
  * declared as a triple: the C version (_c), the NEON version (_neon) and
  * the dispatch pointer that setup_rtcd_internal() assigns. All share the
  * signature (dst, y_stride, above, left).
  */
 void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /* vpx_ve_predictor_4x4 has no _neon variant here; it is aliased to the C version. */
 void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
 /*
  * Declared here but defined outside this header.
  * NOTE(review): presumably the public initializer that ends up calling
  * setup_rtcd_internal() - confirm against the translation unit that
  * defines RTCD_C.
  */
 void vpx_dsp_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/arm.h"
 /*
  * Bind every vpx_dsp dispatch pointer declared in this header.
  *
  * Each pointer is first set to its portable C implementation. When the
  * build defines HAVE_NEON to a non-zero value and arm_cpu_caps() reports
  * HAS_NEON, the pointers are then re-bound to their NEON implementations.
  * Only compiled into the translation unit that defines RTCD_C.
  */
 static void setup_rtcd_internal(void)
 {
     const int cpu_caps = arm_cpu_caps();

     /* Baseline: portable C implementations. */
     vpx_convolve8 = vpx_convolve8_c;
     vpx_convolve8_avg = vpx_convolve8_avg_c;
     vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c;
     vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c;
     vpx_convolve8_horiz = vpx_convolve8_horiz_c;
     vpx_convolve8_vert = vpx_convolve8_vert_c;
     vpx_convolve_avg = vpx_convolve_avg_c;
     vpx_convolve_copy = vpx_convolve_copy_c;

     vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_c;
     vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c;
     vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c;
     vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c;

     vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c;
     vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c;
     vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c;
     vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c;
     vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c;
     vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c;
     vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c;
     vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c;
     vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c;
     vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c;
     vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c;
     vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c;
     vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c;
     vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c;
     vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c;
     vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c;

     vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c;
     vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c;
     vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c;
     vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c;

     vpx_idct16x16_10_add = vpx_idct16x16_10_add_c;
     vpx_idct16x16_1_add = vpx_idct16x16_1_add_c;
     vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
     vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
     vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
     vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
     vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
     vpx_idct4x4_16_add = vpx_idct4x4_16_add_c;
     vpx_idct4x4_1_add = vpx_idct4x4_1_add_c;
     vpx_idct8x8_12_add = vpx_idct8x8_12_add_c;
     vpx_idct8x8_1_add = vpx_idct8x8_1_add_c;
     vpx_idct8x8_64_add = vpx_idct8x8_64_add_c;

     vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c;
     vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c;
     vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c;
     vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c;
     vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c;
     vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c;
     vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c;
     vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c;
     vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c;
     vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c;
     vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c;
     vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c;

     vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c;
     vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c;
     vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c;
     vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c;
     vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c;
     vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c;
     vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c;
     vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c;

 #if HAVE_NEON
     /* Upgrade to NEON only when the running CPU actually reports it. */
     if (cpu_caps & HAS_NEON) {
         vpx_convolve8 = vpx_convolve8_neon;
         vpx_convolve8_avg = vpx_convolve8_avg_neon;
         vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_neon;
         vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_neon;
         vpx_convolve8_horiz = vpx_convolve8_horiz_neon;
         vpx_convolve8_vert = vpx_convolve8_vert_neon;
         vpx_convolve_avg = vpx_convolve_avg_neon;
         vpx_convolve_copy = vpx_convolve_copy_neon;

         vpx_d135_predictor_4x4 = vpx_d135_predictor_4x4_neon;
         vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_neon;
         vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_neon;
         vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_neon;

         vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_neon;
         vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_neon;
         vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_neon;
         vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_neon;
         vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_neon;
         vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_neon;
         vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_neon;
         vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_neon;
         vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_neon;
         vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_neon;
         vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_neon;
         vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_neon;
         vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_neon;
         vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_neon;
         vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_neon;
         vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_neon;

         vpx_h_predictor_16x16 = vpx_h_predictor_16x16_neon;
         vpx_h_predictor_32x32 = vpx_h_predictor_32x32_neon;
         vpx_h_predictor_4x4 = vpx_h_predictor_4x4_neon;
         vpx_h_predictor_8x8 = vpx_h_predictor_8x8_neon;

         vpx_idct16x16_10_add = vpx_idct16x16_10_add_neon;
         vpx_idct16x16_1_add = vpx_idct16x16_1_add_neon;
         vpx_idct16x16_256_add = vpx_idct16x16_256_add_neon;
         vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_neon;
         /*
          * The 135 and 34 entry points are bound to the 1024 NEON routine.
          * NOTE(review): presumably no dedicated NEON variants exist for
          * them - confirm before changing.
          */
         vpx_idct32x32_135_add = vpx_idct32x32_1024_add_neon;
         vpx_idct32x32_1_add = vpx_idct32x32_1_add_neon;
         vpx_idct32x32_34_add = vpx_idct32x32_1024_add_neon;
         vpx_idct4x4_16_add = vpx_idct4x4_16_add_neon;
         vpx_idct4x4_1_add = vpx_idct4x4_1_add_neon;
         vpx_idct8x8_12_add = vpx_idct8x8_12_add_neon;
         vpx_idct8x8_1_add = vpx_idct8x8_1_add_neon;
         vpx_idct8x8_64_add = vpx_idct8x8_64_add_neon;

         vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_neon;
         vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_neon;
         vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_neon;
         vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_neon;
         vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_neon;
         vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_neon;
         vpx_lpf_vertical_16 = vpx_lpf_vertical_16_neon;
         vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_neon;
         vpx_lpf_vertical_4 = vpx_lpf_vertical_4_neon;
         vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_neon;
         vpx_lpf_vertical_8 = vpx_lpf_vertical_8_neon;
         vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_neon;

         vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_neon;
         vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_neon;
         vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_neon;
         vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_neon;
         vpx_v_predictor_16x16 = vpx_v_predictor_16x16_neon;
         vpx_v_predictor_32x32 = vpx_v_predictor_32x32_neon;
         vpx_v_predictor_4x4 = vpx_v_predictor_4x4_neon;
         vpx_v_predictor_8x8 = vpx_v_predictor_8x8_neon;
     }
 #endif
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h
+++ b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_c.h
@ -0,0 +1,355 @@
 #ifndef VPX_DSP_RTCD_H_
 #define VPX_DSP_RTCD_H_
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * DSP
 */
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
  * vpx_convolve* entry points for the C-only configuration. Each public
  * name is a compile-time alias for its _c function; no dispatch pointers
  * are declared in this section. All eight share one signature.
  */
 void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve8 vpx_convolve8_c
 void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve8_avg vpx_convolve8_avg_c
 void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve8_avg_horiz vpx_convolve8_avg_horiz_c
 void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve8_avg_vert vpx_convolve8_avg_vert_c
 void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve8_horiz vpx_convolve8_horiz_c
 void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve8_vert vpx_convolve8_vert_c
 void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve_avg vpx_convolve_avg_c
 void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_convolve_copy vpx_convolve_copy_c
 /*
  * vpx_d*_predictor_* entry points (d117, d135, d153, d207, d207e, d45,
  * d45e, d63, d63e, d63f). Each public name is a compile-time alias for its
  * _c function. All share the signature (dst, y_stride, above, left).
  */
 void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
 void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
 void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
 void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
 void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
 void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
 void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c
 void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
 void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_16x16 vpx_d153_predictor_16x16_c
 void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_32x32 vpx_d153_predictor_32x32_c
 void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_4x4 vpx_d153_predictor_4x4_c
 void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d153_predictor_8x8 vpx_d153_predictor_8x8_c
 void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_16x16 vpx_d207_predictor_16x16_c
 void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_32x32 vpx_d207_predictor_32x32_c
 void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_4x4 vpx_d207_predictor_4x4_c
 void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207_predictor_8x8 vpx_d207_predictor_8x8_c
 void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
 void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
 void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
 void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
 void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45_predictor_16x16 vpx_d45_predictor_16x16_c
 void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45_predictor_32x32 vpx_d45_predictor_32x32_c
 void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45_predictor_4x4 vpx_d45_predictor_4x4_c
 void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45_predictor_8x8 vpx_d45_predictor_8x8_c
 void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
 void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
 void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
 void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
 void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_16x16 vpx_d63_predictor_16x16_c
 void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_32x32 vpx_d63_predictor_32x32_c
 void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_4x4 vpx_d63_predictor_4x4_c
 void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63_predictor_8x8 vpx_d63_predictor_8x8_c
 void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
 void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
 void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
 void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
 /* d63f is declared for 4x4 only in this header. */
 void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
 /*
  * vpx_dc_*_predictor_*, vpx_h_predictor_* and vpx_he_predictor_4x4 entry
  * points. Each public name is a compile-time alias for its _c function.
  * All share the signature (dst, y_stride, above, left).
  */
 void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_128_predictor_16x16 vpx_dc_128_predictor_16x16_c
 void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_128_predictor_32x32 vpx_dc_128_predictor_32x32_c
 void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_128_predictor_4x4 vpx_dc_128_predictor_4x4_c
 void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_128_predictor_8x8 vpx_dc_128_predictor_8x8_c
 void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_left_predictor_16x16 vpx_dc_left_predictor_16x16_c
 void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_left_predictor_32x32 vpx_dc_left_predictor_32x32_c
 void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_left_predictor_4x4 vpx_dc_left_predictor_4x4_c
 void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_left_predictor_8x8 vpx_dc_left_predictor_8x8_c
 void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_predictor_16x16 vpx_dc_predictor_16x16_c
 void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_predictor_32x32 vpx_dc_predictor_32x32_c
 void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_predictor_4x4 vpx_dc_predictor_4x4_c
 void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_predictor_8x8 vpx_dc_predictor_8x8_c
 void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_top_predictor_16x16 vpx_dc_top_predictor_16x16_c
 void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_top_predictor_32x32 vpx_dc_top_predictor_32x32_c
 void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_top_predictor_4x4 vpx_dc_top_predictor_4x4_c
 void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_dc_top_predictor_8x8 vpx_dc_top_predictor_8x8_c
 void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_h_predictor_16x16 vpx_h_predictor_16x16_c
 void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_h_predictor_32x32 vpx_h_predictor_32x32_c
 void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_h_predictor_4x4 vpx_h_predictor_4x4_c
 void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_h_predictor_8x8 vpx_h_predictor_8x8_c
 void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
 /*
  * vpx_idct* and vpx_iwht4x4_* entry points for the C-only configuration.
  * Each public name is a compile-time alias for its _c function. All share
  * the signature (input, dest, dest_stride).
  */
 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct16x16_10_add vpx_idct16x16_10_add_c
 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct16x16_1_add vpx_idct16x16_1_add_c
 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct16x16_256_add vpx_idct16x16_256_add_c
 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct32x32_1024_add vpx_idct32x32_1024_add_c
 void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct32x32_135_add vpx_idct32x32_135_add_c
 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct32x32_1_add vpx_idct32x32_1_add_c
 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct32x32_34_add vpx_idct32x32_34_add_c
 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct4x4_16_add vpx_idct4x4_16_add_c
 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct4x4_1_add vpx_idct4x4_1_add_c
 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct8x8_12_add vpx_idct8x8_12_add_c
 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct8x8_1_add vpx_idct8x8_1_add_c
 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_idct8x8_64_add vpx_idct8x8_64_add_c
 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_iwht4x4_16_add vpx_iwht4x4_16_add_c
 void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
 /*
  * lpf (loop filter) entries, all aliased at compile time to their _c
  * implementation.
  *
  * Two signature shapes appear below:
  *   - (s, pitch, blimit, limit, thresh): one set of threshold pointers;
  *   - (s, pitch, blimit0, limit0, thresh0, blimit1, limit1, thresh1):
  *     two sets, used by the _4_dual and _8_dual entries.
  * Note that vpx_lpf_vertical_16_dual, despite its "_dual" suffix, takes
  * the single-set signature.
  */
 void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_horizontal_4 vpx_lpf_horizontal_4_c
 void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vpx_lpf_horizontal_4_dual vpx_lpf_horizontal_4_dual_c
 void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_horizontal_8 vpx_lpf_horizontal_8_c
 void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vpx_lpf_horizontal_8_dual vpx_lpf_horizontal_8_dual_c
 void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_horizontal_edge_16 vpx_lpf_horizontal_edge_16_c
 void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_horizontal_edge_8 vpx_lpf_horizontal_edge_8_c
 void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_vertical_16 vpx_lpf_vertical_16_c
 void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_vertical_16_dual vpx_lpf_vertical_16_dual_c
 void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_vertical_4 vpx_lpf_vertical_4_c
 void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vpx_lpf_vertical_4_dual vpx_lpf_vertical_4_dual_c
 void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 #define vpx_lpf_vertical_8 vpx_lpf_vertical_8_c
 void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 #define vpx_lpf_vertical_8_dual vpx_lpf_vertical_8_dual_c
 /*
  * vpx_scaled_* entries. All six share one signature:
  * (src, src_stride, dst, dst_stride, filter_x, x_step_q4, filter_y,
  *  y_step_q4, w, h). Only the _c implementation is declared and each
  * public name is a compile-time macro alias for it.
  */
 void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_2d vpx_scaled_2d_c
 void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
 void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
 void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
 void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_horiz vpx_scaled_horiz_c
 void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_vert vpx_scaled_vert_c
 /*
  * tm / v / ve predictor entries. Signature: (dst, y_stride, above, left).
  * Only the _c implementation is declared for each, and the public name
  * is a macro that expands to it.
  */
 void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_tm_predictor_16x16 vpx_tm_predictor_16x16_c
 void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_tm_predictor_32x32 vpx_tm_predictor_32x32_c
 void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_tm_predictor_4x4 vpx_tm_predictor_4x4_c
 void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_tm_predictor_8x8 vpx_tm_predictor_8x8_c
 void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_v_predictor_16x16 vpx_v_predictor_16x16_c
 void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_v_predictor_32x32 vpx_v_predictor_32x32_c
 void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_v_predictor_4x4 vpx_v_predictor_4x4_c
 void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_v_predictor_8x8 vpx_v_predictor_8x8_c
 void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
 /* Public RTCD entry point for this module; it is only declared here. */
 void vpx_dsp_rtcd(void);
 /*
  * setup_rtcd_internal() is compiled only in the translation unit that
  * defines RTCD_C. Its body is empty in this header: every entry visible
  * in this part of the file is a compile-time macro alias rather than a
  * function pointer, which is presumably why there is nothing to assign
  * here (NOTE(review): the earlier part of this header is not shown in
  * this section - confirm it declares no RTCD_EXTERN pointers).
  */
 #ifdef RTCD_C
 static void setup_rtcd_internal(void)
 {
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h
+++ b/thirdparty/libvpx/rtcd/vpx_dsp_rtcd_x86.h
@ -0,0 +1,614 @@
 #ifndef VPX_DSP_RTCD_H_
 #define VPX_DSP_RTCD_H_
 /*
  * RTCD_EXTERN prefixes every function-pointer declaration below.
  * In the translation unit that defines RTCD_C it expands to nothing, so
  * those declarations act as the (tentative) definitions of the pointers;
  * everywhere else it expands to `extern`, so they are plain references.
  */
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /*
 * DSP
 */
 #include "vpx/vpx_integer.h"
 #include "vpx_dsp/vpx_dsp_common.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /*
  * vpx_convolve* entries. Each entry consists of:
  *   - the _c implementation,
  *   - one or more variants suffixed _sse2 / _ssse3 / _avx2,
  *   - an RTCD_EXTERN function pointer that carries the public name.
  * Callers therefore go through the pointer; the code that assigns the
  * pointers is not part of this section of the header.
  *
  * All entries share one signature:
  * (src, src_stride, dst, dst_stride, filter_x, x_step_q4, filter_y,
  *  y_step_q4, w, h).
  *
  * An _avx2 variant is declared only for vpx_convolve8,
  * vpx_convolve8_horiz and vpx_convolve8_vert; vpx_convolve_avg and
  * vpx_convolve_copy declare only an _sse2 variant.
  */
 void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_avg_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_avg_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_horiz)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve8_vert)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve_avg)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_convolve_copy_sse2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_convolve_copy)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 /*
  * d117 / d135 predictor entries. No SIMD-suffixed variant is declared
  * for these, so each public name is a compile-time macro alias for the
  * _c implementation instead of an RTCD_EXTERN function pointer.
  * Signature: (dst, y_stride, above, left).
  */
 void vpx_d117_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_16x16 vpx_d117_predictor_16x16_c
 void vpx_d117_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_32x32 vpx_d117_predictor_32x32_c
 void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_4x4 vpx_d117_predictor_4x4_c
 void vpx_d117_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d117_predictor_8x8 vpx_d117_predictor_8x8_c
 void vpx_d135_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_16x16 vpx_d135_predictor_16x16_c
 void vpx_d135_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_32x32 vpx_d135_predictor_32x32_c
 void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_4x4 vpx_d135_predictor_4x4_c
 void vpx_d135_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d135_predictor_8x8 vpx_d135_predictor_8x8_c
 /*
  * d153 / d207 predictor entries. Each declares the _c implementation,
  * one SIMD-suffixed variant and an RTCD_EXTERN function pointer with the
  * public name. The declared variant is _ssse3 for every entry except
  * vpx_d207_predictor_4x4, which declares _sse2.
  * Signature: (dst, y_stride, above, left).
  */
 void vpx_d153_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d153_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d153_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d153_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d153_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d153_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d207_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d207_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d207_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d207_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d207_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /*
  * d207e predictor entries: only the _c implementation is declared, so
  * each public name is a compile-time macro alias for it.
  */
 void vpx_d207e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_16x16 vpx_d207e_predictor_16x16_c
 void vpx_d207e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_32x32 vpx_d207e_predictor_32x32_c
 void vpx_d207e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_4x4 vpx_d207e_predictor_4x4_c
 void vpx_d207e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d207e_predictor_8x8 vpx_d207e_predictor_8x8_c
 /*
  * d45 predictor entries are exposed through RTCD_EXTERN function
  * pointers: the 16x16 and 32x32 sizes declare an _ssse3 variant, the
  * 4x4 and 8x8 sizes declare an _sse2 variant.
  * Signature: (dst, y_stride, above, left).
  */
 void vpx_d45_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d45_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d45_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /* d45e entries: _c only, so the public names are macro aliases. */
 void vpx_d45e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_16x16 vpx_d45e_predictor_16x16_c
 void vpx_d45e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_32x32 vpx_d45e_predictor_32x32_c
 void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_4x4 vpx_d45e_predictor_4x4_c
 void vpx_d45e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d45e_predictor_8x8 vpx_d45e_predictor_8x8_c
 /*
  * d63 predictor entries: each size declares the _c implementation, an
  * _ssse3 variant and an RTCD_EXTERN function pointer with the public
  * name. Signature: (dst, y_stride, above, left).
  */
 void vpx_d63_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_16x16_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d63_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_32x32_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d63_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_4x4_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d63_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_d63_predictor_8x8_ssse3(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_d63_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /* d63e / d63f entries: _c only, so the public names are macro aliases. */
 void vpx_d63e_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_16x16 vpx_d63e_predictor_16x16_c
 void vpx_d63e_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_32x32 vpx_d63e_predictor_32x32_c
 void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_4x4 vpx_d63e_predictor_4x4_c
 void vpx_d63e_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63e_predictor_8x8 vpx_d63e_predictor_8x8_c
 void vpx_d63f_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_d63f_predictor_4x4 vpx_d63f_predictor_4x4_c
 /*
  * dc_128 / dc_left predictor entries. Every size declares the _c
  * implementation, an _sse2 variant and an RTCD_EXTERN function pointer
  * carrying the public name. Signature: (dst, y_stride, above, left).
  */
 void vpx_dc_128_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_128_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_128_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_left_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_left_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /*
  * dc / dc_top predictor entries. Every size declares the _c
  * implementation, an _sse2 variant and an RTCD_EXTERN function pointer
  * carrying the public name. Signature: (dst, y_stride, above, left).
  */
 void vpx_dc_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_dc_top_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_dc_top_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /*
  * h predictor entries: each size declares the _c implementation, an
  * _sse2 variant and an RTCD_EXTERN function pointer with the public
  * name. Signature: (dst, y_stride, above, left).
  */
 void vpx_h_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_h_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_h_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /* he 4x4 entry: _c only, so the public name is a macro alias. */
 void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_he_predictor_4x4 vpx_he_predictor_4x4_c
 /*
  * idct16x16 / idct32x32 "_add" entries. Each declares the _c
  * implementation, an _sse2 prototype and an RTCD_EXTERN function pointer
  * with the public name. Signature: (input, dest, dest_stride).
  */
 void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct16x16_10_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct16x16_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct16x16_256_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_1024_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 /*
  * The 135 entry declares no vpx_idct32x32_135_add_sse2; the second line
  * below repeats the vpx_idct32x32_1024_add_sse2 prototype instead (a
  * harmless redeclaration in C).
  * NOTE(review): presumably the 135 pointer is bound to the 1024 SSE2
  * routine by the setup code, which is outside this section - confirm
  * before "fixing" the name.
  */
 void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_135_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct32x32_34_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct4x4_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct8x8_12_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct8x8_1_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_idct8x8_64_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_iwht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int dest_stride);
 RTCD_EXTERN void (*vpx_iwht4x4_16_add)(const tran_low_t *input, uint8_t *dest, int dest_stride);
 void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int dest_stride);
 #define vpx_iwht4x4_1_add vpx_iwht4x4_1_add_c
 /*
  * vpx_lpf_* routines.
  * Every pointer is bound by setup_rtcd_internal() to the _c routine and then
  * to the _sse2 routine when HAS_SSE2 is reported. The two horizontal_edge
  * entries additionally have an _avx2 routine that is selected last, when
  * HAS_AVX2 is reported.
  * The *_dual entries take two sets of blimit/limit/thresh pointers, except
  * vpx_lpf_vertical_16_dual, which takes a single set.
  */
 void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_horizontal_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_horizontal_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_16_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_edge_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_horizontal_edge_8_avx2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_horizontal_edge_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_16)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_16_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_16_dual)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_4)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_vertical_4_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_8_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 RTCD_EXTERN void (*vpx_lpf_vertical_8)(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh);
 void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 RTCD_EXTERN void (*vpx_lpf_vertical_8_dual)(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1);
 /*
  * vpx_scaled_* routines (all share one signature).
  * Only vpx_scaled_2d is dispatched at run time: setup_rtcd_internal() binds
  * it to the _c routine and then to the _ssse3 routine when HAS_SSSE3 is
  * reported. The remaining entries have only a C routine and are bound to it
  * at compile time through #define.
  */
 void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 RTCD_EXTERN void (*vpx_scaled_2d)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_2d vpx_scaled_avg_2d_c
 void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_horiz vpx_scaled_avg_horiz_c
 void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_avg_vert vpx_scaled_avg_vert_c
 void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_horiz vpx_scaled_horiz_c
 void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h);
 #define vpx_scaled_vert vpx_scaled_vert_c
 /*
  * vpx_tm_predictor_* and vpx_v_predictor_* routines (16x16, 32x32, 4x4, 8x8).
  * Every pointer is bound by setup_rtcd_internal() to the _c routine and then
  * to the _sse2 routine when HAS_SSE2 is reported.
  */
 void vpx_tm_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_tm_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_tm_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_16x16_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_16x16_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_16x16)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_32x32_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_32x32_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_32x32)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_4x4_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_4x4)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_8x8_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 void vpx_v_predictor_8x8_sse2(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 RTCD_EXTERN void (*vpx_v_predictor_8x8)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 /* Only a C routine is declared for vpx_ve_predictor_4x4, so the name is bound to it at compile time. */
 void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left);
 #define vpx_ve_predictor_4x4 vpx_ve_predictor_4x4_c
 /* Declared here, defined elsewhere. NOTE(review): presumably the one-time initializer that ends up running setup_rtcd_internal() below - confirm against the translation unit that defines RTCD_C. */
 void vpx_dsp_rtcd(void);
 #ifdef RTCD_C
 #include "vpx_ports/x86.h"
 static void setup_rtcd_internal(void)
 {
    int flags = x86_simd_caps();
    vpx_convolve8 = vpx_convolve8_c;
    if (flags & HAS_SSE2) vpx_convolve8 = vpx_convolve8_sse2;
    if (flags & HAS_SSSE3) vpx_convolve8 = vpx_convolve8_ssse3;
    if (flags & HAS_AVX2) vpx_convolve8 = vpx_convolve8_avx2;
    vpx_convolve8_avg = vpx_convolve8_avg_c;
    if (flags & HAS_SSE2) vpx_convolve8_avg = vpx_convolve8_avg_sse2;
    if (flags & HAS_SSSE3) vpx_convolve8_avg = vpx_convolve8_avg_ssse3;
    vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_c;
    if (flags & HAS_SSE2) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_sse2;
    if (flags & HAS_SSSE3) vpx_convolve8_avg_horiz = vpx_convolve8_avg_horiz_ssse3;
    vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_c;
    if (flags & HAS_SSE2) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_sse2;
    if (flags & HAS_SSSE3) vpx_convolve8_avg_vert = vpx_convolve8_avg_vert_ssse3;
    vpx_convolve8_horiz = vpx_convolve8_horiz_c;
    if (flags & HAS_SSE2) vpx_convolve8_horiz = vpx_convolve8_horiz_sse2;
    if (flags & HAS_SSSE3) vpx_convolve8_horiz = vpx_convolve8_horiz_ssse3;
    if (flags & HAS_AVX2) vpx_convolve8_horiz = vpx_convolve8_horiz_avx2;
    vpx_convolve8_vert = vpx_convolve8_vert_c;
    if (flags & HAS_SSE2) vpx_convolve8_vert = vpx_convolve8_vert_sse2;
    if (flags & HAS_SSSE3) vpx_convolve8_vert = vpx_convolve8_vert_ssse3;
    if (flags & HAS_AVX2) vpx_convolve8_vert = vpx_convolve8_vert_avx2;
    vpx_convolve_avg = vpx_convolve_avg_c;
    if (flags & HAS_SSE2) vpx_convolve_avg = vpx_convolve_avg_sse2;
    vpx_convolve_copy = vpx_convolve_copy_c;
    if (flags & HAS_SSE2) vpx_convolve_copy = vpx_convolve_copy_sse2;
    vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_c;
    if (flags & HAS_SSSE3) vpx_d153_predictor_16x16 = vpx_d153_predictor_16x16_ssse3;
    vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_c;
    if (flags & HAS_SSSE3) vpx_d153_predictor_32x32 = vpx_d153_predictor_32x32_ssse3;
    vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_c;
    if (flags & HAS_SSSE3) vpx_d153_predictor_4x4 = vpx_d153_predictor_4x4_ssse3;
    vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_c;
    if (flags & HAS_SSSE3) vpx_d153_predictor_8x8 = vpx_d153_predictor_8x8_ssse3;
    vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_c;
    if (flags & HAS_SSSE3) vpx_d207_predictor_16x16 = vpx_d207_predictor_16x16_ssse3;
    vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_c;
    if (flags & HAS_SSSE3) vpx_d207_predictor_32x32 = vpx_d207_predictor_32x32_ssse3;
    vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_d207_predictor_4x4 = vpx_d207_predictor_4x4_sse2;
    vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_c;
    if (flags & HAS_SSSE3) vpx_d207_predictor_8x8 = vpx_d207_predictor_8x8_ssse3;
    vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_c;
    if (flags & HAS_SSSE3) vpx_d45_predictor_16x16 = vpx_d45_predictor_16x16_ssse3;
    vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_c;
    if (flags & HAS_SSSE3) vpx_d45_predictor_32x32 = vpx_d45_predictor_32x32_ssse3;
    vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_d45_predictor_4x4 = vpx_d45_predictor_4x4_sse2;
    vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_d45_predictor_8x8 = vpx_d45_predictor_8x8_sse2;
    vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_c;
    if (flags & HAS_SSSE3) vpx_d63_predictor_16x16 = vpx_d63_predictor_16x16_ssse3;
    vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_c;
    if (flags & HAS_SSSE3) vpx_d63_predictor_32x32 = vpx_d63_predictor_32x32_ssse3;
    vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_c;
    if (flags & HAS_SSSE3) vpx_d63_predictor_4x4 = vpx_d63_predictor_4x4_ssse3;
    vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_c;
    if (flags & HAS_SSSE3) vpx_d63_predictor_8x8 = vpx_d63_predictor_8x8_ssse3;
    vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_dc_128_predictor_16x16 = vpx_dc_128_predictor_16x16_sse2;
    vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_dc_128_predictor_32x32 = vpx_dc_128_predictor_32x32_sse2;
    vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_dc_128_predictor_4x4 = vpx_dc_128_predictor_4x4_sse2;
    vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_dc_128_predictor_8x8 = vpx_dc_128_predictor_8x8_sse2;
    vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_dc_left_predictor_16x16 = vpx_dc_left_predictor_16x16_sse2;
    vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_dc_left_predictor_32x32 = vpx_dc_left_predictor_32x32_sse2;
    vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_dc_left_predictor_4x4 = vpx_dc_left_predictor_4x4_sse2;
    vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_dc_left_predictor_8x8 = vpx_dc_left_predictor_8x8_sse2;
    vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_dc_predictor_16x16 = vpx_dc_predictor_16x16_sse2;
    vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_dc_predictor_32x32 = vpx_dc_predictor_32x32_sse2;
    vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_dc_predictor_4x4 = vpx_dc_predictor_4x4_sse2;
    vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_dc_predictor_8x8 = vpx_dc_predictor_8x8_sse2;
    vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_dc_top_predictor_16x16 = vpx_dc_top_predictor_16x16_sse2;
    vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_dc_top_predictor_32x32 = vpx_dc_top_predictor_32x32_sse2;
    vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_dc_top_predictor_4x4 = vpx_dc_top_predictor_4x4_sse2;
    vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_dc_top_predictor_8x8 = vpx_dc_top_predictor_8x8_sse2;
    vpx_h_predictor_16x16 = vpx_h_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_h_predictor_16x16 = vpx_h_predictor_16x16_sse2;
    vpx_h_predictor_32x32 = vpx_h_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_h_predictor_32x32 = vpx_h_predictor_32x32_sse2;
    vpx_h_predictor_4x4 = vpx_h_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_h_predictor_4x4 = vpx_h_predictor_4x4_sse2;
    vpx_h_predictor_8x8 = vpx_h_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_h_predictor_8x8 = vpx_h_predictor_8x8_sse2;
    vpx_idct16x16_10_add = vpx_idct16x16_10_add_c;
    if (flags & HAS_SSE2) vpx_idct16x16_10_add = vpx_idct16x16_10_add_sse2;
    vpx_idct16x16_1_add = vpx_idct16x16_1_add_c;
    if (flags & HAS_SSE2) vpx_idct16x16_1_add = vpx_idct16x16_1_add_sse2;
    vpx_idct16x16_256_add = vpx_idct16x16_256_add_c;
    if (flags & HAS_SSE2) vpx_idct16x16_256_add = vpx_idct16x16_256_add_sse2;
    vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_c;
    if (flags & HAS_SSE2) vpx_idct32x32_1024_add = vpx_idct32x32_1024_add_sse2;
    vpx_idct32x32_135_add = vpx_idct32x32_135_add_c;
    if (flags & HAS_SSE2) vpx_idct32x32_135_add = vpx_idct32x32_1024_add_sse2;
    vpx_idct32x32_1_add = vpx_idct32x32_1_add_c;
    if (flags & HAS_SSE2) vpx_idct32x32_1_add = vpx_idct32x32_1_add_sse2;
    vpx_idct32x32_34_add = vpx_idct32x32_34_add_c;
    if (flags & HAS_SSE2) vpx_idct32x32_34_add = vpx_idct32x32_34_add_sse2;
    vpx_idct4x4_16_add = vpx_idct4x4_16_add_c;
    if (flags & HAS_SSE2) vpx_idct4x4_16_add = vpx_idct4x4_16_add_sse2;
    vpx_idct4x4_1_add = vpx_idct4x4_1_add_c;
    if (flags & HAS_SSE2) vpx_idct4x4_1_add = vpx_idct4x4_1_add_sse2;
    vpx_idct8x8_12_add = vpx_idct8x8_12_add_c;
    if (flags & HAS_SSE2) vpx_idct8x8_12_add = vpx_idct8x8_12_add_sse2;
    vpx_idct8x8_1_add = vpx_idct8x8_1_add_c;
    if (flags & HAS_SSE2) vpx_idct8x8_1_add = vpx_idct8x8_1_add_sse2;
    vpx_idct8x8_64_add = vpx_idct8x8_64_add_c;
    if (flags & HAS_SSE2) vpx_idct8x8_64_add = vpx_idct8x8_64_add_sse2;
    vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_c;
    if (flags & HAS_SSE2) vpx_iwht4x4_16_add = vpx_iwht4x4_16_add_sse2;
    vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_c;
    if (flags & HAS_SSE2) vpx_lpf_horizontal_4 = vpx_lpf_horizontal_4_sse2;
    vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_c;
    if (flags & HAS_SSE2) vpx_lpf_horizontal_4_dual = vpx_lpf_horizontal_4_dual_sse2;
    vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_c;
    if (flags & HAS_SSE2) vpx_lpf_horizontal_8 = vpx_lpf_horizontal_8_sse2;
    vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_c;
    if (flags & HAS_SSE2) vpx_lpf_horizontal_8_dual = vpx_lpf_horizontal_8_dual_sse2;
    vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_c;
    if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_sse2;
    if (flags & HAS_AVX2) vpx_lpf_horizontal_edge_16 = vpx_lpf_horizontal_edge_16_avx2;
    vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_c;
    if (flags & HAS_SSE2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_sse2;
    if (flags & HAS_AVX2) vpx_lpf_horizontal_edge_8 = vpx_lpf_horizontal_edge_8_avx2;
    vpx_lpf_vertical_16 = vpx_lpf_vertical_16_c;
    if (flags & HAS_SSE2) vpx_lpf_vertical_16 = vpx_lpf_vertical_16_sse2;
    vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_c;
    if (flags & HAS_SSE2) vpx_lpf_vertical_16_dual = vpx_lpf_vertical_16_dual_sse2;
    vpx_lpf_vertical_4 = vpx_lpf_vertical_4_c;
    if (flags & HAS_SSE2) vpx_lpf_vertical_4 = vpx_lpf_vertical_4_sse2;
    vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_c;
    if (flags & HAS_SSE2) vpx_lpf_vertical_4_dual = vpx_lpf_vertical_4_dual_sse2;
    vpx_lpf_vertical_8 = vpx_lpf_vertical_8_c;
    if (flags & HAS_SSE2) vpx_lpf_vertical_8 = vpx_lpf_vertical_8_sse2;
    vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_c;
    if (flags & HAS_SSE2) vpx_lpf_vertical_8_dual = vpx_lpf_vertical_8_dual_sse2;
    vpx_scaled_2d = vpx_scaled_2d_c;
    if (flags & HAS_SSSE3) vpx_scaled_2d = vpx_scaled_2d_ssse3;
    vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_tm_predictor_16x16 = vpx_tm_predictor_16x16_sse2;
    vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_tm_predictor_32x32 = vpx_tm_predictor_32x32_sse2;
    vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_tm_predictor_4x4 = vpx_tm_predictor_4x4_sse2;
    vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_tm_predictor_8x8 = vpx_tm_predictor_8x8_sse2;
    vpx_v_predictor_16x16 = vpx_v_predictor_16x16_c;
    if (flags & HAS_SSE2) vpx_v_predictor_16x16 = vpx_v_predictor_16x16_sse2;
    vpx_v_predictor_32x32 = vpx_v_predictor_32x32_c;
    if (flags & HAS_SSE2) vpx_v_predictor_32x32 = vpx_v_predictor_32x32_sse2;
    vpx_v_predictor_4x4 = vpx_v_predictor_4x4_c;
    if (flags & HAS_SSE2) vpx_v_predictor_4x4 = vpx_v_predictor_4x4_sse2;
    vpx_v_predictor_8x8 = vpx_v_predictor_8x8_c;
    if (flags & HAS_SSE2) vpx_v_predictor_8x8 = vpx_v_predictor_8x8_sse2;
 }
 #endif
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif
--- a/thirdparty/libvpx/vp8_rtcd.h
+++ b/thirdparty/libvpx/vp8_rtcd.h
@ -0,0 +1,9 @@
 /*
  * Selects the VP8 run-time CPU detection header for the current target.
  * ARCH_X86, ARCH_X86_64 and ARCH_ARM come from vpx_config.h.
  * WEBM_X86ASM / WEBM_ARMASM are not defined by vpx_config.h; presumably they
  * are supplied by the build system when the assembly sources are built.
  * When neither pair matches, the C-only table is used.
  */
 #include "vpx_config.h"
 #if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
 	#include "rtcd/vp8_rtcd_x86.h"
 #elif defined(WEBM_ARMASM) && ARCH_ARM
 	#include "rtcd/vp8_rtcd_arm.h"
 #else
 	#include "rtcd/vp8_rtcd_c.h"
 #endif
--- a/thirdparty/libvpx/vp9_rtcd.h
+++ b/thirdparty/libvpx/vp9_rtcd.h
@ -0,0 +1,9 @@
 /*
  * Selects the VP9 run-time CPU detection header for the current target.
  * ARCH_X86, ARCH_X86_64 and ARCH_ARM come from vpx_config.h.
  * WEBM_X86ASM / WEBM_ARMASM are not defined by vpx_config.h; presumably they
  * are supplied by the build system when the assembly sources are built.
  * When neither pair matches, the C-only table is used.
  */
 #include "vpx_config.h"
 #if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
 	#include "rtcd/vp9_rtcd_x86.h"
 #elif defined(WEBM_ARMASM) && ARCH_ARM
 	#include "rtcd/vp9_rtcd_arm.h"
 #else
 	#include "rtcd/vp9_rtcd_c.h"
 #endif
--- a/thirdparty/libvpx/vpx_config.asm
+++ b/thirdparty/libvpx/vpx_config.asm
@ -0,0 +1,65 @@
 ; Assembler-side build configuration (one "equ" constant per option).
 ; X86_32 / X86_64 must be defined by whoever invokes the assembler; if neither
 ; is defined, ARCH_X86 and ARCH_X86_64 are left undefined.
 %ifdef X86_32
 	ARCH_X86 equ 1
 	ARCH_X86_64 equ 0
 %elifdef X86_64
 	ARCH_X86 equ 0
 	ARCH_X86_64 equ 1
 %endif
 ; Feature switches. NOTE(review): these appear intended to mirror the CONFIG_*
 ; macros in vpx_config.h - keep the two files in sync when editing either.
 HAVE_VPX_PORTS equ 1
 CONFIG_DEPENDENCY_TRACKING equ 0
 CONFIG_EXTERNAL_BUILD equ 0
 CONFIG_INSTALL_DOCS equ 0
 CONFIG_INSTALL_BINS equ 0
 CONFIG_INSTALL_LIBS equ 0
 CONFIG_INSTALL_SRCS equ 0
 CONFIG_USE_X86INC equ 1
 CONFIG_DEBUG equ 0
 CONFIG_GPROF equ 0
 CONFIG_GCOV equ 0
 CONFIG_RVCT equ 0
 CONFIG_PIC equ 1 ;TODO: autodetect
 CONFIG_CODEC_SRCS equ 0
 CONFIG_DEBUG_LIBS equ 0
 CONFIG_DEQUANT_TOKENS equ 0
 CONFIG_DC_RECON equ 0
 CONFIG_RUNTIME_CPU_DETECT equ 1
 CONFIG_POSTPROC equ 0
 CONFIG_VP9_POSTPROC equ 0
 CONFIG_MULTITHREAD equ 1
 CONFIG_INTERNAL_STATS equ 0
 ; Decoder-only configuration: both encoders are switched off.
 CONFIG_VP8_ENCODER equ 0
 CONFIG_VP8_DECODER equ 1
 CONFIG_VP9_ENCODER equ 0
 CONFIG_VP9_DECODER equ 1
 CONFIG_VP8 equ 1
 CONFIG_VP9 equ 1
 CONFIG_ENCODERS equ 0
 CONFIG_DECODERS equ 1
 CONFIG_STATIC_MSVCRT equ 0
 CONFIG_SPATIAL_RESAMPLING equ 0
 CONFIG_REALTIME_ONLY equ 0
 CONFIG_ONTHEFLY_BITPACKING equ 0
 CONFIG_ERROR_CONCEALMENT equ 0
 CONFIG_SHARED equ 0
 CONFIG_STATIC equ 0
 CONFIG_SMALL equ 0
 CONFIG_POSTPROC_VISUALIZER equ 0
 CONFIG_OS_SUPPORT equ 1
 CONFIG_UNIT_TESTS equ 0
 CONFIG_WEBM_IO equ 0
 CONFIG_LIBYUV equ 0
 CONFIG_DECODE_PERF_TESTS equ 0
 CONFIG_ENCODE_PERF_TESTS equ 0
 CONFIG_MULTI_RES_ENCODING equ 0
 CONFIG_TEMPORAL_DENOISING equ 1
 CONFIG_VP9_TEMPORAL_DENOISING equ 0
 CONFIG_COEFFICIENT_RANGE_CHECKING equ 0
 CONFIG_VP9_HIGHBITDEPTH equ 0
 CONFIG_BETTER_HW_COMPATIBILITY equ 0
 CONFIG_EXPERIMENTAL equ 0
 CONFIG_SIZE_LIMIT equ 0
 CONFIG_SPATIAL_SVC equ 0
 CONFIG_FP_MB_STATS equ 0
 CONFIG_EMULATE_HARDWARE equ 0
 CONFIG_MISC_FIXES equ 0
--- a/thirdparty/libvpx/vpx_config.h
+++ b/thirdparty/libvpx/vpx_config.h
@ -0,0 +1,124 @@
 /* Copyright (c) 2011 The WebM project authors. All Rights Reserved. */
 /*  */
 /* Use of this source code is governed by a BSD-style license */
 /* that can be found in the LICENSE file in the root of the source */
 /* tree. An additional intellectual property rights grant can be found */
 /* in the file PATENTS.  All contributing project authors may */
 /* be found in the AUTHORS file in the root of the source tree. */
 /* This file automatically generated by configure. Do not edit! */
 #ifndef VPX_CONFIG_H
 #define VPX_CONFIG_H
 #define RESTRICT
 #define INLINE inline
 #define HAVE_MIPS32 0
 #define HAVE_MEDIA 0
 /*
  * Target architecture detection from compiler-predefined macros.
  *
  * At most one ARCH_* flag is 1 (none for the generic fallback). Every branch
  * defines the same set of ARCH_* and HAVE_* macros so that no SIMD flag is
  * ever left undefined: x86 targets enable MMX/SSE2/SSSE3/AVX2 and disable
  * NEON, ARM targets enable NEON and disable the x86 sets, and the fallback
  * disables everything.
  */
 #if defined(__i386) || defined(__i386__) || defined(_M_IX86)
 	#define ARCH_X86 1
 	#define ARCH_X86_64 0
 	#define ARCH_ARM 0
 	#define HAVE_NEON 0
 	#define HAVE_NEON_ASM 0
 	#define HAVE_MMX 1
 	#define HAVE_SSE2 1
 	#define HAVE_SSSE3 1
 	#define HAVE_AVX2 1
 #elif defined(__x86_64) || defined(__x86_64__) || defined(__amd64) || defined(_M_X64)
 	#define ARCH_X86 0
 	#define ARCH_X86_64 1
 	#define ARCH_ARM 0
 	#define HAVE_NEON 0
 	#define HAVE_NEON_ASM 0
 	#define HAVE_MMX 1
 	#define HAVE_SSE2 1
 	#define HAVE_SSSE3 1
 	#define HAVE_AVX2 1
 #elif defined(__arm__) || defined(__TARGET_ARCH_ARM) || defined(_M_ARM) || defined(__aarch64__)
 	#define ARCH_X86 0
 	#define ARCH_X86_64 0
 	#define ARCH_ARM 1
 	#define HAVE_NEON 1
 	#define HAVE_NEON_ASM 1
 	#define HAVE_MMX 0
 	#define HAVE_SSE2 0
 	#define HAVE_SSSE3 0
 	#define HAVE_AVX2 0
 #else
 	#define ARCH_X86 0
 	#define ARCH_X86_64 0
 	#define ARCH_ARM 0
 	#define HAVE_NEON 0
 	#define HAVE_NEON_ASM 0
 	#define HAVE_MMX 0
 	#define HAVE_SSE2 0
 	#define HAVE_SSSE3 0
 	#define HAVE_AVX2 0
 #endif
 /* Byte order is hard-coded to little-endian for every target. */
 #define CONFIG_BIG_ENDIAN 0 //TODO: Autodetect
 /* <pthread.h> and <unistd.h> are reported as available everywhere except when _WIN32 is defined. */
 #ifdef _WIN32
 	#define HAVE_PTHREAD_H 0
 	#define HAVE_UNISTD_H 0
 #else
 	#define HAVE_PTHREAD_H 1
 	#define HAVE_UNISTD_H 1
 #endif
 /*
  * Feature switches (decoder-only build: both encoders are off, both
  * decoders are on, run-time CPU detection and multithreading are enabled).
  * vpx_config.asm carries the same CONFIG_* list for the assembler; keep the
  * two files in sync when editing either.
  */
 #define HAVE_VPX_PORTS 1
 #define CONFIG_DEPENDENCY_TRACKING 0
 #define CONFIG_EXTERNAL_BUILD 0
 #define CONFIG_INSTALL_DOCS 0
 #define CONFIG_INSTALL_BINS 0
 #define CONFIG_INSTALL_LIBS 0
 #define CONFIG_INSTALL_SRCS 0
 #define CONFIG_DEBUG 0
 #define CONFIG_GPROF 0
 #define CONFIG_GCOV 0
 #define CONFIG_RVCT 0
 #define CONFIG_CODEC_SRCS 0
 #define CONFIG_DEBUG_LIBS 0
 #define CONFIG_DEQUANT_TOKENS 0
 #define CONFIG_DC_RECON 0
 #define CONFIG_RUNTIME_CPU_DETECT 1
 #define CONFIG_POSTPROC 0
 #define CONFIG_VP9_POSTPROC 0
 #define CONFIG_MULTITHREAD 1
 #define CONFIG_INTERNAL_STATS 0
 #define CONFIG_VP8_ENCODER 0
 #define CONFIG_VP8_DECODER 1
 #define CONFIG_VP9_ENCODER 0
 #define CONFIG_VP9_DECODER 1
 #define CONFIG_VP8 1
 #define CONFIG_VP9 1
 #define CONFIG_ENCODERS 0
 #define CONFIG_DECODERS 1
 #define CONFIG_STATIC_MSVCRT 0
 #define CONFIG_SPATIAL_RESAMPLING 0
 #define CONFIG_REALTIME_ONLY 0
 #define CONFIG_ONTHEFLY_BITPACKING 0
 #define CONFIG_ERROR_CONCEALMENT 0
 #define CONFIG_SHARED 0
 #define CONFIG_STATIC 0
 #define CONFIG_SMALL 0
 #define CONFIG_POSTPROC_VISUALIZER 0
 #define CONFIG_OS_SUPPORT 1
 #define CONFIG_UNIT_TESTS 0
 #define CONFIG_WEBM_IO 0
 #define CONFIG_LIBYUV 0
 #define CONFIG_DECODE_PERF_TESTS 0
 #define CONFIG_ENCODE_PERF_TESTS 0
 #define CONFIG_MULTI_RES_ENCODING 0
 /* Set to 1 to agree with "CONFIG_TEMPORAL_DENOISING equ 1" in vpx_config.asm. */
 #define CONFIG_TEMPORAL_DENOISING 1
 #define CONFIG_VP9_TEMPORAL_DENOISING 0
 #define CONFIG_COEFFICIENT_RANGE_CHECKING 0
 #define CONFIG_VP9_HIGHBITDEPTH 0
 #define CONFIG_BETTER_HW_COMPATIBILITY 0
 #define CONFIG_EXPERIMENTAL 0
 #define CONFIG_SIZE_LIMIT 0
 #define CONFIG_SPATIAL_SVC 0
 #define CONFIG_FP_MB_STATS 0
 #define CONFIG_EMULATE_HARDWARE 0
 #define CONFIG_MISC_FIXES 0
 #endif /* VPX_CONFIG_H */
--- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm
+++ b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/intrapred_neon_asm.asm
@ -1,3 +1,5 @@
 ; This file was created from a .asm file
 ;  using the ads2armasm_ms.pl script.
 ;
 ;  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 ;
@ -20,11 +22,10 @@
    EXPORT  |vpx_tm_predictor_8x8_neon|
    EXPORT  |vpx_tm_predictor_16x16_neon|
    EXPORT  |vpx_tm_predictor_32x32_neon|
-    ARM
+    
-    REQUIRE8
+    
    PRESERVE8
-    AREA ||.text||, CODE, READONLY, ALIGN=2
+    AREA |.text|, CODE, READONLY, ALIGN=2
 ;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                              const uint8_t *above,
@ -42,6 +43,7 @@
    vst1.32             {d0[0]}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_v_predictor_4x4_neon|
    ALIGN 4
 ;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                              const uint8_t *above,
@ -63,6 +65,7 @@
    vst1.8              {d0}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_v_predictor_8x8_neon|
    ALIGN 4
 ;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -92,6 +95,7 @@
    vst1.8              {q0}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_v_predictor_16x16_neon|
    ALIGN 4
 ;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -125,6 +129,7 @@ loop_v
    bgt                 loop_v
    bx                  lr
    ENDP                ; |vpx_v_predictor_32x32_neon|
    ALIGN 4
 ;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                              const uint8_t *above,
@ -146,6 +151,7 @@ loop_v
    vst1.32             {d0[0]}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_h_predictor_4x4_neon|
    ALIGN 4
 ;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                              const uint8_t *above,
@ -175,6 +181,7 @@ loop_v
    vst1.64             {d0}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_h_predictor_8x8_neon|
    ALIGN 4
 ;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -220,6 +227,7 @@ loop_v
    vst1.8              {q0}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_h_predictor_16x16_neon|
    ALIGN 4
 ;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -286,6 +294,7 @@ loop_h
    bgt                 loop_h
    bx                  lr
    ENDP                ; |vpx_h_predictor_32x32_neon|
    ALIGN 4
 ;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -332,6 +341,7 @@ loop_h
    vst1.32             {d1[0]}, [r0], r1
    bx                  lr
    ENDP                ; |vpx_tm_predictor_4x4_neon|
    ALIGN 4
 ;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -404,6 +414,7 @@ loop_h
    bx                  lr
    ENDP                ; |vpx_tm_predictor_8x8_neon|
    ALIGN 4
 ;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
 ;                                const uint8_t *above,
@ -497,6 +508,7 @@ loop_16x16_neon
    bx                  lr
    ENDP                ; |vpx_tm_predictor_16x16_neon|
    ALIGN 4
 ;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
 ;                                  const uint8_t *above,
@ -626,5 +638,6 @@ loop_32x32_neon
    bx                  lr
    ENDP                ; |vpx_tm_predictor_32x32_neon|
    ALIGN 4
    END
--- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm
+++ b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/loopfilter_mb_neon.asm
@ -1,3 +1,5 @@
 ; This file was created from a .asm file
 ;  using the ads2armasm_ms.pl script.
 ;
 ;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 ;
@ -11,9 +13,8 @@
    EXPORT  |vpx_lpf_horizontal_edge_8_neon|
    EXPORT  |vpx_lpf_horizontal_edge_16_neon|
    EXPORT  |vpx_lpf_vertical_16_neon|
    ARM
-    AREA ||.text||, CODE, READONLY, ALIGN=2
+    AREA |.text|, CODE, READONLY, ALIGN=2
 ; void mb_lpf_horizontal_edge(uint8_t *s, int p,
 ;                             const uint8_t *blimit,
@ -117,6 +118,7 @@ h_next
    pop         {r4-r8, pc}
    ENDP        ; |mb_lpf_horizontal_edge|
    ALIGN 4
 ; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
 ;                                     const uint8_t *blimit,
@ -131,6 +133,7 @@ h_next
    mov r12, #1
    b mb_lpf_horizontal_edge
    ENDP        ; |vpx_lpf_horizontal_edge_8_neon|
    ALIGN 4
 ; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
 ;                                      const uint8_t *blimit,
@ -145,6 +148,7 @@ h_next
    mov r12, #2
    b mb_lpf_horizontal_edge
    ENDP        ; |vpx_lpf_horizontal_edge_16_neon|
    ALIGN 4
 ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
 ;                               const uint8_t *blimit,
@ -309,6 +313,7 @@ v_end
    pop         {r4-r8, pc}
    ENDP        ; |vpx_lpf_vertical_16_neon|
    ALIGN 4
 ; void vpx_wide_mbfilter_neon();
 ; This is a helper function for the loopfilters. The individual functions do the
@ -631,5 +636,6 @@ v_end
    bx          lr
    ENDP        ; |vpx_wide_mbfilter_neon|
    ALIGN 4
    END
--- a/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm
+++ b/thirdparty/libvpx/vpx_dsp/arm/armasm_ms/save_reg_neon.asm
@ -1,3 +1,5 @@
 ; This file was created from a .asm file
 ;  using the ads2armasm_ms.pl script.
 ;
 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
@ -12,11 +14,10 @@
    EXPORT  |vpx_push_neon|
    EXPORT  |vpx_pop_neon|
-    ARM
+    
-    REQUIRE8
+    
    PRESERVE8
-    AREA ||.text||, CODE, READONLY, ALIGN=2
+    AREA |.text|, CODE, READONLY, ALIGN=2
 |vpx_push_neon| PROC
    vst1.i64            {d8, d9, d10, d11}, [r0]!
@ -24,6 +25,7 @@
    bx              lr
    ENDP
    ALIGN 4
 |vpx_pop_neon| PROC
    vld1.i64            {d8, d9, d10, d11}, [r0]!
@ -31,6 +33,7 @@
    bx              lr
    ENDP
    ALIGN 4
    END
--- a/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s
+++ b/thirdparty/libvpx/vpx_dsp/arm/gas/intrapred_neon_asm.s
@ -0,0 +1,658 @@
@ This file was created from a .asm file
@  using the ads2gas.pl script.
 	.equ DO1STROUNDING, 0
@
@  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
@
@  Use of this source code is governed by a BSD-style license
@  that can be found in the LICENSE file in the root of the source
@  tree. An additional intellectual property rights grant can be found
@  in the file PATENTS.  All contributing project authors may
@  be found in the AUTHORS file in the root of the source tree.
@
    @ Export every predictor entry point defined in this file and mark each
    @ symbol as a function. One .global/.type pair per routine.
    .global vpx_v_predictor_4x4_neon 
 	.type vpx_v_predictor_4x4_neon, function
    .global vpx_v_predictor_8x8_neon 
 	.type vpx_v_predictor_8x8_neon, function
    .global vpx_v_predictor_16x16_neon 
 	.type vpx_v_predictor_16x16_neon, function
    .global vpx_v_predictor_32x32_neon 
 	.type vpx_v_predictor_32x32_neon, function
    .global vpx_h_predictor_4x4_neon 
 	.type vpx_h_predictor_4x4_neon, function
    .global vpx_h_predictor_8x8_neon 
 	.type vpx_h_predictor_8x8_neon, function
    .global vpx_h_predictor_16x16_neon 
 	.type vpx_h_predictor_16x16_neon, function
    .global vpx_h_predictor_32x32_neon 
 	.type vpx_h_predictor_32x32_neon, function
    .global vpx_tm_predictor_4x4_neon 
 	.type vpx_tm_predictor_4x4_neon, function
    .global vpx_tm_predictor_8x8_neon 
 	.type vpx_tm_predictor_8x8_neon, function
    .global vpx_tm_predictor_16x16_neon 
 	.type vpx_tm_predictor_16x16_neon, function
    .global vpx_tm_predictor_32x32_neon 
 	.type vpx_tm_predictor_32x32_neon, function
    @ Assemble what follows with the ARM (32-bit) instruction encoding.
   .arm
    @ Build attributes recording the 8-byte stack alignment contract.
   .eabi_attribute 24, 1 @Tag_ABI_align_needed
   .eabi_attribute 25, 1 @Tag_ABI_align_preserved
    @ Emit the code into .text, aligned to 2^2 = 4 bytes.
 .text
 .p2align 2
@void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Vertical prediction for a 4x4 block: the 4 bytes read from above are
@ written to each of 4 rows of dst, advancing dst by y_stride after every
@ row. left (r3) is never read. Only d0 and r0 are modified.
 _vpx_v_predictor_4x4_neon:
 	vpx_v_predictor_4x4_neon: @ PROC
    @ d0[0] = above[0..3] (one 32-bit lane)
    vld1.32             {d0[0]}, [r2]
    @ Write that lane to 4 rows; each store post-increments r0 by r1.
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d0[0]}, [r0], r1
    bx                  lr
 	.size vpx_v_predictor_4x4_neon, .-vpx_v_predictor_4x4_neon    @ ENDP                @ |vpx_v_predictor_4x4_neon|
@void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Vertical prediction for an 8x8 block: the 8 bytes read from above are
@ written to each of 8 rows of dst, advancing dst by y_stride after every
@ row. left (r3) is never read. Only d0 and r0 are modified.
 _vpx_v_predictor_8x8_neon:
 	vpx_v_predictor_8x8_neon: @ PROC
    @ d0 = above[0..7]
    vld1.8              {d0}, [r2]
    @ Write d0 to 8 rows; each store post-increments r0 by r1.
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    bx                  lr
 	.size vpx_v_predictor_8x8_neon, .-vpx_v_predictor_8x8_neon    @ ENDP                @ |vpx_v_predictor_8x8_neon|
@void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Vertical prediction for a 16x16 block: the 16 bytes read from above are
@ written to each of 16 rows of dst, advancing dst by y_stride after every
@ row. left (r3) is never read. Only q0 and r0 are modified.
 _vpx_v_predictor_16x16_neon:
 	vpx_v_predictor_16x16_neon: @ PROC
    @ q0 = above[0..15]
    vld1.8              {q0}, [r2]
    @ Write q0 to 16 rows; each store post-increments r0 by r1.
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    bx                  lr
 	.size vpx_v_predictor_16x16_neon, .-vpx_v_predictor_16x16_neon    @ ENDP                @ |vpx_v_predictor_16x16_neon|
@void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Vertical prediction for a 32x32 block: the 32 bytes read from above are
@ written to each of 32 rows of dst (2 loop passes of 16 rows), advancing
@ dst by y_stride after every row. left (r3) is never read.
@ Modifies q0, q1, r0, r2 and the condition flags.
 _vpx_v_predictor_32x32_neon:
 	vpx_v_predictor_32x32_neon: @ PROC
    @ q0:q1 = above[0..31]
    vld1.8              {q0, q1}, [r2]
    @ above is fully loaded, so r2 is reused as the pass counter (2 passes).
    mov                 r2, #2
 loop_v:
    @ 16 rows per pass; each store post-increments r0 by r1.
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    @ Repeat while the decremented counter is still > 0.
    subs                r2, r2, #1
    bgt                 loop_v
    bx                  lr
 	.size vpx_v_predictor_32x32_neon, .-vpx_v_predictor_32x32_neon    @ ENDP                @ |vpx_v_predictor_32x32_neon|
@void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Horizontal prediction for a 4x4 block: row i of dst is filled with the
@ byte left[i], for i = 0..3. above (r2) is never read.
@ Modifies d0, d1 and r0.
 _vpx_h_predictor_4x4_neon:
 	vpx_h_predictor_4x4_neon: @ PROC
    @ d1[0] (32-bit lane) = left[0..3]
    vld1.32             {d1[0]}, [r3]
    @ For each row: broadcast byte i of d1 across d0, then store the low
    @ 4 bytes of d0 and post-increment r0 by r1.
    vdup.8              d0, d1[0]
    vst1.32             {d0[0]}, [r0], r1
    vdup.8              d0, d1[1]
    vst1.32             {d0[0]}, [r0], r1
    vdup.8              d0, d1[2]
    vst1.32             {d0[0]}, [r0], r1
    vdup.8              d0, d1[3]
    vst1.32             {d0[0]}, [r0], r1
    bx                  lr
 	.size vpx_h_predictor_4x4_neon, .-vpx_h_predictor_4x4_neon    @ ENDP                @ |vpx_h_predictor_4x4_neon|
@void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
@                              const uint8_t *above,
@                              const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Horizontal prediction for an 8x8 block: row i of dst is filled with the
@ byte left[i], for i = 0..7. above (r2) is never read.
@ Modifies d0, d1 and r0.
 _vpx_h_predictor_8x8_neon:
 	vpx_h_predictor_8x8_neon: @ PROC
    @ d1 = left[0..7]
    vld1.64             {d1}, [r3]
    @ For each row: broadcast byte i of d1 across d0, then store 8 bytes
    @ and post-increment r0 by r1.
    vdup.8              d0, d1[0]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[1]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[2]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[3]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[4]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[5]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[6]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[7]
    vst1.64             {d0}, [r0], r1
    bx                  lr
 	.size vpx_h_predictor_8x8_neon, .-vpx_h_predictor_8x8_neon    @ ENDP                @ |vpx_h_predictor_8x8_neon|
@void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Horizontal prediction for a 16x16 block: row i of dst is filled with the
@ byte left[i], for i = 0..15. above (r2) is never read.
@ Modifies q0, q1 and r0.
 _vpx_h_predictor_16x16_neon:
 	vpx_h_predictor_16x16_neon: @ PROC
    @ q1 = left[0..15]: d2 holds left[0..7], d3 holds left[8..15]
    vld1.8              {q1}, [r3]
    @ Rows 0-7: broadcast byte i of d2 across q0, store 16 bytes,
    @ post-increment r0 by r1.
    vdup.8              q0, d2[0]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[1]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[2]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[3]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[4]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[5]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[6]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[7]
    vst1.8              {q0}, [r0], r1
    @ Rows 8-15: same, taking the bytes from d3.
    vdup.8              q0, d3[0]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[1]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[2]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[3]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[4]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[5]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[6]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[7]
    vst1.8              {q0}, [r0], r1
    bx                  lr
 	.size vpx_h_predictor_16x16_neon, .-vpx_h_predictor_16x16_neon    @ ENDP                @ |vpx_h_predictor_16x16_neon|
@void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ Horizontal prediction for a 32x32 block: row i of dst is filled with the
@ byte left[i], for i = 0..31. Each 32-byte row is written as two 16-byte
@ stores. The above pointer is never dereferenced; r2 is overwritten and
@ used as a loop counter. Modifies q0, q1, r0-r3 and the condition flags.
 _vpx_h_predictor_32x32_neon:
 	vpx_h_predictor_32x32_neon: @ PROC
    @ The first store of each row advances r0 by 16 (writeback "!"), so the
    @ second store only has to add y_stride - 16 to reach the next row.
    sub                 r1, r1, #16
    @ 2 passes of 16 rows each.
    mov                 r2, #2
 loop_h:
    @ q1 = next 16 left bytes (d2 = first 8, d3 = last 8); r3 += 16
    vld1.8              {q1}, [r3]!
    @ Per row: broadcast one left byte across q0, store it twice.
    vdup.8              q0, d2[0]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[1]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[2]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[3]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[4]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[5]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[6]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[7]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[0]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[1]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[2]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[3]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[4]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[5]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[6]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[7]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    @ Repeat while the decremented counter is still > 0.
    subs                r2, r2, #1
    bgt                 loop_h
    bx                  lr
 	.size vpx_h_predictor_32x32_neon, .-vpx_h_predictor_32x32_neon    @ ENDP                @ |vpx_h_predictor_32x32_neon|
@void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ TM prediction for a 4x4 block. For row r and column c the stored byte is
@   left[r] + above[c] - above[-1]
@ computed in 16 bits and saturated to 0..255 by vqmovun.s16.
@ Modifies q0-q3, r0, r3 and r12.
 _vpx_tm_predictor_4x4_neon:
 	vpx_tm_predictor_4x4_neon: @ PROC
    @ Load ytop_left = above[-1];
    sub                 r12, r2, #1
    @ {d0[]} replicates the single loaded byte into every lane of d0.
    vld1.u8             {d0[]}, [r12]
    @ Load above 4 pixels
    vld1.32             {d2[0]}, [r2]
    @ Compute above - ytop_left
    @ (widening subtract: q3 holds the 16-bit differences; only the low
    @ 4 lanes correspond to loaded above pixels)
    vsubl.u8            q3, d2, d0
    @ Load left row by row and compute left + (above - ytop_left)
    @ 1st row and 2nd row
    @ Each load replicates one left byte across the register; r3 += 1.
    vld1.u8             {d2[]}, [r3]!
    vld1.u8             {d4[]}, [r3]!
    @ Widen the left bytes to 16 bits so they can be added to q3.
    vmovl.u8            q1, d2
    vmovl.u8            q2, d4
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    @ Saturating narrow from signed 16-bit to unsigned 8-bit.
    vqmovun.s16         d0, q1
    vqmovun.s16         d1, q2
    @ Only the first 4 bytes of each result are written per row.
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d1[0]}, [r0], r1
    @ 3rd row and 4th row
    vld1.u8             {d2[]}, [r3]!
    @ Last left byte: no writeback needed.
    vld1.u8             {d4[]}, [r3]
    vmovl.u8            q1, d2
    vmovl.u8            q2, d4
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqmovun.s16         d0, q1
    vqmovun.s16         d1, q2
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d1[0]}, [r0], r1
    bx                  lr
 	.size vpx_tm_predictor_4x4_neon, .-vpx_tm_predictor_4x4_neon    @ ENDP                @ |vpx_tm_predictor_4x4_neon|
@void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ TM prediction for an 8x8 block. For row r and column c the stored byte is
@   left[r] + above[c] - above[-1]
@ computed in 16 bits and saturated to 0..255 by vqmovun.s16.
@ Modifies q0, q1, q3, q8-q10, d30, r0 and r12.
 _vpx_tm_predictor_8x8_neon:
 	vpx_tm_predictor_8x8_neon: @ PROC
    @ Load ytop_left = above[-1];
    sub                 r12, r2, #1
    @ {d0[]} replicates the single loaded byte into every lane of d0.
    vld1.8              {d0[]}, [r12]
    @ preload 8 left
    vld1.8              {d30}, [r3]
    @ Load above 8 pixels
    vld1.64             {d2}, [r2]
    @ q10 = left[0..7] widened to 16 bits (d20 = rows 0-3, d21 = rows 4-7)
    vmovl.u8            q10, d30
    @ Compute above - ytop_left
    vsubl.u8            q3, d2, d0
    @ Load left row by row and compute left + (above - ytop_left)
    @ 1st row and 2nd row
    @ vdup.16 broadcasts one widened left value across all 8 lanes.
    vdup.16             q0, d20[0]
    vdup.16             q1, d20[1]
    vadd.s16            q0, q3, q0
    vadd.s16            q1, q3, q1
    @ 3rd row and 4th row
    vdup.16             q8, d20[2]
    vdup.16             q9, d20[3]
    vadd.s16            q8, q3, q8
    vadd.s16            q9, q3, q9
    @ Saturating narrow of the four rows to unsigned 8-bit, then store them.
    vqmovun.s16         d0, q0
    vqmovun.s16         d1, q1
    vqmovun.s16         d2, q8
    vqmovun.s16         d3, q9
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1
    vst1.64             {d2}, [r0], r1
    vst1.64             {d3}, [r0], r1
    @ 5th row and 6th row
    vdup.16             q0, d21[0]
    vdup.16             q1, d21[1]
    vadd.s16            q0, q3, q0
    vadd.s16            q1, q3, q1
    @ 7th row and 8th row
    vdup.16             q8, d21[2]
    vdup.16             q9, d21[3]
    vadd.s16            q8, q3, q8
    vadd.s16            q9, q3, q9
    vqmovun.s16         d0, q0
    vqmovun.s16         d1, q1
    vqmovun.s16         d2, q8
    vqmovun.s16         d3, q9
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1
    vst1.64             {d2}, [r0], r1
    vst1.64             {d3}, [r0], r1
    bx                  lr
 	.size vpx_tm_predictor_8x8_neon, .-vpx_tm_predictor_8x8_neon    @ ENDP                @ |vpx_tm_predictor_8x8_neon|
@void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                const uint8_t *above,
@                                const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ TM prediction for a 16x16 block. For row r and column c the stored byte is
@   left[r] + above[c] - above[-1]
@ computed in 16 bits and saturated to 0..255 by vqmovun.s16.
@ Modifies q0-q3, q8-q11, r0, r2, r3, r12 and the condition flags.
@
@ NOTE(review): the left preload at the bottom of the loop also executes on
@ the final pass, so 8 bytes at left[16..23] are read and then discarded.
@ Confirm that callers always provide readable memory there.
 _vpx_tm_predictor_16x16_neon:
 	vpx_tm_predictor_16x16_neon: @ PROC
    @ Load ytop_left = above[-1];
    sub                 r12, r2, #1
    @ {d0[]} replicates the single loaded byte into every lane of d0.
    vld1.8              {d0[]}, [r12]
    @ Load above 16 pixels
    vld1.8              {q1}, [r2]
    @ preload 8 left pixels into d18
    vld1.8              {d18}, [r3]!
    @ Compute above - ytop_left
    @ (q2 = columns 0-7, q3 = columns 8-15, as 16-bit values)
    vsubl.u8            q2, d2, d0
    vsubl.u8            q3, d3, d0
    @ q10 = the 8 preloaded left pixels widened to 16 bits
    vmovl.u8            q10, d18
    @ Load left row by row and compute left + (above - ytop_left)
    @ Process 8 rows in each single loop and loop 2 times to process 16 rows.
    @ above is fully loaded, so r2 is reused as the pass counter.
    mov                 r2, #2
 loop_16x16_neon:
    @ Process two rows.
    @ q0 / q8 = left value of the two rows broadcast to all 8 lanes.
    vdup.16             q0, d20[0]
    vdup.16             q8, d20[1]
    @ Each row needs two adds: columns 0-7 (q2) and columns 8-15 (q3).
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    @ Saturating narrow: d2:d3 = first row, d22:d23 = second row.
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vdup.16             q0, d20[2]                  @ preload next 2 rows data
    vdup.16             q8, d20[3]
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
    @ Process two rows.
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vdup.16             q0, d21[0]                  @ preload next 2 rows data
    vdup.16             q8, d21[1]
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
    @ Process two rows.
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vdup.16             q0, d21[2]                  @ preload next 2 rows data
    vdup.16             q8, d21[3]
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
    @ Process two rows.
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vld1.8              {d18}, [r3]!                  @ preload next 8 left pixels into d18
    vmovl.u8            q10, d18
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
    @ Repeat while the decremented counter is still > 0.
    subs                r2, r2, #1
    bgt                 loop_16x16_neon
    bx                  lr
 	.size vpx_tm_predictor_16x16_neon, .-vpx_tm_predictor_16x16_neon    @ ENDP                @ |vpx_tm_predictor_16x16_neon|
@void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
@                                  const uint8_t *above,
@                                  const uint8_t *left)
@ r0  uint8_t *dst
@ r1  ptrdiff_t y_stride
@ r2  const uint8_t *above
@ r3  const uint8_t *left
@
@ TM prediction for a 32x32 block. For row r and column c the stored byte is
@   left[r] + above[c] - above[-1]
@ computed in 16 bits and saturated to 0..255 by vqmovun.s16.
@ Modifies q0-q3, q8-q15, r0, r2, r3, r12 and the condition flags.
@
@ NOTE(review): the left preload at the bottom of the loop also executes on
@ the final pass, so 8 bytes at left[32..39] are read and then discarded.
@ Confirm that callers always provide readable memory there.
 _vpx_tm_predictor_32x32_neon:
 	vpx_tm_predictor_32x32_neon: @ PROC
    @ Load ytop_left = above[-1];
    sub                 r12, r2, #1
    @ {d0[]} replicates the single loaded byte into every lane of d0.
    vld1.8              {d0[]}, [r12]
    @ Load above 32 pixels
    vld1.8              {q1}, [r2]!
    vld1.8              {q2}, [r2]
    @ preload 8 left pixels
    vld1.8              {d26}, [r3]!
    @ Compute above - ytop_left
    @ (q8..q11 = columns 0-7, 8-15, 16-23, 24-31 as 16-bit values)
    vsubl.u8            q8, d2, d0
    vsubl.u8            q9, d3, d0
    vsubl.u8            q10, d4, d0
    vsubl.u8            q11, d5, d0
    @ q3 = the 8 preloaded left pixels widened to 16 bits (d6 = first 4,
    @ d7 = last 4). d26 is free again after this and is reused for output.
    vmovl.u8            q3, d26
    @ Load left row by row and compute left + (above - ytop_left)
    @ Process 8 rows in each single loop and loop 4 times to process 32 rows.
    @ above is fully loaded, so r2 is reused as the pass counter.
    mov                 r2, #4
 loop_32x32_neon:
    @ Process two rows.
    @ q0 / q2 = left value of the two rows broadcast to all 8 lanes.
    vdup.16             q0, d6[0]
    vdup.16             q2, d6[1]
    @ First row: four adds cover the 32 columns, results in q12-q15.
    vadd.s16            q12, q0, q8
    vadd.s16            q13, q0, q9
    vadd.s16            q14, q0, q10
    vadd.s16            q15, q0, q11
    @ Narrowing of the first row is interleaved with the adds of the second
    @ row; each q12-q15 value is narrowed before it is overwritten.
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    @ d0-d3 = first row (32 bytes); d24-d27 = second row.
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
    @ Broadcast the left values for the next two rows before storing.
    vdup.16             q1, d6[2]
    vdup.16             q2, d6[3]
    vst1.64             {d24-d27}, [r0], r1
    @ Process two rows.
    vadd.s16            q12, q1, q8
    vadd.s16            q13, q1, q9
    vadd.s16            q14, q1, q10
    vadd.s16            q15, q1, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
    vdup.16             q0, d7[0]
    vdup.16             q2, d7[1]
    vst1.64             {d24-d27}, [r0], r1
    @ Process two rows.
    vadd.s16            q12, q0, q8
    vadd.s16            q13, q0, q9
    vadd.s16            q14, q0, q10
    vadd.s16            q15, q0, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
    vdup.16             q0, d7[2]
    vdup.16             q2, d7[3]
    vst1.64             {d24-d27}, [r0], r1
    @ Process two rows.
    vadd.s16            q12, q0, q8
    vadd.s16            q13, q0, q9
    vadd.s16            q14, q0, q10
    vadd.s16            q15, q0, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    @ d0 was just stored, so it is reused as the landing register for the
    @ next 8 left pixels.
    vld1.8              {d0}, [r3]!                   @ preload 8 left pixels
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
    vmovl.u8            q3, d0
    vst1.64             {d24-d27}, [r0], r1
    @ Repeat while the decremented counter is still > 0.
    subs                r2, r2, #1
    bgt                 loop_32x32_neon
    bx                  lr
 	.size vpx_tm_predictor_32x32_neon, .-vpx_tm_predictor_32x32_neon    @ ENDP                @ |vpx_tm_predictor_32x32_neon|
 	.section	.note.GNU-stack,"",%progbits
--- a/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s
+++ b/thirdparty/libvpx/vpx_dsp/arm/gas/loopfilter_mb_neon.s
@ -0,0 +1,647 @@
@ This file was created from a .asm file
@  using the ads2gas.pl script.
 	.equ DO1STROUNDING, 0
@
@  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
@
@  Use of this source code is governed by a BSD-style license
@  that can be found in the LICENSE file in the root of the source
@  tree. An additional intellectual property rights grant can be found
@  in the file PATENTS.  All contributing project authors may
@  be found in the AUTHORS file in the root of the source tree.
@
    @ Export the three loop-filter entry points and mark each symbol as a
    @ function.
    .global vpx_lpf_horizontal_edge_8_neon 
 	.type vpx_lpf_horizontal_edge_8_neon, function
    .global vpx_lpf_horizontal_edge_16_neon 
 	.type vpx_lpf_horizontal_edge_16_neon, function
    .global vpx_lpf_vertical_16_neon 
 	.type vpx_lpf_vertical_16_neon, function
    @ Assemble what follows with the ARM (32-bit) instruction encoding.
   .arm
    @ Emit the code into .text, aligned to 2^2 = 4 bytes.
 .text
 .p2align 2
@ void mb_lpf_horizontal_edge(uint8_t *s, int p,
@                             const uint8_t *blimit,
@                             const uint8_t *limit,
@                             const uint8_t *thresh,
@                             int count)
@ r0    uint8_t *s,
@ r1    int p, /* pitch */
@ r2    const uint8_t *blimit,
@ r3    const uint8_t *limit,
@ sp    const uint8_t *thresh,
@ r12   int count
@
@ Shared body of the horizontal-edge wrappers. For each of `count` groups of
@ 8 columns it loads the 16 rows s - 8*p .. s + 7*p (8 bytes each), calls
@ vpx_wide_mbfilter_neon, and writes back 4, 6 or 14 rows depending on the
@ bits tested in r7 after the call. s then advances by 8 bytes.
@ count arrives in r12 rather than through the normal argument registers.
@ r4-r8, lr and d8-d15 are saved on entry and restored on exit.
@ NOTE(review): r7 is presumably set by vpx_wide_mbfilter_neon, and r12 is
@ assumed to survive that call; the callee is not visible here - confirm.
 _mb_lpf_horizontal_edge:
 	mb_lpf_horizontal_edge: @ PROC
    push        {r4-r8, lr}
    vpush       {d8-d15}
    @ 6 core registers (24 bytes) + 8 D registers (64 bytes) were pushed,
    @ so the caller's first stack argument now sits at sp + 88.
    ldr         r4, [sp, #88]              @ load thresh
 h_count:
    @ {dN[]} replicates the single loaded byte into every lane of dN.
    vld1.8      {d16[]}, [r2]              @ load *blimit
    vld1.8      {d17[]}, [r3]              @ load *limit
    vld1.8      {d18[]}, [r4]              @ load *thresh
    sub         r8, r0, r1, lsl #3         @ r8 = s - 8 * p (start of the 16 rows)
    @ Load 16 rows of 8 bytes, stepping r8 by the pitch. The :64 qualifier
    @ states that each row address is 64-bit aligned.
    vld1.u8     {d0}, [r8,:64], r1          @ p7
    vld1.u8     {d1}, [r8,:64], r1          @ p6
    vld1.u8     {d2}, [r8,:64], r1          @ p5
    vld1.u8     {d3}, [r8,:64], r1          @ p4
    vld1.u8     {d4}, [r8,:64], r1          @ p3
    vld1.u8     {d5}, [r8,:64], r1          @ p2
    vld1.u8     {d6}, [r8,:64], r1          @ p1
    vld1.u8     {d7}, [r8,:64], r1          @ p0
    vld1.u8     {d8}, [r8,:64], r1          @ q0
    vld1.u8     {d9}, [r8,:64], r1          @ q1
    vld1.u8     {d10}, [r8,:64], r1         @ q2
    vld1.u8     {d11}, [r8,:64], r1         @ q3
    vld1.u8     {d12}, [r8,:64], r1         @ q4
    vld1.u8     {d13}, [r8,:64], r1         @ q5
    vld1.u8     {d14}, [r8,:64], r1         @ q6
    vld1.u8     {d15}, [r8,:64], r1         @ q7
    @ bl overwrites lr; the caller's return address was saved by the push.
    bl          vpx_wide_mbfilter_neon
    @ Bit 0 of r7 clear -> try the wider store paths below.
    tst         r7, #1
    beq         h_mbfilter
    @ flat && mask were not set for any of the channels. Just store the values
    @ from filter.
    @ r8 = s - 2 * p: 4 rows are written.
    sub         r8, r0, r1, lsl #1
    vst1.u8     {d25}, [r8,:64], r1         @ store op1
    vst1.u8     {d24}, [r8,:64], r1         @ store op0
    vst1.u8     {d23}, [r8,:64], r1         @ store oq0
    vst1.u8     {d26}, [r8,:64], r1         @ store oq1
    b           h_next
 h_mbfilter:
    @ Bit 1 of r7 clear -> all 14 rows must be written.
    tst         r7, #2
    beq         h_wide_mbfilter
    @ flat2 was not set for any of the channels. Just store the values from
    @ mbfilter.
    @ r8 = s - 3 * p: 6 rows are written.
    sub         r8, r0, r1, lsl #1
    sub         r8, r8, r1
    vst1.u8     {d18}, [r8,:64], r1         @ store op2
    vst1.u8     {d19}, [r8,:64], r1         @ store op1
    vst1.u8     {d20}, [r8,:64], r1         @ store op0
    vst1.u8     {d21}, [r8,:64], r1         @ store oq0
    vst1.u8     {d22}, [r8,:64], r1         @ store oq1
    vst1.u8     {d23}, [r8,:64], r1         @ store oq2
    b           h_next
 h_wide_mbfilter:
    @ r8 = s - 7 * p: 14 rows are written.
    sub         r8, r0, r1, lsl #3
    add         r8, r8, r1
    vst1.u8     {d16}, [r8,:64], r1         @ store op6
    vst1.u8     {d24}, [r8,:64], r1         @ store op5
    vst1.u8     {d25}, [r8,:64], r1         @ store op4
    vst1.u8     {d26}, [r8,:64], r1         @ store op3
    vst1.u8     {d27}, [r8,:64], r1         @ store op2
    vst1.u8     {d18}, [r8,:64], r1         @ store op1
    vst1.u8     {d19}, [r8,:64], r1         @ store op0
    vst1.u8     {d20}, [r8,:64], r1         @ store oq0
    vst1.u8     {d21}, [r8,:64], r1         @ store oq1
    vst1.u8     {d22}, [r8,:64], r1         @ store oq2
    vst1.u8     {d23}, [r8,:64], r1         @ store oq3
    vst1.u8     {d1}, [r8,:64], r1          @ store oq4
    vst1.u8     {d2}, [r8,:64], r1          @ store oq5
    vst1.u8     {d3}, [r8,:64], r1          @ store oq6
 h_next:
    @ Advance to the next group of 8 columns and loop until count reaches 0.
    add         r0, r0, #8
    subs        r12, r12, #1
    bne         h_count
    vpop        {d8-d15}
    @ Popping the saved lr into pc returns to the caller.
    pop         {r4-r8, pc}
 	.size mb_lpf_horizontal_edge, .-mb_lpf_horizontal_edge    @ ENDP        @ |mb_lpf_horizontal_edge|
@ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
@                                     const uint8_t *blimit,
@                                     const uint8_t *limit,
@                                     const uint8_t *thresh)
@ r0    uint8_t *s,
@ r1    int pitch,
@ r2    const uint8_t *blimit,
@ r3    const uint8_t *limit,
@ sp    const uint8_t *thresh
@
@ Thin wrapper: sets the group count in r12 to 1 (one 8-byte-wide group)
@ and jumps to mb_lpf_horizontal_edge with r0-r3 and the stack untouched.
 _vpx_lpf_horizontal_edge_8_neon:
 	vpx_lpf_horizontal_edge_8_neon: @ PROC
    mov r12, #1
    @ Plain branch (not bl): lr still holds this function's return address,
    @ so mb_lpf_horizontal_edge returns directly to our caller.
    b mb_lpf_horizontal_edge
 	.size vpx_lpf_horizontal_edge_8_neon, .-vpx_lpf_horizontal_edge_8_neon    @ ENDP        @ |vpx_lpf_horizontal_edge_8_neon|
@ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
@                                      const uint8_t *blimit,
@                                      const uint8_t *limit,
@                                      const uint8_t *thresh)
@ r0    uint8_t *s,
@ r1    int pitch,
@ r2    const uint8_t *blimit,
@ r3    const uint8_t *limit,
@ sp    const uint8_t *thresh
@
@ Thin entry point for a 16-pixel-wide horizontal edge. It only selects the
@ iteration count and hands over to mb_lpf_horizontal_edge, which decrements
@ r12 once per 8-byte step of r0 and loops until it reaches zero.
 _vpx_lpf_horizontal_edge_16_neon:
 	vpx_lpf_horizontal_edge_16_neon: @ PROC
    @ r12 = number of 8-pixel segments to filter (two, i.e. 16 pixels).
    mov r12, #2
    @ Plain branch (not bl): lr is untouched, so mb_lpf_horizontal_edge
    @ returns straight to our caller.
    b mb_lpf_horizontal_edge
 	.size vpx_lpf_horizontal_edge_16_neon, .-vpx_lpf_horizontal_edge_16_neon    @ ENDP        @ |vpx_lpf_horizontal_edge_16_neon|
@ void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
@                               const uint8_t *blimit,
@                               const uint8_t *limit,
@                               const uint8_t *thresh)
@ r0    uint8_t *s,
@ r1    int p, /* pitch */
@ r2    const uint8_t *blimit,
@ r3    const uint8_t *limit,
@ sp    const uint8_t *thresh,
@
@ Filters a vertical edge over 8 rows. For each row the 8 bytes to the left
@ of s and the 8 bytes starting at s are loaded, the two 8x8 tiles are
@ transposed so that every d register holds one pixel column, and
@ vpx_wide_mbfilter_neon is called. Depending on the flags it returns in r7,
@ 4, 6 or 16 bytes per row are written back.
@
@ All row loads (and the 16-byte stores) use the :64 address qualifier, so
@ s - 8 and s + n * p are expected to be 8-byte aligned.
 _vpx_lpf_vertical_16_neon:
 	vpx_lpf_vertical_16_neon: @ PROC
    @ Save the core registers used here and in the helper, plus d8-d15,
    @ which carry q0..q7 below and must be restored before returning.
    push        {r4-r8, lr}
    vpush       {d8-d15}
    @ 88 = 6 core registers * 4 bytes + 8 d registers * 8 bytes pushed above,
    @ i.e. the first stack argument of the caller.
    ldr         r4, [sp, #88]              @ load thresh
    vld1.8      {d16[]}, [r2]              @ load *blimit
    vld1.8      {d17[]}, [r3]              @ load *limit
    vld1.8      {d18[]}, [r4]              @ load *thresh
    @ r8 walks the left half of each row (s - 8), r0 the right half (s).
    sub         r8, r0, #8
    vld1.8      {d0}, [r8,:64], r1
    vld1.8      {d8}, [r0,:64], r1
    vld1.8      {d1}, [r8,:64], r1
    vld1.8      {d9}, [r0,:64], r1
    vld1.8      {d2}, [r8,:64], r1
    vld1.8      {d10}, [r0,:64], r1
    vld1.8      {d3}, [r8,:64], r1
    vld1.8      {d11}, [r0,:64], r1
    vld1.8      {d4}, [r8,:64], r1
    vld1.8      {d12}, [r0,:64], r1
    vld1.8      {d5}, [r8,:64], r1
    vld1.8      {d13}, [r0,:64], r1
    vld1.8      {d6}, [r8,:64], r1
    vld1.8      {d14}, [r0,:64], r1
    vld1.8      {d7}, [r8,:64], r1
    vld1.8      {d15}, [r0,:64], r1
    @ Rewind r0 by the 8 rows just read so it points at s again.
    sub         r0, r0, r1, lsl #3
    @ Transpose both 8x8 byte tiles (32-bit, then 16-bit, then 8-bit swaps).
    @ Afterwards d0..d7 hold the columns left of the edge and d8..d15 the
    @ columns right of it, which is the p7..p0 / q0..q7 register layout
    @ vpx_wide_mbfilter_neon takes as input.
    vtrn.32     q0, q2
    vtrn.32     q1, q3
    vtrn.32     q4, q6
    vtrn.32     q5, q7
    vtrn.16     q0, q1
    vtrn.16     q2, q3
    vtrn.16     q4, q5
    vtrn.16     q6, q7
    vtrn.8      d0, d1
    vtrn.8      d2, d3
    vtrn.8      d4, d5
    vtrn.8      d6, d7
    vtrn.8      d8, d9
    vtrn.8      d10, d11
    vtrn.8      d12, d13
    vtrn.8      d14, d15
    bl          vpx_wide_mbfilter_neon
    @ r7 bit 0 clear: at least one lane needs more than the plain filter.
    tst         r7, #1
    beq         v_mbfilter
    @ flat && mask were not set for any of the channels. Just store the values
    @ from filter.
    @ The helper leaves op1/op0/oq0/oq1 in d25/d24/d23/d26; swapping d23 and
    @ d25 puts them in memory order so each vst4 lane store writes the four
    @ bytes s[-2..1] of one row.
    sub         r8, r0, #2
    vswp        d23, d25
    vst4.8      {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
    vst4.8      {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
    vst4.8      {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
    vst4.8      {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
    vst4.8      {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
    vst4.8      {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
    vst4.8      {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
    vst4.8      {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
    b           v_end
 v_mbfilter:
    @ r7 bit 1 clear: at least one lane needs the wide filter.
    tst         r7, #2
    beq         v_wide_mbfilter
    @ flat2 was not set for any of the channels. Just store the values from
    @ mbfilter.
    @ d18..d20 (op2, op1, op0) go to s[-3..-1] through r8 and d21..d23
    @ (oq0, oq1, oq2) to s[0..2] through r0, one row per pair of stores.
    sub         r8, r0, #3
    vst3.8      {d18[0], d19[0], d20[0]}, [r8], r1
    vst3.8      {d21[0], d22[0], d23[0]}, [r0], r1
    vst3.8      {d18[1], d19[1], d20[1]}, [r8], r1
    vst3.8      {d21[1], d22[1], d23[1]}, [r0], r1
    vst3.8      {d18[2], d19[2], d20[2]}, [r8], r1
    vst3.8      {d21[2], d22[2], d23[2]}, [r0], r1
    vst3.8      {d18[3], d19[3], d20[3]}, [r8], r1
    vst3.8      {d21[3], d22[3], d23[3]}, [r0], r1
    vst3.8      {d18[4], d19[4], d20[4]}, [r8], r1
    vst3.8      {d21[4], d22[4], d23[4]}, [r0], r1
    vst3.8      {d18[5], d19[5], d20[5]}, [r8], r1
    vst3.8      {d21[5], d22[5], d23[5]}, [r0], r1
    vst3.8      {d18[6], d19[6], d20[6]}, [r8], r1
    vst3.8      {d21[6], d22[6], d23[6]}, [r0], r1
    vst3.8      {d18[7], d19[7], d20[7]}, [r8], r1
    vst3.8      {d21[7], d22[7], d23[7]}, [r0], r1
    b           v_end
 v_wide_mbfilter:
    @ Wide results: write back all 16 bytes of every row.
    sub         r8, r0, #8
    @ Transpose the left tile back to row order. Its columns are, in order,
    @ d0 (p7, unmodified), d16, d24, d25, d26, d27, d18, d19 (op6..op0).
    vtrn.32     d0,  d26
    vtrn.32     d16, d27
    vtrn.32     d24, d18
    vtrn.32     d25, d19
    vtrn.16     d0,  d24
    vtrn.16     d16, d25
    vtrn.16     d26, d18
    vtrn.16     d27, d19
    vtrn.8      d0,  d16
    vtrn.8      d24, d25
    vtrn.8      d26, d27
    vtrn.8      d18, d19
    @ Transpose the right tile back to row order. Its columns are, in order,
    @ d20, d21, d22, d23, d1, d2, d3 (oq0..oq6) and d15 (q7, unmodified).
    vtrn.32     d20, d1
    vtrn.32     d21, d2
    vtrn.32     d22, d3
    vtrn.32     d23, d15
    vtrn.16     d20, d22
    vtrn.16     d21, d23
    vtrn.16     d1,  d3
    vtrn.16     d2,  d15
    vtrn.8      d20, d21
    vtrn.8      d22, d23
    vtrn.8      d1,  d2
    vtrn.8      d3,  d15
    @ Store row by row: left 8 bytes through r8, right 8 bytes through r0.
    vst1.8      {d0}, [r8,:64], r1
    vst1.8      {d20}, [r0,:64], r1
    vst1.8      {d16}, [r8,:64], r1
    vst1.8      {d21}, [r0,:64], r1
    vst1.8      {d24}, [r8,:64], r1
    vst1.8      {d22}, [r0,:64], r1
    vst1.8      {d25}, [r8,:64], r1
    vst1.8      {d23}, [r0,:64], r1
    vst1.8      {d26}, [r8,:64], r1
    vst1.8      {d1}, [r0,:64], r1
    vst1.8      {d27}, [r8,:64], r1
    vst1.8      {d2}, [r0,:64], r1
    vst1.8      {d18}, [r8,:64], r1
    vst1.8      {d3}, [r0,:64], r1
    vst1.8      {d19}, [r8,:64], r1
    vst1.8      {d15}, [r0,:64], r1
 v_end:
    @ Restore the saved registers; popping into pc returns to the caller.
    vpop        {d8-d15}
    pop         {r4-r8, pc}
 	.size vpx_lpf_vertical_16_neon, .-vpx_lpf_vertical_16_neon    @ ENDP        @ |vpx_lpf_vertical_16_neon|
@ void vpx_wide_mbfilter_neon();
@ This is a helper function for the loopfilters. The individual functions do
@ the necessary load, transpose (if necessary) and store.
@
@ Inputs:
@ r0-r3 PRESERVE
@ d16    blimit
@ d17    limit
@ d18    thresh
@ d0    p7
@ d1    p6
@ d2    p5
@ d3    p4
@ d4    p3
@ d5    p2
@ d6    p1
@ d7    p0
@ d8    q0
@ d9    q1
@ d10   q2
@ d11   q3
@ d12   q4
@ d13   q5
@ d14   q6
@ d15   q7
@
@ Outputs:
@ r7    bit 0 set: (flat && mask) is zero in every lane. Only the filter()
@       results are produced: d25 = op1, d24 = op0, d23 = oq0, d26 = oq1.
@       bit 1 set: (flat2 && flat && mask) is zero in every lane. The
@       mbfilter() results are in d18..d23 = op2, op1, op0, oq0, oq1, oq2.
@       r7 == 0: the wide results are in d16, d24, d25, d26, d27, d18, d19
@       (op6..op0) and d20, d21, d22, d23, d1, d2, d3 (oq0..oq6).
@
@ Overwrites r5-r7 and d16-d31; the wide path additionally overwrites
@ d1-d3 and q4 (d8/d9). Returns with bx lr, so the caller must have saved lr.
 _vpx_wide_mbfilter_neon:
 	vpx_wide_mbfilter_neon: @ PROC
    @ r7 accumulates the early-out flags described above.
    mov         r7, #0
    @ filter_mask
    vabd.u8     d19, d4, d5                @ abs(p3 - p2)
    vabd.u8     d20, d5, d6                @ abs(p2 - p1)
    vabd.u8     d21, d6, d7                @ abs(p1 - p0)
    vabd.u8     d22, d9, d8                @ abs(q1 - q0)
    vabd.u8     d23, d10, d9               @ abs(q2 - q1)
    vabd.u8     d24, d11, d10              @ abs(q3 - q2)
    @ only compare the largest value to limit
    vmax.u8     d19, d19, d20              @ max(abs(p3 - p2), abs(p2 - p1))
    vmax.u8     d20, d21, d22              @ max(abs(p1 - p0), abs(q1 - q0))
    vmax.u8     d23, d23, d24              @ max(abs(q2 - q1), abs(q3 - q2))
    vmax.u8     d19, d19, d20
    vabd.u8     d24, d7, d8                @ abs(p0 - q0)
    vmax.u8     d19, d19, d23
    vabd.u8     d23, d6, d9                @ a = abs(p1 - q1)
    vqadd.u8    d24, d24, d24              @ b = abs(p0 - q0) * 2
    @ lane is all ones where limit >= largest neighbour difference
    vcge.u8     d19, d17, d19
    @ flatmask4
    vabd.u8     d25, d7, d5                @ abs(p0 - p2)
    vabd.u8     d26, d8, d10               @ abs(q0 - q2)
    vabd.u8     d27, d4, d7                @ abs(p3 - p0)
    vabd.u8     d28, d11, d8               @ abs(q3 - q0)
    @ only compare the largest value to thresh
    vmax.u8     d25, d25, d26              @ max(abs(p0 - p2), abs(q0 - q2))
    vmax.u8     d26, d27, d28              @ max(abs(p3 - p0), abs(q3 - q0))
    vmax.u8     d25, d25, d26
    vmax.u8     d20, d20, d25              @ fold in max(abs(p1-p0), abs(q1-q0))
    vshr.u8     d23, d23, #1               @ a = a / 2
    vqadd.u8    d24, d24, d23              @ b + a (saturating)
    vmov.u8     d30, #1
    vcge.u8     d24, d16, d24              @ (blimit >= b + a) * -1
    vcge.u8     d20, d30, d20              @ flat = (1 >= largest flatmask4 diff)
    vand        d19, d19, d24              @ mask
    @ hevmask
    vcgt.u8     d21, d21, d18              @ (abs(p1 - p0) > thresh)*-1
    vcgt.u8     d22, d22, d18              @ (abs(q1 - q0) > thresh)*-1
    vorr        d21, d21, d22              @ hev
    vand        d16, d20, d19              @ flat && mask
    @ Copy flat && mask to core registers so it can be tested for all-zero.
    vmov        r5, r6, d16
    @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
    vabd.u8     d22, d3, d7                @ abs(p4 - p0)
    vabd.u8     d23, d12, d8               @ abs(q4 - q0)
    vabd.u8     d24, d7, d2                @ abs(p0 - p5)
    vabd.u8     d25, d8, d13               @ abs(q0 - q5)
    vabd.u8     d26, d1, d7                @ abs(p6 - p0)
    vabd.u8     d27, d14, d8               @ abs(q6 - q0)
    vabd.u8     d28, d0, d7                @ abs(p7 - p0)
    vabd.u8     d29, d15, d8               @ abs(q7 - q0)
    @ only compare the largest value to thresh
    vmax.u8     d22, d22, d23              @ max(abs(p4 - p0), abs(q4 - q0))
    vmax.u8     d23, d24, d25              @ max(abs(p0 - p5), abs(q0 - q5))
    vmax.u8     d24, d26, d27              @ max(abs(p6 - p0), abs(q6 - q0))
    vmax.u8     d25, d28, d29              @ max(abs(p7 - p0), abs(q7 - q0))
    vmax.u8     d26, d22, d23
    vmax.u8     d27, d24, d25
    vmax.u8     d23, d26, d27
    vcge.u8     d18, d30, d23              @ flat2 = (1 >= largest flatmask5 diff)
    vmov.u8     d22, #0x80
    orrs        r5, r5, r6                 @ Check for 0
    orreq       r7, r7, #1                 @ Only do filter branch
    vand        d17, d18, d16              @ flat2 && flat && mask
    @ Copy flat2 && flat && mask to core registers for the second zero test.
    vmov        r5, r6, d17
    @ mbfilter() function
    @ filter() function
    @ convert to signed
    veor        d23, d8, d22               @ qs0
    veor        d24, d7, d22               @ ps0
    veor        d25, d6, d22               @ ps1
    veor        d26, d9, d22               @ qs1
    vmov.u8     d27, #3
    vsub.s8     d28, d23, d24              @ ( qs0 - ps0)
    vqsub.s8    d29, d25, d26              @ filter = clamp(ps1-qs1)
    vmull.s8    q15, d28, d27              @ 3 * ( qs0 - ps0)
    vand        d29, d29, d21              @ filter &= hev
    vaddw.s8    q15, q15, d29              @ filter + 3 * (qs0 - ps0)
    vmov.u8     d29, #4
    @ filter = clamp(filter + 3 * ( qs0 - ps0))
    vqmovn.s16  d28, q15
    vand        d28, d28, d19              @ filter &= mask
    vqadd.s8    d30, d28, d27              @ filter2 = clamp(filter+3)
    vqadd.s8    d29, d28, d29              @ filter1 = clamp(filter+4)
    vshr.s8     d30, d30, #3               @ filter2 >>= 3
    vshr.s8     d29, d29, #3               @ filter1 >>= 3
    vqadd.s8    d24, d24, d30              @ op0 = clamp(ps0 + filter2)
    vqsub.s8    d23, d23, d29              @ oq0 = clamp(qs0 - filter1)
    @ outer tap adjustments: ++filter1 >> 1
    vrshr.s8    d29, d29, #1
    vbic        d29, d29, d21              @ filter &= ~hev
    vqadd.s8    d25, d25, d29              @ op1 = clamp(ps1 + filter)
    vqsub.s8    d26, d26, d29              @ oq1 = clamp(qs1 - filter)
    veor        d24, d24, d22              @ *f_op0 = u^0x80
    veor        d23, d23, d22              @ *f_oq0 = u^0x80
    veor        d25, d25, d22              @ *f_op1 = u^0x80
    veor        d26, d26, d22              @ *f_oq1 = u^0x80
    @ Early out: no lane has flat && mask, so the filter() results are final.
    tst         r7, #1
    bxne        lr
    orrs        r5, r5, r6                 @ Check for 0
    orreq       r7, r7, #2                 @ Only do mbfilter branch
    @ mbfilter flat && mask branch
    @ TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
    @ and using vbit on the q's?
    @ q15 keeps a running 16-bit sum of the current 8-weight window; each
    @ output is that sum rounded and shifted right by 3.
    vmov.u8     d29, #2
    vaddl.u8    q15, d7, d8                @ op2 = p0 + q0
    vmlal.u8    q15, d4, d27               @ op2 = p0 + q0 + p3 * 3
    vmlal.u8    q15, d5, d29               @ op2 = p0 + q0 + p3 * 3 + p2 * 2
    vaddl.u8    q10, d4, d5
    vaddw.u8    q15, d6                    @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2
    vaddl.u8    q14, d6, d9
    vqrshrn.u16 d18, q15, #3               @ r_op2
    vsub.i16    q15, q10
    vaddl.u8    q10, d4, d6
    vadd.i16    q15, q14
    vaddl.u8    q14, d7, d10
    vqrshrn.u16 d19, q15, #3               @ r_op1
    vsub.i16    q15, q10
    vadd.i16    q15, q14
    vaddl.u8    q14, d8, d11
    vqrshrn.u16 d20, q15, #3               @ r_op0
    vsubw.u8    q15, d4                    @ oq0 = op0 - p3
    vsubw.u8    q15, d7                    @ oq0 -= p0
    vadd.i16    q15, q14
    vaddl.u8    q14, d9, d11
    vqrshrn.u16 d21, q15, #3               @ r_oq0
    vsubw.u8    q15, d5                    @ oq1 = oq0 - p2
    vsubw.u8    q15, d8                    @ oq1 -= q0
    vadd.i16    q15, q14
    vaddl.u8    q14, d10, d11
    vqrshrn.u16 d22, q15, #3               @ r_oq1
    vsubw.u8    q15, d6                    @ oq2 = oq1 - p1
    vsubw.u8    q15, d9                    @ oq2 -= q1
    vadd.i16    q15, q14
    vqrshrn.u16 d27, q15, #3               @ r_oq2
    @ Filter does not set op2 or oq2, so use p2 and q2.
    vbif        d18, d5, d16               @ t_op2 |= p2 & ~(flat & mask)
    vbif        d19, d25, d16              @ t_op1 |= f_op1 & ~(flat & mask)
    vbif        d20, d24, d16              @ t_op0 |= f_op0 & ~(flat & mask)
    vbif        d21, d23, d16              @ t_oq0 |= f_oq0 & ~(flat & mask)
    vbif        d22, d26, d16              @ t_oq1 |= f_oq1 & ~(flat & mask)
    vbit        d23, d27, d16              @ t_oq2 |= r_oq2 & (flat & mask)
    vbif        d23, d10, d16              @ t_oq2 |= q2 & ~(flat & mask)
    @ Early out: no lane has flat2 && flat && mask, so d18..d23 are final.
    tst         r7, #2
    bxne        lr
    @ wide_mbfilter flat2 && flat && mask branch
    @ Same running-sum scheme with a 16-weight window: q15 is the window sum
    @ and each output is that sum rounded and shifted right by 4. From here
    @ on d16 no longer holds flat && mask; d17 is the select mask.
    vmov.u8     d16, #7
    vaddl.u8    q15, d7, d8                @ op6 = p0 + q0
    vaddl.u8    q12, d2, d3
    vaddl.u8    q13, d4, d5
    vaddl.u8    q14, d1, d6
    vmlal.u8    q15, d0, d16               @ op6 += p7 * 7
    vadd.i16    q12, q13
    vadd.i16    q15, q14
    vaddl.u8    q14, d2, d9
    vadd.i16    q15, q12
    vaddl.u8    q12, d0, d1
    vaddw.u8    q15, d1
    vaddl.u8    q13, d0, d2
    vadd.i16    q14, q15, q14
    vqrshrn.u16 d16, q15, #4               @ w_op6
    vsub.i16    q15, q14, q12
    vaddl.u8    q14, d3, d10
    vqrshrn.u16 d24, q15, #4               @ w_op5
    vsub.i16    q15, q13
    vaddl.u8    q13, d0, d3
    vadd.i16    q15, q14
    vaddl.u8    q14, d4, d11
    vqrshrn.u16 d25, q15, #4               @ w_op4
    vadd.i16    q15, q14
    vaddl.u8    q14, d0, d4
    vsub.i16    q15, q13
    vsub.i16    q14, q15, q14
    vqrshrn.u16 d26, q15, #4               @ w_op3
    vaddw.u8    q15, q14, d5               @ op2 += p2
    vaddl.u8    q14, d0, d5
    vaddw.u8    q15, d12                   @ op2 += q4
    vbif        d26, d4, d17               @ op3 |= p3 & ~(f2 & f & m)
    vqrshrn.u16 d27, q15, #4               @ w_op2
    vsub.i16    q15, q14
    vaddl.u8    q14, d0, d6
    vaddw.u8    q15, d6                    @ op1 += p1
    vaddw.u8    q15, d13                   @ op1 += q5
    vbif        d27, d18, d17              @ op2 |= t_op2 & ~(f2 & f & m)
    vqrshrn.u16 d18, q15, #4               @ w_op1
    vsub.i16    q15, q14
    vaddl.u8    q14, d0, d7
    vaddw.u8    q15, d7                    @ op0 += p0
    vaddw.u8    q15, d14                   @ op0 += q6
    vbif        d18, d19, d17              @ op1 |= t_op1 & ~(f2 & f & m)
    vqrshrn.u16 d19, q15, #4               @ w_op0
    vsub.i16    q15, q14
    vaddl.u8    q14, d1, d8
    vaddw.u8    q15, d8                    @ oq0 += q0
    vaddw.u8    q15, d15                   @ oq0 += q7
    vbif        d19, d20, d17              @ op0 |= t_op0 & ~(f2 & f & m)
    vqrshrn.u16 d20, q15, #4               @ w_oq0
    vsub.i16    q15, q14
    vaddl.u8    q14, d2, d9
    vaddw.u8    q15, d9                    @ oq1 += q1
    @ q0/q1 (d8/d9) are not read again below, so q4 becomes a scratch sum.
    vaddl.u8    q4, d10, d15
    vaddw.u8    q15, d15                   @ oq1 += q7
    vbif        d20, d21, d17              @ oq0 |= t_oq0 & ~(f2 & f & m)
    vqrshrn.u16 d21, q15, #4               @ w_oq1
    vsub.i16    q15, q14
    vaddl.u8    q14, d3, d10
    vadd.i16    q15, q4
    vaddl.u8    q4, d11, d15
    vbif        d21, d22, d17              @ oq1 |= t_oq1 & ~(f2 & f & m)
    vqrshrn.u16 d22, q15, #4               @ w_oq2
    vsub.i16    q15, q14
    vaddl.u8    q14, d4, d11
    vadd.i16    q15, q4
    vaddl.u8    q4, d12, d15
    vbif        d22, d23, d17              @ oq2 |= t_oq2 & ~(f2 & f & m)
    vqrshrn.u16 d23, q15, #4               @ w_oq3
    vsub.i16    q15, q14
    vaddl.u8    q14, d5, d12
    vadd.i16    q15, q4
    vaddl.u8    q4, d13, d15
    vbif        d16, d1, d17               @ op6 |= p6 & ~(f2 & f & m)
    vqrshrn.u16 d1, q15, #4                @ w_oq4
    vsub.i16    q15, q14
    vaddl.u8    q14, d6, d13
    vadd.i16    q15, q4
    vaddl.u8    q4, d14, d15
    vbif        d24, d2, d17               @ op5 |= p5 & ~(f2 & f & m)
    vqrshrn.u16 d2, q15, #4                @ w_oq5
    vsub.i16    q15, q14
    vbif        d25, d3, d17               @ op4 |= p4 & ~(f2 & f & m)
    vadd.i16    q15, q4
    vbif        d23, d11, d17              @ oq3 |= q3 & ~(f2 & f & m)
    vqrshrn.u16 d3, q15, #4                @ w_oq6
    vbif        d1, d12, d17               @ oq4 |= q4 & ~(f2 & f & m)
    vbif        d2, d13, d17               @ oq5 |= q5 & ~(f2 & f & m)
    vbif        d3, d14, d17               @ oq6 |= q6 & ~(f2 & f & m)
    bx          lr
 	.size vpx_wide_mbfilter_neon, .-vpx_wide_mbfilter_neon    @ ENDP        @ |vpx_wide_mbfilter_neon|
 	.section	.note.GNU-stack,"",%progbits
--- a/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s
+++ b/thirdparty/libvpx/vpx_dsp/arm/gas/save_reg_neon.s
@ -0,0 +1,44 @@
@ This file was created from a .asm file
@  using the ads2gas.pl script.
 	.equ DO1STROUNDING, 0
@
@  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
@
@  Use of this source code is governed by a BSD-style license
@  that can be found in the LICENSE file in the root of the source
@  tree. An additional intellectual property rights grant can be found
@  in the file PATENTS.  All contributing project authors may
@  be found in the AUTHORS file in the root of the source tree.
@
    .global vpx_push_neon 
 	.type vpx_push_neon, function
    .global vpx_pop_neon 
 	.type vpx_pop_neon, function
   .arm
   .eabi_attribute 24, 1 @Tag_ABI_align_needed
   .eabi_attribute 25, 1 @Tag_ABI_align_preserved
 .text
 .p2align 2
 @ vpx_push_neon: copy NEON registers d8-d15 to the buffer addressed by r0.
 @ r0    destination buffer, at least 64 bytes (8 registers * 8 bytes)
 @ Both stores use post-increment writeback, so r0 ends up 64 bytes past
 @ its value on entry. No other register is modified.
 _vpx_push_neon:
 	vpx_push_neon: @ PROC
    vst1.i64            {d8, d9, d10, d11}, [r0]!
    vst1.i64            {d12, d13, d14, d15}, [r0]!
    bx              lr
 	.size vpx_push_neon, .-vpx_push_neon    @ ENDP
 @ vpx_pop_neon: reload NEON registers d8-d15 from the buffer addressed by r0.
 @ r0    source buffer, 64 bytes laid out as written by vpx_push_neon
 @ Both loads use post-increment writeback, so r0 ends up 64 bytes past
 @ its value on entry.
 _vpx_pop_neon:
 	vpx_pop_neon: @ PROC
    vld1.i64            {d8, d9, d10, d11}, [r0]!
    vld1.i64            {d12, d13, d14, d15}, [r0]!
    bx              lr
 	.size vpx_pop_neon, .-vpx_pop_neon    @ ENDP
 	.section	.note.GNU-stack,"",%progbits
--- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s
+++ b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/intrapred_neon_asm.s
@ -0,0 +1,660 @@
@ This file was created from a .asm file
@  using the ads2gas_apple.pl script.
 	.set WIDE_REFERENCE, 0
 	.set ARCHITECTURE, 5
 	.set DO1STROUNDING, 0
 @
 @  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 @
 @  Use of this source code is governed by a BSD-style license
 @  that can be found in the LICENSE file in the root of the source
 @  tree. An additional intellectual property rights grant can be found
 @  in the file PATENTS.  All contributing project authors may
 @  be found in the AUTHORS file in the root of the source tree.
 @
    .globl _vpx_v_predictor_4x4_neon
 	.globl vpx_v_predictor_4x4_neon
    .globl _vpx_v_predictor_8x8_neon
 	.globl vpx_v_predictor_8x8_neon
    .globl _vpx_v_predictor_16x16_neon
 	.globl vpx_v_predictor_16x16_neon
    .globl _vpx_v_predictor_32x32_neon
 	.globl vpx_v_predictor_32x32_neon
    .globl _vpx_h_predictor_4x4_neon
 	.globl vpx_h_predictor_4x4_neon
    .globl _vpx_h_predictor_8x8_neon
 	.globl vpx_h_predictor_8x8_neon
    .globl _vpx_h_predictor_16x16_neon
 	.globl vpx_h_predictor_16x16_neon
    .globl _vpx_h_predictor_32x32_neon
 	.globl vpx_h_predictor_32x32_neon
    .globl _vpx_tm_predictor_4x4_neon
 	.globl vpx_tm_predictor_4x4_neon
    .globl _vpx_tm_predictor_8x8_neon
 	.globl vpx_tm_predictor_8x8_neon
    .globl _vpx_tm_predictor_16x16_neon
 	.globl vpx_tm_predictor_16x16_neon
    .globl _vpx_tm_predictor_32x32_neon
 	.globl vpx_tm_predictor_32x32_neon
   @ ARM
   @ 
   @ PRESERVE8
 .text
 .p2align 2
 @void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                              const uint8_t *above,
 @                              const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Vertical prediction: the 4 bytes at above[0..3] are copied into each of
 @ the 4 destination rows. left (r3) is not read. r0 is advanced by
 @ y_stride after every row.
 _vpx_v_predictor_4x4_neon:
 	vpx_v_predictor_4x4_neon: @
    @ One 32-bit lane holds the whole 4-pixel row.
    vld1.32             {d0[0]}, [r2]
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d0[0]}, [r0], r1
    bx                  lr
    @                 @ |vpx_v_predictor_4x4_neon|
 @void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                              const uint8_t *above,
 @                              const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Vertical prediction: the 8 bytes at above[0..7] are copied into each of
 @ the 8 destination rows. left (r3) is not read. r0 is advanced by
 @ y_stride after every row.
 _vpx_v_predictor_8x8_neon:
 	vpx_v_predictor_8x8_neon: @
    @ d0 holds the whole 8-pixel row.
    vld1.8              {d0}, [r2]
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    vst1.8              {d0}, [r0], r1
    bx                  lr
    @                 @ |vpx_v_predictor_8x8_neon|
 @void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Vertical prediction: the 16 bytes at above[0..15] are copied into each
 @ of the 16 destination rows. left (r3) is not read. r0 is advanced by
 @ y_stride after every row.
 _vpx_v_predictor_16x16_neon:
 	vpx_v_predictor_16x16_neon: @
    @ q0 holds the whole 16-pixel row.
    vld1.8              {q0}, [r2]
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    vst1.8              {q0}, [r0], r1
    bx                  lr
    @                 @ |vpx_v_predictor_16x16_neon|
 @void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Vertical prediction: the 32 bytes at above[0..31] are copied into each
 @ of the 32 destination rows. left (r3) is not read. r0 is advanced by
 @ y_stride after every row.
 _vpx_v_predictor_32x32_neon:
 	vpx_v_predictor_32x32_neon: @
    @ q0:q1 hold the whole 32-pixel row.
    vld1.8              {q0, q1}, [r2]
    @ above is no longer needed, so r2 is reused as the loop counter:
    @ 2 iterations of 16 rows each.
    mov                 r2, #2
 loop_v:
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    vst1.8              {q0, q1}, [r0], r1
    subs                r2, r2, #1
    bgt                 loop_v
    bx                  lr
    @                 @ |vpx_v_predictor_32x32_neon|
 @void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                              const uint8_t *above,
 @                              const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Horizontal prediction: row i of the 4x4 block is filled with left[i].
 @ above (r2) is not read. r0 is advanced by y_stride after every row.
 _vpx_h_predictor_4x4_neon:
 	vpx_h_predictor_4x4_neon: @
    @ Fetch left[0..3] with a single 32-bit lane load.
    vld1.32             {d1[0]}, [r3]
    @ For each row: broadcast one left byte across d0, store 4 bytes of it.
    vdup.8              d0, d1[0]
    vst1.32             {d0[0]}, [r0], r1
    vdup.8              d0, d1[1]
    vst1.32             {d0[0]}, [r0], r1
    vdup.8              d0, d1[2]
    vst1.32             {d0[0]}, [r0], r1
    vdup.8              d0, d1[3]
    vst1.32             {d0[0]}, [r0], r1
    bx                  lr
    @                 @ |vpx_h_predictor_4x4_neon|
 @void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                              const uint8_t *above,
 @                              const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Horizontal prediction: row i of the 8x8 block is filled with left[i].
 @ above (r2) is not read. r0 is advanced by y_stride after every row.
 _vpx_h_predictor_8x8_neon:
 	vpx_h_predictor_8x8_neon: @
    @ Fetch left[0..7] into d1.
    vld1.64             {d1}, [r3]
    @ For each row: broadcast one left byte across d0, store all 8 bytes.
    vdup.8              d0, d1[0]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[1]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[2]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[3]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[4]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[5]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[6]
    vst1.64             {d0}, [r0], r1
    vdup.8              d0, d1[7]
    vst1.64             {d0}, [r0], r1
    bx                  lr
    @                 @ |vpx_h_predictor_8x8_neon|
 @void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Horizontal prediction: row i of the 16x16 block is filled with left[i].
 @ above (r2) is not read. r0 is advanced by y_stride after every row.
 _vpx_h_predictor_16x16_neon:
 	vpx_h_predictor_16x16_neon: @
    @ Fetch left[0..15] into q1 (d2 = left[0..7], d3 = left[8..15]).
    vld1.8              {q1}, [r3]
    @ For each row: broadcast one left byte across q0, store all 16 bytes.
    vdup.8              q0, d2[0]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[1]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[2]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[3]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[4]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[5]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[6]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[7]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[0]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[1]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[2]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[3]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[4]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[5]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[6]
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[7]
    vst1.8              {q0}, [r0], r1
    bx                  lr
    @                 @ |vpx_h_predictor_16x16_neon|
 @void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ Horizontal prediction: row i of the 32x32 block is filled with left[i].
 @ above is not read; r2 is reused as the loop counter. Each row is written
 @ as two 16-byte stores.
 _vpx_h_predictor_32x32_neon:
 	vpx_h_predictor_32x32_neon: @
    @ The first store of every row post-increments r0 by 16, so the second
    @ store only has to add y_stride - 16 to reach the next row.
    sub                 r1, r1, #16
    @ 2 iterations, each consuming 16 bytes of left and writing 16 rows.
    mov                 r2, #2
 loop_h:
    @ Next 16 left pixels (d2 = first 8, d3 = last 8); r3 advances by 16.
    vld1.8              {q1}, [r3]!
    vdup.8              q0, d2[0]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[1]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[2]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[3]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[4]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[5]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[6]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d2[7]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[0]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[1]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[2]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[3]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[4]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[5]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[6]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    vdup.8              q0, d3[7]
    vst1.8              {q0}, [r0]!
    vst1.8              {q0}, [r0], r1
    subs                r2, r2, #1
    bgt                 loop_h
    bx                  lr
    @                 @ |vpx_h_predictor_32x32_neon|
 @void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ TM prediction for a 4x4 block:
 @   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
 @ The sum is formed in 16-bit lanes and narrowed with unsigned saturation.
 @ Reads above[-1]; uses r12 as scratch and advances r0 and r3.
 _vpx_tm_predictor_4x4_neon:
 	vpx_tm_predictor_4x4_neon: @
     @ Load ytop_left = above[-1], replicated to every byte lane of d0
    sub                 r12, r2, #1
    vld1.u8             {d0[]}, [r12]
     @ Load above 4 pixels
    vld1.32             {d2[0]}, [r2]
     @ Compute above - ytop_left
     @ (widened to 16 bits in q3; only the low 4 lanes end up being stored)
    vsubl.u8            q3, d2, d0
     @ Load left row by row and compute left + (above - ytop_left)
     @ 1st row and 2nd row
    vld1.u8             {d2[]}, [r3]!
    vld1.u8             {d4[]}, [r3]!
    vmovl.u8            q1, d2
    vmovl.u8            q2, d4
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
     @ Narrow back to bytes with saturation to the unsigned 8-bit range
    vqmovun.s16         d0, q1
    vqmovun.s16         d1, q2
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d1[0]}, [r0], r1
     @ 3rd row and 4th row
    vld1.u8             {d2[]}, [r3]!
    vld1.u8             {d4[]}, [r3]
    vmovl.u8            q1, d2
    vmovl.u8            q2, d4
    vadd.s16            q1, q1, q3
    vadd.s16            q2, q2, q3
    vqmovun.s16         d0, q1
    vqmovun.s16         d1, q2
    vst1.32             {d0[0]}, [r0], r1
    vst1.32             {d1[0]}, [r0], r1
    bx                  lr
    @                 @ |vpx_tm_predictor_4x4_neon|
 @void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above
 @ r3  const uint8_t *left
 @
 @ TM prediction for an 8x8 block:
 @   dst[r][c] = saturate_u8(left[r] + above[c] - above[-1])
 @ The sum is formed in 16-bit lanes and narrowed with unsigned saturation.
 @ Reads above[-1]; uses r12 as scratch and advances r0.
 _vpx_tm_predictor_8x8_neon:
 	vpx_tm_predictor_8x8_neon: @
     @ Load ytop_left = above[-1], replicated to every byte lane of d0
    sub                 r12, r2, #1
    vld1.8              {d0[]}, [r12]
     @ preload 8 left
    vld1.8              {d30}, [r3]
     @ Load above 8 pixels
    vld1.64             {d2}, [r2]
     @ Widen left to 16 bits: d20 = left[0..3], d21 = left[4..7]
    vmovl.u8            q10, d30
     @ Compute above - ytop_left
    vsubl.u8            q3, d2, d0
     @ Broadcast left[r] for each row and compute left + (above - ytop_left)
     @ 1st row and 2nd row
    vdup.16             q0, d20[0]
    vdup.16             q1, d20[1]
    vadd.s16            q0, q3, q0
    vadd.s16            q1, q3, q1
     @ 3rd row and 4th row
    vdup.16             q8, d20[2]
    vdup.16             q9, d20[3]
    vadd.s16            q8, q3, q8
    vadd.s16            q9, q3, q9
     @ Narrow back to bytes with saturation to the unsigned 8-bit range
    vqmovun.s16         d0, q0
    vqmovun.s16         d1, q1
    vqmovun.s16         d2, q8
    vqmovun.s16         d3, q9
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1
    vst1.64             {d2}, [r0], r1
    vst1.64             {d3}, [r0], r1
     @ 5th row and 6th row
    vdup.16             q0, d21[0]
    vdup.16             q1, d21[1]
    vadd.s16            q0, q3, q0
    vadd.s16            q1, q3, q1
     @ 7th row and 8th row
    vdup.16             q8, d21[2]
    vdup.16             q9, d21[3]
    vadd.s16            q8, q3, q8
    vadd.s16            q9, q3, q9
    vqmovun.s16         d0, q0
    vqmovun.s16         d1, q1
    vqmovun.s16         d2, q8
    vqmovun.s16         d3, q9
    vst1.64             {d0}, [r0], r1
    vst1.64             {d1}, [r0], r1
    vst1.64             {d2}, [r0], r1
    vst1.64             {d3}, [r0], r1
    bx                  lr
    @                 @ |vpx_tm_predictor_8x8_neon|
 @void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride,
 @                                const uint8_t *above,
 @                                const uint8_t *left)
 @ Fills a 16x16 block at dst. Every output pixel is
 @   saturate_u8(left[row] + above[col] - above[-1]).
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above (reused as the loop counter once above is loaded)
 @ r3  const uint8_t *left
 @ Scratch: r12, q0-q3, q8-q11. r0 and r3 are advanced.
 _vpx_tm_predictor_16x16_neon:
 	vpx_tm_predictor_16x16_neon: @
     @ Load ytop_left = above[-1], replicated into every byte lane of d0
    sub                 r12, r2, #1
    vld1.8              {d0[]}, [r12]
     @ Load above 16 pixels
    vld1.8              {q1}, [r2]
     @ preload the first 8 left pixels into d18
    vld1.8              {d18}, [r3]!
     @ Compute above - ytop_left: q2 = columns 0-7, q3 = columns 8-15
    vsubl.u8            q2, d2, d0
    vsubl.u8            q3, d3, d0
     @ Widen left to 16 bits: d20 = first four rows, d21 = next four rows
    vmovl.u8            q10, d18
     @ Load left row by row and compute left + (above - ytop_left)
     @ Process 8 rows in each single loop and loop 2 times to process 16 rows.
    mov                 r2, #2
 loop_16x16_neon:
     @ Process two rows.
    vdup.16             q0, d20[0]
    vdup.16             q8, d20[1]
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vdup.16             q0, d20[2]                   @ preload next 2 rows data
    vdup.16             q8, d20[3]
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
     @ Process two rows.
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vdup.16             q0, d21[0]                   @ preload next 2 rows data
    vdup.16             q8, d21[1]
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
     @ Process two rows.
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
    vdup.16             q0, d21[2]                   @ preload next 2 rows data
    vdup.16             q8, d21[3]
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
     @ Process two rows.
    vadd.s16            q1, q0, q2
    vadd.s16            q0, q0, q3
    vadd.s16            q11, q8, q2
    vadd.s16            q8, q8, q3
    vqmovun.s16         d2, q1
    vqmovun.s16         d3, q0
    vqmovun.s16         d22, q11
    vqmovun.s16         d23, q8
     @ NOTE(review): on the final iteration the load below fetches the 8 bytes
     @ that follow the 16 left pixels already consumed (left[16..23]); the
     @ loaded value is never used. Confirm callers' left buffers tolerate it.
    vld1.8              {d18}, [r3]!                   @ preload next 8 left pixels into d18
    vmovl.u8            q10, d18
    vst1.64             {d2,d3}, [r0], r1
    vst1.64             {d22,d23}, [r0], r1
    subs                r2, r2, #1
    bgt                 loop_16x16_neon
    bx                  lr
    @                 @ |vpx_tm_predictor_16x16_neon|
 @void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
 @                                  const uint8_t *above,
 @                                  const uint8_t *left)
 @ Fills a 32x32 block at dst. Every output pixel is
 @   saturate_u8(left[row] + above[col] - above[-1]).
 @ r0  uint8_t *dst
 @ r1  ptrdiff_t y_stride
 @ r2  const uint8_t *above (reused as the loop counter once above is loaded)
 @ r3  const uint8_t *left
 @ Scratch: r12, q0-q3, q8-q15. r0 and r3 are advanced.
 _vpx_tm_predictor_32x32_neon:
 	vpx_tm_predictor_32x32_neon: @
     @ Load ytop_left = above[-1], replicated into every byte lane of d0
    sub                 r12, r2, #1
    vld1.8              {d0[]}, [r12]
     @ Load above 32 pixels
    vld1.8              {q1}, [r2]!
    vld1.8              {q2}, [r2]
     @ preload 8 left pixels
    vld1.8              {d26}, [r3]!
     @ Compute above - ytop_left: q8-q11 hold columns 0-7, 8-15, 16-23, 24-31
    vsubl.u8            q8, d2, d0
    vsubl.u8            q9, d3, d0
    vsubl.u8            q10, d4, d0
    vsubl.u8            q11, d5, d0
     @ Widen left to 16 bits: d6 = first four rows, d7 = next four rows
    vmovl.u8            q3, d26
     @ Load left row by row and compute left + (above - ytop_left)
     @ Process 8 rows in each single loop and loop 4 times to process 32 rows.
    mov                 r2, #4
 loop_32x32_neon:
     @ Process two rows.
    vdup.16             q0, d6[0]
    vdup.16             q2, d6[1]
    vadd.s16            q12, q0, q8
    vadd.s16            q13, q0, q9
    vadd.s16            q14, q0, q10
    vadd.s16            q15, q0, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
     @ Broadcast the left pixels for the next two rows before storing
    vdup.16             q1, d6[2]
    vdup.16             q2, d6[3]
    vst1.64             {d24-d27}, [r0], r1
     @ Process two rows.
    vadd.s16            q12, q1, q8
    vadd.s16            q13, q1, q9
    vadd.s16            q14, q1, q10
    vadd.s16            q15, q1, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
     @ Broadcast the left pixels for the next two rows before storing
    vdup.16             q0, d7[0]
    vdup.16             q2, d7[1]
    vst1.64             {d24-d27}, [r0], r1
     @ Process two rows.
    vadd.s16            q12, q0, q8
    vadd.s16            q13, q0, q9
    vadd.s16            q14, q0, q10
    vadd.s16            q15, q0, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
     @ Broadcast the left pixels for the next two rows before storing
    vdup.16             q0, d7[2]
    vdup.16             q2, d7[3]
    vst1.64             {d24-d27}, [r0], r1
     @ Process two rows.
    vadd.s16            q12, q0, q8
    vadd.s16            q13, q0, q9
    vadd.s16            q14, q0, q10
    vadd.s16            q15, q0, q11
    vqmovun.s16         d0, q12
    vqmovun.s16         d1, q13
    vadd.s16            q12, q2, q8
    vadd.s16            q13, q2, q9
    vqmovun.s16         d2, q14
    vqmovun.s16         d3, q15
    vadd.s16            q14, q2, q10
    vadd.s16            q15, q2, q11
    vst1.64             {d0-d3}, [r0], r1
    vqmovun.s16         d24, q12
    vqmovun.s16         d25, q13
     @ NOTE(review): on the final iteration the load below fetches the 8 bytes
     @ that follow the 32 left pixels already consumed (left[32..39]); the
     @ loaded value is never used. Confirm callers' left buffers tolerate it.
    vld1.8              {d0}, [r3]!                    @ preload 8 left pixels
    vqmovun.s16         d26, q14
    vqmovun.s16         d27, q15
    vmovl.u8            q3, d0
    vst1.64             {d24-d27}, [r0], r1
    subs                r2, r2, #1
    bgt                 loop_32x32_neon
    bx                  lr
    @                 @ |vpx_tm_predictor_32x32_neon|
--- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s
+++ b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/loopfilter_mb_neon.s
@ -0,0 +1,649 @@
@ This file was created from a .asm file
@  using the ads2gas_apple.pl script.
 	.set WIDE_REFERENCE, 0
 	.set ARCHITECTURE, 5
 	.set DO1STROUNDING, 0
 @
 @  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 @
 @  Use of this source code is governed by a BSD-style license
 @  that can be found in the LICENSE file in the root of the source
 @  tree. An additional intellectual property rights grant can be found
 @  in the file PATENTS.  All contributing project authors may
 @  be found in the AUTHORS file in the root of the source tree.
 @
    .globl _vpx_lpf_horizontal_edge_8_neon
 	.globl vpx_lpf_horizontal_edge_8_neon
    .globl _vpx_lpf_horizontal_edge_16_neon
 	.globl vpx_lpf_horizontal_edge_16_neon
    .globl _vpx_lpf_vertical_16_neon
 	.globl vpx_lpf_vertical_16_neon
   @ ARM
 .text
 .p2align 2
 @ void mb_lpf_horizontal_edge(uint8_t *s, int p,
 @                             const uint8_t *blimit,
 @                             const uint8_t *limit,
 @                             const uint8_t *thresh,
 @                             int count)
 @ Shared body of the horizontal-edge wrappers in this file. For each of
 @ `count` 8-pixel-wide segments it loads the 16 rows around the edge at s,
 @ calls vpx_wide_mbfilter_neon, and stores whichever rows the helper reports
 @ as modified.
 @ r0    uint8_t *s,
 @ r1    int p, /* pitch */
 @ r2    const uint8_t *blimit,
 @ r3    const uint8_t *limit,
 @ sp    const uint8_t *thresh,
 @ r12   int count
 @ Note: count is supplied in r12 by the wrappers, not on the stack as the
 @ C-style prototype above would suggest.
 _mb_lpf_horizontal_edge:
 	mb_lpf_horizontal_edge: @
    push        {r4-r8, lr}
    vpush       {d8-d15}
     @ thresh is the first stack argument: 24 bytes (r4-r8, lr) plus
     @ 64 bytes (d8-d15) were pushed above it, hence the offset of 88.
    ldr         r4, [sp, #88]               @ load thresh
 h_count:
     @ Reloaded on every iteration: the helper overwrites d16-d18.
    vld1.8      {d16[]}, [r2]               @ load *blimit
    vld1.8      {d17[]}, [r3]               @ load *limit
    vld1.8      {d18[]}, [r4]               @ load *thresh
    sub         r8, r0, r1, lsl #3          @ r8 = s - 8 * pitch (row of p7)
     @ The :64 qualifier tells the hardware each row address is 8-byte aligned.
    vld1.u8     {d0}, [r8,:64], r1           @ p7
    vld1.u8     {d1}, [r8,:64], r1           @ p6
    vld1.u8     {d2}, [r8,:64], r1           @ p5
    vld1.u8     {d3}, [r8,:64], r1           @ p4
    vld1.u8     {d4}, [r8,:64], r1           @ p3
    vld1.u8     {d5}, [r8,:64], r1           @ p2
    vld1.u8     {d6}, [r8,:64], r1           @ p1
    vld1.u8     {d7}, [r8,:64], r1           @ p0
    vld1.u8     {d8}, [r8,:64], r1           @ q0
    vld1.u8     {d9}, [r8,:64], r1           @ q1
    vld1.u8     {d10}, [r8,:64], r1          @ q2
    vld1.u8     {d11}, [r8,:64], r1          @ q3
    vld1.u8     {d12}, [r8,:64], r1          @ q4
    vld1.u8     {d13}, [r8,:64], r1          @ q5
    vld1.u8     {d14}, [r8,:64], r1          @ q6
    vld1.u8     {d15}, [r8,:64], r1          @ q7
     @ The helper returns flags in r7: bit 0 = only filter ran,
     @ bit 1 = filter and mbfilter ran, neither = wide filter ran too.
    bl          vpx_wide_mbfilter_neon
    tst         r7, #1
    beq         h_mbfilter
     @ flat && mask were not set for any of the channels. Just store the values
     @ from filter (4 rows starting at s - 2 * pitch).
    sub         r8, r0, r1, lsl #1
    vst1.u8     {d25}, [r8,:64], r1          @ store op1
    vst1.u8     {d24}, [r8,:64], r1          @ store op0
    vst1.u8     {d23}, [r8,:64], r1          @ store oq0
    vst1.u8     {d26}, [r8,:64], r1          @ store oq1
    b           h_next
 h_mbfilter:
    tst         r7, #2
    beq         h_wide_mbfilter
     @ flat2 was not set for any of the channels. Just store the values from
     @ mbfilter (6 rows starting at s - 3 * pitch).
    sub         r8, r0, r1, lsl #1
    sub         r8, r8, r1
    vst1.u8     {d18}, [r8,:64], r1          @ store op2
    vst1.u8     {d19}, [r8,:64], r1          @ store op1
    vst1.u8     {d20}, [r8,:64], r1          @ store op0
    vst1.u8     {d21}, [r8,:64], r1          @ store oq0
    vst1.u8     {d22}, [r8,:64], r1          @ store oq1
    vst1.u8     {d23}, [r8,:64], r1          @ store oq2
    b           h_next
 h_wide_mbfilter:
     @ Wide filter ran: store 14 rows starting at s - 7 * pitch (p7 and q7
     @ are not written back).
    sub         r8, r0, r1, lsl #3
    add         r8, r8, r1
    vst1.u8     {d16}, [r8,:64], r1          @ store op6
    vst1.u8     {d24}, [r8,:64], r1          @ store op5
    vst1.u8     {d25}, [r8,:64], r1          @ store op4
    vst1.u8     {d26}, [r8,:64], r1          @ store op3
    vst1.u8     {d27}, [r8,:64], r1          @ store op2
    vst1.u8     {d18}, [r8,:64], r1          @ store op1
    vst1.u8     {d19}, [r8,:64], r1          @ store op0
    vst1.u8     {d20}, [r8,:64], r1          @ store oq0
    vst1.u8     {d21}, [r8,:64], r1          @ store oq1
    vst1.u8     {d22}, [r8,:64], r1          @ store oq2
    vst1.u8     {d23}, [r8,:64], r1          @ store oq3
    vst1.u8     {d1}, [r8,:64], r1           @ store oq4
    vst1.u8     {d2}, [r8,:64], r1           @ store oq5
    vst1.u8     {d3}, [r8,:64], r1           @ store oq6
 h_next:
     @ Step 8 pixels to the right and repeat until count reaches zero.
    add         r0, r0, #8
    subs        r12, r12, #1
    bne         h_count
    vpop        {d8-d15}
    pop         {r4-r8, pc}
    @         @ |mb_lpf_horizontal_edge|
 @ void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
 @                                     const uint8_t *blimit,
 @                                     const uint8_t *limit,
 @                                     const uint8_t *thresh)
 @ Filters one 8-pixel-wide segment of a horizontal edge by tail-branching
 @ into mb_lpf_horizontal_edge with count = 1.
 @ r0    uint8_t *s,
 @ r1    int pitch,
 @ r2    const uint8_t *blimit,
 @ r3    const uint8_t *limit,
 @ sp    const uint8_t *thresh
 _vpx_lpf_horizontal_edge_8_neon:
 	vpx_lpf_horizontal_edge_8_neon: @
     @ r12 carries the segment count; the stack is untouched so the shared
     @ body still finds thresh at the caller's original sp.
    mov r12, #1
    b mb_lpf_horizontal_edge
    @         @ |vpx_lpf_horizontal_edge_8_neon|
 @ void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
 @                                      const uint8_t *blimit,
 @                                      const uint8_t *limit,
 @                                      const uint8_t *thresh)
 @ Filters two adjacent 8-pixel-wide segments (16 pixels) of a horizontal
 @ edge by tail-branching into mb_lpf_horizontal_edge with count = 2.
 @ r0    uint8_t *s,
 @ r1    int pitch,
 @ r2    const uint8_t *blimit,
 @ r3    const uint8_t *limit,
 @ sp    const uint8_t *thresh
 _vpx_lpf_horizontal_edge_16_neon:
 	vpx_lpf_horizontal_edge_16_neon: @
     @ r12 carries the segment count; the stack is untouched so the shared
     @ body still finds thresh at the caller's original sp.
    mov r12, #2
    b mb_lpf_horizontal_edge
    @         @ |vpx_lpf_horizontal_edge_16_neon|
 @ void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
 @                               const uint8_t *blimit,
 @                               const uint8_t *limit,
 @                               const uint8_t *thresh)
 @ Filters 8 rows across a vertical edge at s. It loads 16 bytes per row
 @ (8 on each side of s), transposes them into the register layout that
 @ vpx_wide_mbfilter_neon expects, calls the helper, and writes back only
 @ the columns the helper reports as modified.
 @ r0    uint8_t *s,
 @ r1    int p, /* pitch */
 @ r2    const uint8_t *blimit,
 @ r3    const uint8_t *limit,
 @ sp    const uint8_t *thresh,
 _vpx_lpf_vertical_16_neon:
 	vpx_lpf_vertical_16_neon: @
    push        {r4-r8, lr}
    vpush       {d8-d15}
     @ thresh is the first stack argument: 24 bytes (r4-r8, lr) plus
     @ 64 bytes (d8-d15) were pushed above it, hence the offset of 88.
    ldr         r4, [sp, #88]               @ load thresh
    vld1.8      {d16[]}, [r2]               @ load *blimit
    vld1.8      {d17[]}, [r3]               @ load *limit
    vld1.8      {d18[]}, [r4]               @ load *thresh
     @ Load 8 rows: d0-d7 take the 8 bytes left of s (via r8 = s - 8),
     @ d8-d15 the 8 bytes starting at s (via r0).
    sub         r8, r0, #8
    vld1.8      {d0}, [r8,:64], r1
    vld1.8      {d8}, [r0,:64], r1
    vld1.8      {d1}, [r8,:64], r1
    vld1.8      {d9}, [r0,:64], r1
    vld1.8      {d2}, [r8,:64], r1
    vld1.8      {d10}, [r0,:64], r1
    vld1.8      {d3}, [r8,:64], r1
    vld1.8      {d11}, [r0,:64], r1
    vld1.8      {d4}, [r8,:64], r1
    vld1.8      {d12}, [r0,:64], r1
    vld1.8      {d5}, [r8,:64], r1
    vld1.8      {d13}, [r0,:64], r1
    vld1.8      {d6}, [r8,:64], r1
    vld1.8      {d14}, [r0,:64], r1
    vld1.8      {d7}, [r8,:64], r1
    vld1.8      {d15}, [r0,:64], r1
     @ Rewind r0 to the first row (the 8 loads advanced it by 8 * pitch)
    sub         r0, r0, r1, lsl #3
     @ Transpose both 8x8 byte blocks so that d0-d15 hold one pixel column
     @ each (p7..p0, q0..q7), matching the helper's input layout.
    vtrn.32     q0, q2
    vtrn.32     q1, q3
    vtrn.32     q4, q6
    vtrn.32     q5, q7
    vtrn.16     q0, q1
    vtrn.16     q2, q3
    vtrn.16     q4, q5
    vtrn.16     q6, q7
    vtrn.8      d0, d1
    vtrn.8      d2, d3
    vtrn.8      d4, d5
    vtrn.8      d6, d7
    vtrn.8      d8, d9
    vtrn.8      d10, d11
    vtrn.8      d12, d13
    vtrn.8      d14, d15
     @ The helper returns flags in r7: bit 0 = only filter ran,
     @ bit 1 = filter and mbfilter ran, neither = wide filter ran too.
    bl          vpx_wide_mbfilter_neon
    tst         r7, #1
    beq         v_mbfilter
     @ flat && mask were not set for any of the channels. Just store the values
     @ from filter.
     @ Swap so d23-d26 are in column order op1, op0, oq0, oq1, then write
     @ 4 bytes per row starting at s - 2 with interleaving lane stores.
    sub         r8, r0, #2
    vswp        d23, d25
    vst4.8      {d23[0], d24[0], d25[0], d26[0]}, [r8], r1
    vst4.8      {d23[1], d24[1], d25[1], d26[1]}, [r8], r1
    vst4.8      {d23[2], d24[2], d25[2], d26[2]}, [r8], r1
    vst4.8      {d23[3], d24[3], d25[3], d26[3]}, [r8], r1
    vst4.8      {d23[4], d24[4], d25[4], d26[4]}, [r8], r1
    vst4.8      {d23[5], d24[5], d25[5], d26[5]}, [r8], r1
    vst4.8      {d23[6], d24[6], d25[6], d26[6]}, [r8], r1
    vst4.8      {d23[7], d24[7], d25[7], d26[7]}, [r8], r1
    b           v_end
 v_mbfilter:
    tst         r7, #2
    beq         v_wide_mbfilter
     @ flat2 was not set for any of the channels. Just store the values from
     @ mbfilter.
     @ Per row: op2, op1, op0 go to s - 3 (via r8) and oq0, oq1, oq2 to s
     @ (via r0), 3 bytes each.
    sub         r8, r0, #3
    vst3.8      {d18[0], d19[0], d20[0]}, [r8], r1
    vst3.8      {d21[0], d22[0], d23[0]}, [r0], r1
    vst3.8      {d18[1], d19[1], d20[1]}, [r8], r1
    vst3.8      {d21[1], d22[1], d23[1]}, [r0], r1
    vst3.8      {d18[2], d19[2], d20[2]}, [r8], r1
    vst3.8      {d21[2], d22[2], d23[2]}, [r0], r1
    vst3.8      {d18[3], d19[3], d20[3]}, [r8], r1
    vst3.8      {d21[3], d22[3], d23[3]}, [r0], r1
    vst3.8      {d18[4], d19[4], d20[4]}, [r8], r1
    vst3.8      {d21[4], d22[4], d23[4]}, [r0], r1
    vst3.8      {d18[5], d19[5], d20[5]}, [r8], r1
    vst3.8      {d21[5], d22[5], d23[5]}, [r0], r1
    vst3.8      {d18[6], d19[6], d20[6]}, [r8], r1
    vst3.8      {d21[6], d22[6], d23[6]}, [r0], r1
    vst3.8      {d18[7], d19[7], d20[7]}, [r8], r1
    vst3.8      {d21[7], d22[7], d23[7]}, [r0], r1
    b           v_end
 v_wide_mbfilter:
     @ Wide filter ran. Transpose the columns back into rows before storing.
     @ Left block columns:  d0 (p7), d16, d24, d25, d26, d27, d18, d19
     @                      (op6 .. op0).
     @ Right block columns: d20, d21, d22, d23, d1, d2, d3 (oq0 .. oq6),
     @                      d15 (q7).
    sub         r8, r0, #8
    vtrn.32     d0,  d26
    vtrn.32     d16, d27
    vtrn.32     d24, d18
    vtrn.32     d25, d19
    vtrn.16     d0,  d24
    vtrn.16     d16, d25
    vtrn.16     d26, d18
    vtrn.16     d27, d19
    vtrn.8      d0,  d16
    vtrn.8      d24, d25
    vtrn.8      d26, d27
    vtrn.8      d18, d19
    vtrn.32     d20, d1
    vtrn.32     d21, d2
    vtrn.32     d22, d3
    vtrn.32     d23, d15
    vtrn.16     d20, d22
    vtrn.16     d21, d23
    vtrn.16     d1,  d3
    vtrn.16     d2,  d15
    vtrn.8      d20, d21
    vtrn.8      d22, d23
    vtrn.8      d1,  d2
    vtrn.8      d3,  d15
     @ Store 8 rows: left 8 bytes via r8 (s - 8), right 8 bytes via r0 (s)
    vst1.8      {d0}, [r8,:64], r1
    vst1.8      {d20}, [r0,:64], r1
    vst1.8      {d16}, [r8,:64], r1
    vst1.8      {d21}, [r0,:64], r1
    vst1.8      {d24}, [r8,:64], r1
    vst1.8      {d22}, [r0,:64], r1
    vst1.8      {d25}, [r8,:64], r1
    vst1.8      {d23}, [r0,:64], r1
    vst1.8      {d26}, [r8,:64], r1
    vst1.8      {d1}, [r0,:64], r1
    vst1.8      {d27}, [r8,:64], r1
    vst1.8      {d2}, [r0,:64], r1
    vst1.8      {d18}, [r8,:64], r1
    vst1.8      {d3}, [r0,:64], r1
    vst1.8      {d19}, [r8,:64], r1
    vst1.8      {d15}, [r0,:64], r1
 v_end:
    vpop        {d8-d15}
    pop         {r4-r8, pc}
    @         @ |vpx_lpf_vertical_16_neon|
 @ void vpx_wide_mbfilter_neon() @
 @ This is a helper function for the loopfilters. The individual functions do
 @ the necessary load, transpose (if necessary) and store.
 @
 @ r0-r3 PRESERVE
 @ d16    blimit
 @ d17    limit
 @ d18    thresh
 @ d0    p7
 @ d1    p6
 @ d2    p5
 @ d3    p4
 @ d4    p3
 @ d5    p2
 @ d6    p1
 @ d7    p0
 @ d8    q0
 @ d9    q1
 @ d10   q2
 @ d11   q3
 @ d12   q4
 @ d13   q5
 @ d14   q6
 @ d15   q7
 @
 @ Clobbers r5, r6, r7, d8, d9 and d16-d31. The result depends on r7:
 @   r7 bit 0 set: (flat && mask) was zero in every lane, only filter() ran.
 @                 op1 = d25, op0 = d24, oq0 = d23, oq1 = d26.
 @   r7 bit 1 set: (flat2 && flat && mask) was zero in every lane, filter()
 @                 and mbfilter() ran.
 @                 op2..op0 = d18, d19, d20; oq0..oq2 = d21, d22, d23.
 @   r7 == 0:      the wide filter ran as well.
 @                 op6..op0 = d16, d24, d25, d26, d27, d18, d19;
 @                 oq0..oq6 = d20, d21, d22, d23, d1, d2, d3.
 _vpx_wide_mbfilter_neon:
 	vpx_wide_mbfilter_neon: @
    mov         r7, #0
     @ filter_mask
    vabd.u8     d19, d4, d5                 @ abs(p3 - p2)
    vabd.u8     d20, d5, d6                 @ abs(p2 - p1)
    vabd.u8     d21, d6, d7                 @ abs(p1 - p0)
    vabd.u8     d22, d9, d8                 @ abs(q1 - q0)
    vabd.u8     d23, d10, d9                @ abs(q2 - q1)
    vabd.u8     d24, d11, d10               @ abs(q3 - q2)
     @ only compare the largest value to limit
    vmax.u8     d19, d19, d20               @ max(abs(p3 - p2), abs(p2 - p1))
    vmax.u8     d20, d21, d22               @ max(abs(p1 - p0), abs(q1 - q0))
    vmax.u8     d23, d23, d24               @ max(abs(q2 - q1), abs(q3 - q2))
    vmax.u8     d19, d19, d20
    vabd.u8     d24, d7, d8                 @ abs(p0 - q0)
    vmax.u8     d19, d19, d23
    vabd.u8     d23, d6, d9                 @ a = abs(p1 - q1)
    vqadd.u8    d24, d24, d24               @ b = abs(p0 - q0) * 2
     @ lanes where limit >= largest abs difference
    vcge.u8     d19, d17, d19
     @ flatmask4
    vabd.u8     d25, d7, d5                 @ abs(p0 - p2)
    vabd.u8     d26, d8, d10                @ abs(q0 - q2)
    vabd.u8     d27, d4, d7                 @ abs(p3 - p0)
    vabd.u8     d28, d11, d8                @ abs(q3 - q0)
     @ only compare the largest value to 1
    vmax.u8     d25, d25, d26               @ max(abs(p0 - p2), abs(q0 - q2))
    vmax.u8     d26, d27, d28               @ max(abs(p3 - p0), abs(q3 - q0))
    vmax.u8     d25, d25, d26
    vmax.u8     d20, d20, d25
    vshr.u8     d23, d23, #1                @ a = a / 2
    vqadd.u8    d24, d24, d23               @ a = b + a
    vmov.u8     d30, #1
    vcge.u8     d24, d16, d24               @ blimit >= abs(p0-q0) * 2 + abs(p1-q1) / 2
    vcge.u8     d20, d30, d20               @ flat (1 >= largest difference)
    vand        d19, d19, d24               @ mask
     @ hevmask
    vcgt.u8     d21, d21, d18               @ (abs(p1 - p0) > thresh)*-1
    vcgt.u8     d22, d22, d18               @ (abs(q1 - q0) > thresh)*-1
    vorr        d21, d21, d22               @ hev
    vand        d16, d20, d19               @ flat && mask
    vmov        r5, r6, d16
     @ flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7)
    vabd.u8     d22, d3, d7                 @ abs(p4 - p0)
    vabd.u8     d23, d12, d8                @ abs(q4 - q0)
    vabd.u8     d24, d7, d2                 @ abs(p0 - p5)
    vabd.u8     d25, d8, d13                @ abs(q0 - q5)
    vabd.u8     d26, d1, d7                 @ abs(p6 - p0)
    vabd.u8     d27, d14, d8                @ abs(q6 - q0)
    vabd.u8     d28, d0, d7                 @ abs(p7 - p0)
    vabd.u8     d29, d15, d8                @ abs(q7 - q0)
     @ only compare the largest value to 1
    vmax.u8     d22, d22, d23               @ max(abs(p4 - p0), abs(q4 - q0))
    vmax.u8     d23, d24, d25               @ max(abs(p0 - p5), abs(q0 - q5))
    vmax.u8     d24, d26, d27               @ max(abs(p6 - p0), abs(q6 - q0))
    vmax.u8     d25, d28, d29               @ max(abs(p7 - p0), abs(q7 - q0))
    vmax.u8     d26, d22, d23
    vmax.u8     d27, d24, d25
    vmax.u8     d23, d26, d27
    vcge.u8     d18, d30, d23               @ flat2
    vmov.u8     d22, #0x80
     @ r5:r6 hold the 64-bit (flat && mask); zero means no lane needs mbfilter
    orrs        r5, r5, r6                  @ Check for 0
    orreq       r7, r7, #1                  @ Only do filter branch
    vand        d17, d18, d16               @ flat2 && flat && mask
    vmov        r5, r6, d17
     @ mbfilter() function
     @ filter() function
     @ convert to signed
    veor        d23, d8, d22                @ qs0
    veor        d24, d7, d22                @ ps0
    veor        d25, d6, d22                @ ps1
    veor        d26, d9, d22                @ qs1
    vmov.u8     d27, #3
    vsub.s8     d28, d23, d24               @ ( qs0 - ps0)
    vqsub.s8    d29, d25, d26               @ filter = clamp(ps1-qs1)
    vmull.s8    q15, d28, d27               @ 3 * ( qs0 - ps0)
    vand        d29, d29, d21               @ filter &= hev
    vaddw.s8    q15, q15, d29               @ filter + 3 * (qs0 - ps0)
    vmov.u8     d29, #4
     @ filter = clamp(filter + 3 * ( qs0 - ps0))
    vqmovn.s16  d28, q15
    vand        d28, d28, d19               @ filter &= mask
    vqadd.s8    d30, d28, d27               @ filter2 = clamp(filter+3)
    vqadd.s8    d29, d28, d29               @ filter1 = clamp(filter+4)
    vshr.s8     d30, d30, #3                @ filter2 >>= 3
    vshr.s8     d29, d29, #3                @ filter1 >>= 3
    vqadd.s8    d24, d24, d30               @ op0 = clamp(ps0 + filter2)
    vqsub.s8    d23, d23, d29               @ oq0 = clamp(qs0 - filter1)
     @ outer tap adjustments: ++filter1 >> 1
    vrshr.s8    d29, d29, #1
    vbic        d29, d29, d21               @ filter &= ~hev
    vqadd.s8    d25, d25, d29               @ op1 = clamp(ps1 + filter)
    vqsub.s8    d26, d26, d29               @ oq1 = clamp(qs1 - filter)
    veor        d24, d24, d22               @ *f_op0 = u^0x80
    veor        d23, d23, d22               @ *f_oq0 = u^0x80
    veor        d25, d25, d22               @ *f_op1 = u^0x80
    veor        d26, d26, d22               @ *f_oq1 = u^0x80
     @ Early exit: no lane had flat && mask, the filter() results are final
    tst         r7, #1
    bxne        lr
     @ r5:r6 hold the 64-bit (flat2 && flat && mask)
    orrs        r5, r5, r6                  @ Check for 0
    orreq       r7, r7, #2                  @ Only do mbfilter branch
     @ mbfilter flat && mask branch
     @ TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's
     @ and using vbit on the q's?
    vmov.u8     d29, #2
    vaddl.u8    q15, d7, d8                 @ op2 = p0 + q0
    vmlal.u8    q15, d4, d27                @ op2 = p0 + q0 + p3 * 3
    vmlal.u8    q15, d5, d29                @ op2 = p0 + q0 + p3 * 3 + p2 * 2
    vaddl.u8    q10, d4, d5
    vaddw.u8    q15, d6                     @ op2=p1 + p0 + q0 + p3 * 3 + p2 *2
    vaddl.u8    q14, d6, d9
    vqrshrn.u16 d18, q15, #3                @ r_op2
    vsub.i16    q15, q10
    vaddl.u8    q10, d4, d6
    vadd.i16    q15, q14
    vaddl.u8    q14, d7, d10
    vqrshrn.u16 d19, q15, #3                @ r_op1
    vsub.i16    q15, q10
    vadd.i16    q15, q14
    vaddl.u8    q14, d8, d11
    vqrshrn.u16 d20, q15, #3                @ r_op0
    vsubw.u8    q15, d4                     @ oq0 = op0 - p3
    vsubw.u8    q15, d7                     @ oq0 -= p0
    vadd.i16    q15, q14
    vaddl.u8    q14, d9, d11
    vqrshrn.u16 d21, q15, #3                @ r_oq0
    vsubw.u8    q15, d5                     @ oq1 = oq0 - p2
    vsubw.u8    q15, d8                     @ oq1 -= q0
    vadd.i16    q15, q14
    vaddl.u8    q14, d10, d11
    vqrshrn.u16 d22, q15, #3                @ r_oq1
    vsubw.u8    q15, d6                     @ oq2 = oq1 - p1
    vsubw.u8    q15, d9                     @ oq2 -= q1
    vadd.i16    q15, q14
    vqrshrn.u16 d27, q15, #3                @ r_oq2
     @ Filter does not set op2 or oq2, so use p2 and q2.
    vbif        d18, d5, d16                @ t_op2 |= p2 & ~(flat & mask)
    vbif        d19, d25, d16               @ t_op1 |= f_op1 & ~(flat & mask)
    vbif        d20, d24, d16               @ t_op0 |= f_op0 & ~(flat & mask)
    vbif        d21, d23, d16               @ t_oq0 |= f_oq0 & ~(flat & mask)
    vbif        d22, d26, d16               @ t_oq1 |= f_oq1 & ~(flat & mask)
    vbit        d23, d27, d16               @ t_oq2 |= r_oq2 & (flat & mask)
    vbif        d23, d10, d16               @ t_oq2 |= q2 & ~(flat & mask)
     @ Early exit: no lane had flat2 && flat && mask, mbfilter results are final
    tst         r7, #2
    bxne        lr
     @ wide_mbfilter flat2 && flat && mask branch
    vmov.u8     d16, #7
    vaddl.u8    q15, d7, d8                 @ op6 = p0 + q0
    vaddl.u8    q12, d2, d3
    vaddl.u8    q13, d4, d5
    vaddl.u8    q14, d1, d6
    vmlal.u8    q15, d0, d16                @ op6 += p7 * 7
    vadd.i16    q12, q13
    vadd.i16    q15, q14
    vaddl.u8    q14, d2, d9
    vadd.i16    q15, q12
    vaddl.u8    q12, d0, d1
    vaddw.u8    q15, d1
    vaddl.u8    q13, d0, d2
    vadd.i16    q14, q15, q14
    vqrshrn.u16 d16, q15, #4                @ w_op6
    vsub.i16    q15, q14, q12
    vaddl.u8    q14, d3, d10
    vqrshrn.u16 d24, q15, #4                @ w_op5
    vsub.i16    q15, q13
    vaddl.u8    q13, d0, d3
    vadd.i16    q15, q14
    vaddl.u8    q14, d4, d11
    vqrshrn.u16 d25, q15, #4                @ w_op4
    vadd.i16    q15, q14
    vaddl.u8    q14, d0, d4
    vsub.i16    q15, q13
    vsub.i16    q14, q15, q14
    vqrshrn.u16 d26, q15, #4                @ w_op3
    vaddw.u8    q15, q14, d5                @ op2 += p2
    vaddl.u8    q14, d0, d5
    vaddw.u8    q15, d12                    @ op2 += q4
    vbif        d26, d4, d17                @ op3 |= p3 & ~(f2 & f & m)
    vqrshrn.u16 d27, q15, #4                @ w_op2
    vsub.i16    q15, q14
    vaddl.u8    q14, d0, d6
    vaddw.u8    q15, d6                     @ op1 += p1
    vaddw.u8    q15, d13                    @ op1 += q5
    vbif        d27, d18, d17               @ op2 |= t_op2 & ~(f2 & f & m)
    vqrshrn.u16 d18, q15, #4                @ w_op1
    vsub.i16    q15, q14
    vaddl.u8    q14, d0, d7
    vaddw.u8    q15, d7                     @ op0 += p0
    vaddw.u8    q15, d14                    @ op0 += q6
    vbif        d18, d19, d17               @ op1 |= t_op1 & ~(f2 & f & m)
    vqrshrn.u16 d19, q15, #4                @ w_op0
    vsub.i16    q15, q14
    vaddl.u8    q14, d1, d8
    vaddw.u8    q15, d8                     @ oq0 += q0
    vaddw.u8    q15, d15                    @ oq0 += q7
    vbif        d19, d20, d17               @ op0 |= t_op0 & ~(f2 & f & m)
    vqrshrn.u16 d20, q15, #4                @ w_oq0
    vsub.i16    q15, q14
    vaddl.u8    q14, d2, d9
    vaddw.u8    q15, d9                     @ oq1 += q1
     @ From here q4 (d8/d9) is scratch; the q0 and q1 inputs it held are not
     @ read again below.
    vaddl.u8    q4, d10, d15
    vaddw.u8    q15, d15                    @ oq1 += q7
    vbif        d20, d21, d17               @ oq0 |= t_oq0 & ~(f2 & f & m)
    vqrshrn.u16 d21, q15, #4                @ w_oq1
    vsub.i16    q15, q14
    vaddl.u8    q14, d3, d10
    vadd.i16    q15, q4
    vaddl.u8    q4, d11, d15
    vbif        d21, d22, d17               @ oq1 |= t_oq1 & ~(f2 & f & m)
    vqrshrn.u16 d22, q15, #4                @ w_oq2
    vsub.i16    q15, q14
    vaddl.u8    q14, d4, d11
    vadd.i16    q15, q4
    vaddl.u8    q4, d12, d15
    vbif        d22, d23, d17               @ oq2 |= t_oq2 & ~(f2 & f & m)
    vqrshrn.u16 d23, q15, #4                @ w_oq3
    vsub.i16    q15, q14
    vaddl.u8    q14, d5, d12
    vadd.i16    q15, q4
    vaddl.u8    q4, d13, d15
    vbif        d16, d1, d17                @ op6 |= p6 & ~(f2 & f & m)
    vqrshrn.u16 d1, q15, #4                 @ w_oq4
    vsub.i16    q15, q14
    vaddl.u8    q14, d6, d13
    vadd.i16    q15, q4
    vaddl.u8    q4, d14, d15
    vbif        d24, d2, d17                @ op5 |= p5 & ~(f2 & f & m)
    vqrshrn.u16 d2, q15, #4                 @ w_oq5
    vsub.i16    q15, q14
    vbif        d25, d3, d17                @ op4 |= p4 & ~(f2 & f & m)
    vadd.i16    q15, q4
    vbif        d23, d11, d17               @ oq3 |= q3 & ~(f2 & f & m)
    vqrshrn.u16 d3, q15, #4                 @ w_oq6
    vbif        d1, d12, d17                @ oq4 |= q4 & ~(f2 & f & m)
    vbif        d2, d13, d17                @ oq5 |= q5 & ~(f2 & f & m)
    vbif        d3, d14, d17                @ oq6 |= q6 & ~(f2 & f & m)
    bx          lr
    @         @ |vpx_wide_mbfilter_neon|
--- a/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s
+++ b/thirdparty/libvpx/vpx_dsp/arm/gas_apple/save_reg_neon.s
@ -0,0 +1,46 @@
@ This file was created from a .asm file
@  using the ads2gas_apple.pl script.
 	.set WIDE_REFERENCE, 0
 	.set ARCHITECTURE, 5
 	.set DO1STROUNDING, 0
 @
 @  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 @
 @  Use of this source code is governed by a BSD-style license
 @  that can be found in the LICENSE file in the root of the source
 @  tree. An additional intellectual property rights grant can be found
 @  in the file PATENTS.  All contributing project authors may
 @  be found in the AUTHORS file in the root of the source tree.
 @
    .globl _vpx_push_neon
 	.globl vpx_push_neon
    .globl _vpx_pop_neon
 	.globl vpx_pop_neon
   @ ARM
   @ 
   @ PRESERVE8
 .text
 .p2align 2
 @ vpx_push_neon / _vpx_push_neon: two labels for the same entry point.
 @ Stores NEON registers d8-d15 (8 registers x 8 bytes = 64 bytes) to the
 @ memory addressed by r0. Both stores use post-increment writeback ("!"),
 @ so r0 has advanced by 64 bytes when the routine returns.
 @ NOTE(review): presumably r0 is the caller's save buffer (first argument
 @ under the standard ARM calling convention) and d8-d15 are chosen because
 @ the AAPCS makes them callee-saved -- confirm against the C prototype.
 _vpx_push_neon:
 	vpx_push_neon: @
    @ d8-d11 -> [r0], then r0 += 32
    vst1.i64            {d8, d9, d10, d11}, [r0]!
    @ d12-d15 -> [r0], then r0 += 32
    vst1.i64            {d12, d13, d14, d15}, [r0]!
    @ return to the caller
    bx              lr
    @
 @ vpx_pop_neon / _vpx_pop_neon: two labels for the same entry point.
 @ Loads NEON registers d8-d15 (64 bytes) from the memory addressed by r0,
 @ in the same register order that vpx_push_neon stores them. Both loads use
 @ post-increment writeback ("!"), so r0 has advanced by 64 bytes on return.
 @ NOTE(review): presumably r0 must point at a buffer previously filled by
 @ vpx_push_neon -- confirm against the callers.
 _vpx_pop_neon:
 	vpx_pop_neon: @
    @ [r0] -> d8-d11, then r0 += 32
    vld1.i64            {d8, d9, d10, d11}, [r0]!
    @ [r0] -> d12-d15, then r0 += 32
    vld1.i64            {d12, d13, d14, d15}, [r0]!
    @ return to the caller
    bx              lr
    @
--- a/thirdparty/libvpx/vpx_dsp_rtcd.h
+++ b/thirdparty/libvpx/vpx_dsp_rtcd.h
@ -0,0 +1,9 @@
 /*
  * Dispatch header for the vpx_dsp RTCD declarations: includes exactly one
  * per-architecture header.
  *   - x86 variant: WEBM_X86ASM is defined and ARCH_X86 or ARCH_X86_64 is set
  *   - ARM variant: WEBM_ARMASM is defined and ARCH_ARM is set
  *   - otherwise:   the plain C variant
  * NOTE(review): the ARCH_* macros are presumably supplied by vpx_config.h --
  * confirm.
  */
 #include "vpx_config.h"
 #if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
 	#include "rtcd/vpx_dsp_rtcd_x86.h"
 #elif defined(WEBM_ARMASM) && ARCH_ARM
 	#include "rtcd/vpx_dsp_rtcd_arm.h"
 #else
 	#include "rtcd/vpx_dsp_rtcd_c.h"
 #endif
--- a/thirdparty/libvpx/vpx_ports/arm_cpudetect.c
+++ b/thirdparty/libvpx/vpx_ports/arm_cpudetect.c
@ -38,26 +38,7 @@ static int arm_cpu_env_mask(void) {
 }
 #if !CONFIG_RUNTIME_CPU_DETECT
-
+  #error "CONFIG_RUNTIME_CPU_DETECT should be enabled!"
 /*
  * Capability query for builds without run-time CPU detection.
  * If arm_cpu_env_flags() returns 0, the flags it wrote are returned as-is
  * (the mask is not applied). Otherwise every feature enabled at compile time
  * (HAVE_MEDIA, HAVE_NEON / HAVE_NEON_ASM) is OR-ed into flags and the result
  * is filtered through arm_cpu_env_mask().
  * NOTE(review): `flags` has no local initializer and is read by the |= below,
  * so this relies on arm_cpu_env_flags() storing a value even when it returns
  * nonzero -- confirm against its definition.
  * NOTE(review): this definition follows the #error in the
  * !CONFIG_RUNTIME_CPU_DETECT branch, so that branch fails to compile before
  * reaching it.
  */
 int arm_cpu_caps(void) {
  /* This function should actually be a no-op. There is no way to adjust any of
   * these because the RTCD tables do not exist: the functions are called
   * statically */
  int flags;
  int mask;
  if (!arm_cpu_env_flags(&flags)) {
    return flags;
  }
  mask = arm_cpu_env_mask();
 #if HAVE_MEDIA
  flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
 #if HAVE_NEON || HAVE_NEON_ASM
  flags |= HAS_NEON;
 #endif /* HAVE_NEON  || HAVE_NEON_ASM */
  return flags & mask;
 }
 #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */
 /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
 #define WIN32_LEAN_AND_MEAN
@ -76,28 +57,28 @@ int arm_cpu_caps(void) {
   * All of these instructions should be essentially nops.
   */
 #if HAVE_MEDIA
-  if (mask & HAS_MEDIA)
+  if (mask & HAS_MEDIA) {
    __try {
      /*SHADD8 r3,r3,r3*/
      __emit(0xE6333F93);
      flags |= HAS_MEDIA;
    } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
    /*Ignore exception.*/
    }
  }
 }
 #endif /* HAVE_MEDIA */
 #if HAVE_NEON || HAVE_NEON_ASM
-if (mask &HAS_NEON) {
+  if (mask &HAS_NEON) {
-  __try {
+    __try {
-    /*VORR q0,q0,q0*/
+      /*VORR q0,q0,q0*/
-    __emit(0xF2200150);
+      __emit(0xF2200150);
-    flags |= HAS_NEON;
+      flags |= HAS_NEON;
-  } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
+    } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
-    /*Ignore exception.*/
+      /*Ignore exception.*/
    }
  }
 }
 #endif /* HAVE_NEON || HAVE_NEON_ASM */
-return flags & mask;
+  return flags & mask;
 }
 #elif defined(__ANDROID__) /* end _MSC_VER */
@ -170,6 +151,20 @@ int arm_cpu_caps(void) {
  return flags & mask;
 }
 #else /* end __linux__ */
-#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+int arm_cpu_caps(void) {
-"available for your platform. Reconfigure with --disable-runtime-cpu-detect."
+  int flags;
  int mask;
  /* Fallback used in the final #else branch, i.e. when none of the earlier
   * platform-specific detection branches was selected.
   * If arm_cpu_env_flags() returns 0, the flags it wrote are returned as-is
   * (the mask is not applied). */
  if (!arm_cpu_env_flags(&flags)) {
    return flags;
  }
  mask = arm_cpu_env_mask();
  /* No probing is done here: every feature enabled at compile time is
   * reported as present, then filtered through the mask.
   * NOTE(review): `flags` has no local initializer, so the |= below relies on
   * arm_cpu_env_flags() storing a value even when it returns nonzero --
   * confirm against its definition. */
 #if HAVE_MEDIA
  flags |= HAS_MEDIA;
 #endif /* HAVE_MEDIA */
 #if HAVE_NEON || HAVE_NEON_ASM
  flags |= HAS_NEON;
 #endif /* HAVE_NEON  || HAVE_NEON_ASM */
  return flags & mask;
 }
 #warning "ARM run-time CPU detection is disabled for this platform..."
 #endif
--- a/thirdparty/libvpx/vpx_ports/system_state.h
+++ b/thirdparty/libvpx/vpx_ports/system_state.h
@ -13,10 +13,10 @@
 #include "./vpx_config.h"
-#if ARCH_X86 || ARCH_X86_64
+#if defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
-void vpx_reset_mmx_state(void);
+	void vpx_reset_mmx_state(void);
-#define vpx_clear_system_state() vpx_reset_mmx_state()
+	#define vpx_clear_system_state() vpx_reset_mmx_state()
 #else
 // In this branch vpx_clear_system_state() expands to nothing.
-#define vpx_clear_system_state()
+	#define vpx_clear_system_state()
 #endif  // defined(WEBM_X86ASM) && (ARCH_X86 || ARCH_X86_64)
 #endif  // VPX_PORTS_SYSTEM_STATE_H_
--- a/thirdparty/libvpx/vpx_scale_rtcd.h
+++ b/thirdparty/libvpx/vpx_scale_rtcd.h
@ -0,0 +1,44 @@
 /*
  * RTCD (run-time CPU detection) header for vpx_scale.
  * Every entry point below is bound at compile time to its _c implementation
  * through a #define, so this header declares no function pointers.
  */
 #ifndef VPX_SCALE_RTCD_H_
 #define VPX_SCALE_RTCD_H_
 /* RTCD_EXTERN is empty when RTCD_C is defined and `extern` otherwise. */
 #ifdef RTCD_C
 #define RTCD_EXTERN
 #else
 #define RTCD_EXTERN extern
 #endif
 /* Forward declaration: only pointers to this struct are used below. */
 struct yv12_buffer_config;
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Each pair: prototype of the C implementation, then the public name aliased
  * to it. */
 void vp8_yv12_copy_frame_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
 #define vp8_yv12_copy_frame vp8_yv12_copy_frame_c
 void vp8_yv12_extend_frame_borders_c(struct yv12_buffer_config *ybf);
 #define vp8_yv12_extend_frame_borders vp8_yv12_extend_frame_borders_c
 void vpx_extend_frame_borders_c(struct yv12_buffer_config *ybf);
 #define vpx_extend_frame_borders vpx_extend_frame_borders_c
 void vpx_extend_frame_inner_borders_c(struct yv12_buffer_config *ybf);
 #define vpx_extend_frame_inner_borders vpx_extend_frame_inner_borders_c
 void vpx_yv12_copy_y_c(const struct yv12_buffer_config *src_ybc, struct yv12_buffer_config *dst_ybc);
 #define vpx_yv12_copy_y vpx_yv12_copy_y_c
 /* Declared only; the definition is not in this header. */
 void vpx_scale_rtcd(void);
 /* Compiled only where RTCD_C is defined. */
 #ifdef RTCD_C
 static void setup_rtcd_internal(void)
 {
    // Intentionally empty: only MIPS has something here, but it is not supported.
 }
 #endif  // RTCD_C
 #ifdef __cplusplus
 }  // extern "C"
 #endif
 #endif  // VPX_SCALE_RTCD_H_