• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/x86/vp8dsp-init.c

Go to the documentation of this file.
00001 /*
00002  * VP8 DSP functions x86-optimized
00003  * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
00004  * Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
00005  *
00006  * This file is part of Libav.
00007  *
00008  * Libav is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2.1 of the License, or (at your option) any later version.
00012  *
00013  * Libav is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with Libav; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00023 #include "libavutil/cpu.h"
00024 #include "libavutil/x86_cpu.h"
00025 #include "libavcodec/vp8dsp.h"
00026 
00027 #if HAVE_YASM
00028 
/*
 * MC functions
 *
 * Prototypes only: none of the routines listed here is defined in this
 * file.  They all take the same seven arguments, so the signature is
 * spelled once in DECLARE_VP8_MC() and the list below merely enumerates
 * the block-size / filter / instruction-set combinations that exist.
 */
#define DECLARE_VP8_MC(NAME) \
    extern void NAME(uint8_t *dst, int dststride, \
                     uint8_t *src, int srcstride, \
                     int height, int mx, int my)

/* epel4, MMXEXT */
DECLARE_VP8_MC(ff_put_vp8_epel4_h4_mmxext);
DECLARE_VP8_MC(ff_put_vp8_epel4_h6_mmxext);
DECLARE_VP8_MC(ff_put_vp8_epel4_v4_mmxext);
DECLARE_VP8_MC(ff_put_vp8_epel4_v6_mmxext);

/* epel8, SSE2 */
DECLARE_VP8_MC(ff_put_vp8_epel8_h4_sse2);
DECLARE_VP8_MC(ff_put_vp8_epel8_h6_sse2);
DECLARE_VP8_MC(ff_put_vp8_epel8_v4_sse2);
DECLARE_VP8_MC(ff_put_vp8_epel8_v6_sse2);

/* epel4 and epel8, SSSE3 */
DECLARE_VP8_MC(ff_put_vp8_epel4_h4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel4_h6_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel4_v4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel4_v6_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_h4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_h6_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_v4_ssse3);
DECLARE_VP8_MC(ff_put_vp8_epel8_v6_ssse3);

/* bilinear, horizontal */
DECLARE_VP8_MC(ff_put_vp8_bilinear4_h_mmxext);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_h_sse2);
DECLARE_VP8_MC(ff_put_vp8_bilinear4_h_ssse3);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_h_ssse3);

/* bilinear, vertical */
DECLARE_VP8_MC(ff_put_vp8_bilinear4_v_mmxext);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_v_sse2);
DECLARE_VP8_MC(ff_put_vp8_bilinear4_v_ssse3);
DECLARE_VP8_MC(ff_put_vp8_bilinear8_v_ssse3);

/* pixels8 / pixels16 (installed in the [0][0] table slots by the init code) */
DECLARE_VP8_MC(ff_put_vp8_pixels8_mmx);
DECLARE_VP8_MC(ff_put_vp8_pixels16_mmx);
DECLARE_VP8_MC(ff_put_vp8_pixels16_sse);
00119 
/*
 * TAP_W16() defines a static 16-column MC function in terms of the
 * 8-column function that has the same filter type, tap type and
 * instruction-set suffix: the narrower routine is invoked for column
 * offset 0 and then for column offset 8, with the strides, height and
 * mx/my passed through unchanged.
 */
#define TAP_W16(ISA, KIND, TAP) \
static void ff_put_vp8_ ## KIND ## 16_ ## TAP ## _ ## ISA( \
    uint8_t *dst,  int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    int col; \
    for (col = 0; col < 16; col += 8) \
        ff_put_vp8_ ## KIND ## 8_ ## TAP ## _ ## ISA( \
            dst + col, dststride, src + col, srcstride, height, mx, my); \
}

/*
 * TAP_W8() is the same construction one size down: an 8-column function
 * made of two calls to the 4-column one, at column offsets 0 and 4.
 */
#define TAP_W8(ISA, KIND, TAP) \
static void ff_put_vp8_ ## KIND ## 8_ ## TAP ## _ ## ISA( \
    uint8_t *dst,  int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    int col; \
    for (col = 0; col < 8; col += 4) \
        ff_put_vp8_ ## KIND ## 4_ ## TAP ## _ ## ISA( \
            dst + col, dststride, src + col, srcstride, height, mx, my); \
}

/* MMXEXT: only 4-column routines are declared, so build 8 first ... */
TAP_W8 (mmxext, epel,     h4)
TAP_W8 (mmxext, epel,     h6)
TAP_W8 (mmxext, epel,     v4)
TAP_W8 (mmxext, epel,     v6)
TAP_W8 (mmxext, bilinear, h)
TAP_W8 (mmxext, bilinear, v)
/* ... and then 16 on top of the 8-column functions just generated. */
TAP_W16(mmxext, epel,     h6)
TAP_W16(mmxext, epel,     v6)
TAP_W16(mmxext, bilinear, h)
TAP_W16(mmxext, bilinear, v)

/* SSE2: 8-column routines are declared above, only 16 is synthesised. */
TAP_W16(sse2,   epel,     h6)
TAP_W16(sse2,   epel,     v6)
TAP_W16(sse2,   bilinear, h)
TAP_W16(sse2,   bilinear, v)

/* SSSE3: likewise. */
TAP_W16(ssse3,  epel,     h6)
TAP_W16(ssse3,  epel,     v6)
TAP_W16(ssse3,  bilinear, h)
TAP_W16(ssse3,  bilinear, v)
00161 
00162 #define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
00163 static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
00164     uint8_t *dst, int dststride, uint8_t *src, \
00165     int srcstride, int height, int mx, int my) \
00166 { \
00167     DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
00168     uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \
00169     src -= srcstride * (TAPNUMY / 2 - 1); \
00170     ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
00171         tmp, SIZE,      src,    srcstride, height + TAPNUMY - 1, mx, my); \
00172     ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
00173         dst, dststride, tmpptr, SIZE,      height,               mx, my); \
00174 }
00175 
00176 #define HVTAPMMX(x, y) \
00177 HVTAP(mmxext, 8, x, y,  4,  8) \
00178 HVTAP(mmxext, 8, x, y,  8, 16)
00179 
00180 HVTAPMMX(4, 4)
00181 HVTAPMMX(4, 6)
00182 HVTAPMMX(6, 4)
00183 HVTAPMMX(6, 6)
00184 HVTAP(mmxext, 8, 6, 6, 16, 16)
00185 
00186 #define HVTAPSSE2(x, y, w) \
00187 HVTAP(sse2,  16, x, y, w, 16) \
00188 HVTAP(ssse3, 16, x, y, w, 16)
00189 
00190 HVTAPSSE2(4, 4, 8)
00191 HVTAPSSE2(4, 6, 8)
00192 HVTAPSSE2(6, 4, 8)
00193 HVTAPSSE2(6, 6, 8)
00194 HVTAPSSE2(6, 6, 16)
00195 
00196 HVTAP(ssse3, 16, 4, 4, 4, 8)
00197 HVTAP(ssse3, 16, 4, 6, 4, 8)
00198 HVTAP(ssse3, 16, 6, 4, 4, 8)
00199 HVTAP(ssse3, 16, 6, 6, 4, 8)
00200 
00201 #define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
00202 static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
00203     uint8_t *dst, int dststride, uint8_t *src, \
00204     int srcstride, int height, int mx, int my) \
00205 { \
00206     DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \
00207     ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
00208         tmp, SIZE,      src, srcstride, height + 1, mx, my); \
00209     ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
00210         dst, dststride, tmp, SIZE,      height,     mx, my); \
00211 }
00212 
00213 HVBILIN(mmxext, 8,  4,  8)
00214 HVBILIN(mmxext, 8,  8, 16)
00215 HVBILIN(mmxext, 8, 16, 16)
00216 HVBILIN(sse2,   8,  8, 16)
00217 HVBILIN(sse2,   8, 16, 16)
00218 HVBILIN(ssse3,  8,  4,  8)
00219 HVBILIN(ssse3,  8,  8, 16)
00220 HVBILIN(ssse3,  8, 16, 16)
00221 
00222 extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
00223 extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
00224 extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
00225 extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
00226 extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
00227 extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
00228 extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
00229 extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
00230 extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
00231 
/*
 * DECLARE_LOOP_FILTER(ISA) emits the prototypes of the ten loop-filter
 * routines carrying the suffix _<ISA>: the simple filter, the inner
 * filters and the macroblock-edge filters, each in a v_ and an h_
 * flavour.  The 16y routines take a single plane pointer, the 8uv
 * routines take two (dstU and dstV).
 *
 * The expansion already ends in ';', so invocations take no semicolon.
 */
#define DECLARE_LOOP_FILTER(ISA)\
/* v_ routines */\
extern void ff_vp8_v_loop_filter_simple_ ## ISA(uint8_t *dst, int stride, int flim);\
extern void ff_vp8_v_loop_filter16y_inner_ ## ISA (uint8_t *dst, int stride,\
                                                   int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_inner_ ## ISA (uint8_t *dstU, uint8_t *dstV,\
                                                   int s, int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter16y_mbedge_ ## ISA(uint8_t *dst, int stride,\
                                                   int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## ISA(uint8_t *dstU, uint8_t *dstV,\
                                                   int s, int e, int i, int hvt);\
/* h_ routines */\
extern void ff_vp8_h_loop_filter_simple_ ## ISA(uint8_t *dst, int stride, int flim);\
extern void ff_vp8_h_loop_filter16y_inner_ ## ISA (uint8_t *dst, int stride,\
                                                   int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_inner_ ## ISA (uint8_t *dstU, uint8_t *dstV,\
                                                   int s, int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_mbedge_ ## ISA(uint8_t *dst, int stride,\
                                                   int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## ISA(uint8_t *dstU, uint8_t *dstV,\
                                                   int s, int e, int i, int hvt);

/* One full set of prototypes per instruction-set suffix. */
DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmxext)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
00257 
00258 #endif
00259 
/*
 * Table-filling helpers.  Each one stores function pointers into the MC
 * tables of a context that must be in scope under the name `c`.
 *
 *   IDX   first table index to fill
 *   SIZE  block-size number spliced into the function names
 *   OPT   instruction-set suffix spliced into the function names
 *
 * In both tables the second index selects the vertical filter and the
 * third the horizontal one.  For the epel table 0 means none, 1 the
 * 4-tap and 2 the 6-tap filter.
 *
 * Every helper expands to a single do { ... } while (0) statement, so it
 * stays one statement even as the body of an unbraced if / else / loop.
 * Invoke it with a trailing semicolon.
 */

/* 6-tap entries only: [0][2] = h6, [2][0] = v6, [2][2] = h6v6. */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/* Every entry that involves a 4-tap filter, followed by the 6-tap ones. */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)

/*
 * Bilinear table: indices 1 and 2 receive the same function, so only
 * "zero or non-zero" matters for each axis.  The [0][0] slot is left alone.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)

00282 
00283 
00284 av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
00285 {
00286 #if HAVE_YASM
00287     int mm_flags = av_get_cpu_flags();
00288 
00289     if (mm_flags & AV_CPU_FLAG_MMX) {
00290         c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
00291         c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
00292         c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
00293         c->vp8_idct_add       = ff_vp8_idct_add_mmx;
00294         c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;
00295         c->put_vp8_epel_pixels_tab[0][0][0]     =
00296         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
00297         c->put_vp8_epel_pixels_tab[1][0][0]     =
00298         c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
00299 
00300         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
00301         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;
00302 
00303         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
00304         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
00305         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
00306         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
00307 
00308         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
00309         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
00310         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
00311         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
00312     }
00313 
00314     /* note that 4-tap width=16 functions are missing because w=16
00315      * is only used for luma, and luma is always a copy or sixtap. */
00316     if (mm_flags & AV_CPU_FLAG_MMX2) {
00317         VP8_LUMA_MC_FUNC(0, 16, mmxext);
00318         VP8_MC_FUNC(1, 8, mmxext);
00319         VP8_MC_FUNC(2, 4, mmxext);
00320         VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
00321         VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
00322         VP8_BILINEAR_MC_FUNC(2, 4, mmxext);
00323 
00324         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
00325         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;
00326 
00327         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
00328         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
00329         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
00330         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
00331 
00332         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
00333         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
00334         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
00335         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
00336     }
00337 
00338     if (mm_flags & AV_CPU_FLAG_SSE) {
00339         c->vp8_idct_add                         = ff_vp8_idct_add_sse;
00340         c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
00341         c->put_vp8_epel_pixels_tab[0][0][0]     =
00342         c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
00343     }
00344 
00345     if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
00346         VP8_LUMA_MC_FUNC(0, 16, sse2);
00347         VP8_MC_FUNC(1, 8, sse2);
00348         VP8_BILINEAR_MC_FUNC(0, 16, sse2);
00349         VP8_BILINEAR_MC_FUNC(1, 8, sse2);
00350 
00351         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;
00352 
00353         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
00354         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
00355 
00356         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
00357         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
00358     }
00359 
00360     if (mm_flags & AV_CPU_FLAG_SSE2) {
00361         c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;
00362 
00363         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;
00364 
00365         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
00366         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
00367 
00368         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
00369         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
00370     }
00371 
00372     if (mm_flags & AV_CPU_FLAG_SSSE3) {
00373         VP8_LUMA_MC_FUNC(0, 16, ssse3);
00374         VP8_MC_FUNC(1, 8, ssse3);
00375         VP8_MC_FUNC(2, 4, ssse3);
00376         VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
00377         VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
00378         VP8_BILINEAR_MC_FUNC(2, 4, ssse3);
00379 
00380         c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
00381         c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;
00382 
00383         c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
00384         c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
00385         c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
00386         c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
00387 
00388         c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
00389         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
00390         c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
00391         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
00392     }
00393 
00394     if (mm_flags & AV_CPU_FLAG_SSE4) {
00395         c->vp8_idct_dc_add                  = ff_vp8_idct_dc_add_sse4;
00396 
00397         c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
00398         c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
00399         c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
00400     }
00401 #endif
00402 }
Generated on Thu Jan 24 2013 17:08:54 for Libav by doxygen 1.7.1