00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088 #include <stdio.h>
00089 #include <stdlib.h>
00090 #include <string.h>
00091 #include <inttypes.h>
00092 #include <assert.h>
00093 #include "config.h"
00094 #include "libswscale/rgb2rgb.h"
00095 #include "libswscale/swscale.h"
00096 #include "libswscale/swscale_internal.h"
00097 #include "libavutil/cpu.h"
00098 #include "yuv2rgb_altivec.h"
00099
/* Make sure these two build-time switches are not defined from here on. */
#undef PROFILE_THE_BEAST
#undef INC_SCALING

/* Byte-sized shorthands; ubyte is used below for raw sample pointers. */
typedef unsigned char ubyte;
typedef signed char   sbyte;
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143 static
00144 const vector unsigned char
00145 perm_rgb_0 = {0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
00146 0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a},
00147 perm_rgb_1 = {0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
00148 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f},
00149 perm_rgb_2 = {0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
00150 0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05},
00151 perm_rgb_3 = {0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
00152 0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f};
00153
/*
 * vec_merge3(x2, x1, x0, y0, y1, y2)
 *
 * Interleave three 16-byte vectors into three output vectors holding
 * 16 packed 3-byte groups. Within each group the byte order is
 * x0, x1, x2 (note that the parameter list names them in reverse).
 * y0, y1, y2 receive output bytes 0-15, 16-31 and 32-47 respectively and
 * must be assignable lvalues distinct from the inputs.
 */
#define vec_merge3(x2,x1,x0,y0,y1,y2)                     \
do {                                                      \
    __typeof__(x0) m3_pairs_hi, m3_pairs_lo, m3_carry;    \
    /* x0/x1 byte pairs for elements 0-7 and 8-15 */      \
    m3_pairs_hi = vec_mergeh (x0,x1);                     \
    m3_pairs_lo = vec_mergel (x0,x1);                     \
    /* weave x2 into the pairs, 16 output bytes a time */ \
    y0       = vec_perm (m3_pairs_hi, x2, perm_rgb_0);    \
    m3_carry = vec_perm (m3_pairs_hi, x2, perm_rgb_1);    \
    y1       = vec_perm (m3_pairs_lo, m3_carry, perm_rgb_2); \
    y2       = vec_perm (m3_pairs_lo, m3_carry, perm_rgb_3); \
} while(0)
00164
/*
 * vec_mstbgr24(x0, x1, x2, ptr)
 *
 * Store 16 packed 3-byte pixels (48 bytes) at ptr with per-pixel byte
 * order x2, x1, x0, then advance ptr by three vectors.
 * ptr must be a modifiable pointer to the vector type of x0.
 */
#define vec_mstbgr24(x0,x1,x2,ptr)              \
do {                                            \
    __typeof__(x0) st24_a, st24_b, st24_c;      \
    vec_merge3 (x0,x1,x2,st24_a,st24_b,st24_c); \
    vec_st (st24_a,  0, ptr);                   \
    vec_st (st24_b, 16, ptr);                   \
    vec_st (st24_c, 32, ptr);                   \
    ptr += 3;                                   \
} while (0)
00173
/*
 * vec_mstrgb24(x0, x1, x2, ptr)
 *
 * Store 16 packed 3-byte pixels (48 bytes) at ptr with per-pixel byte
 * order x0, x1, x2, then advance ptr by three vectors.
 * ptr must be a modifiable pointer to the vector type of x0.
 */
#define vec_mstrgb24(x0,x1,x2,ptr)              \
do {                                            \
    __typeof__(x0) st24_a, st24_b, st24_c;      \
    vec_merge3 (x2,x1,x0,st24_a,st24_b,st24_c); \
    vec_st (st24_a,  0, ptr);                   \
    vec_st (st24_b, 16, ptr);                   \
    vec_st (st24_c, 32, ptr);                   \
    ptr += 3;                                   \
} while (0)
00182
00183
00184
00185
00186
/*
 * vec_mstrgb32(T, x0, x1, x2, x3, ptr)
 *
 * Interleave four 16-byte planes of vector type T into 16 packed 4-byte
 * pixels (per-pixel byte order x0, x1, x2, x3), store the resulting 64
 * bytes at ptr and advance ptr by four vectors.
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                       \
do {                                                                          \
    T m32_p01, m32_p23;                                                       \
    T *m32_dst = (T *)ptr;                                                    \
                                                                              \
    /* elements 0-7: byte pairs first, then pairs of pairs */                 \
    m32_p01 = vec_mergeh (x0,x1);                                             \
    m32_p23 = vec_mergeh (x2,x3);                                             \
    vec_st ((T)vec_mergeh ((vector unsigned short)m32_p01,                    \
                           (vector unsigned short)m32_p23), 0*16, m32_dst);   \
    vec_st ((T)vec_mergel ((vector unsigned short)m32_p01,                    \
                           (vector unsigned short)m32_p23), 1*16, m32_dst);   \
                                                                              \
    /* elements 8-15 */                                                       \
    m32_p01 = vec_mergel (x0,x1);                                             \
    m32_p23 = vec_mergel (x2,x3);                                             \
    vec_st ((T)vec_mergeh ((vector unsigned short)m32_p01,                    \
                           (vector unsigned short)m32_p23), 2*16, m32_dst);   \
    vec_st ((T)vec_mergel ((vector unsigned short)m32_p01,                    \
                           (vector unsigned short)m32_p23), 3*16, m32_dst);   \
                                                                              \
    ptr += 4;                                                                 \
} while (0)
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222 #define vec_unh(x) \
00223 (vector signed short) \
00224 vec_perm(x,(__typeof__(x)){0}, \
00225 ((vector unsigned char){0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
00226 0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07}))
00227 #define vec_unl(x) \
00228 (vector signed short) \
00229 vec_perm(x,(__typeof__(x)){0}, \
00230 ((vector unsigned char){0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
00231 0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F}))
00232
00233 #define vec_clip_s16(x) \
00234 vec_max (vec_min (x, ((vector signed short){235,235,235,235,235,235,235,235})), \
00235 ((vector signed short){ 16, 16, 16, 16, 16, 16, 16, 16}))
00236
/*
 * Pack two vectors of signed shorts into one vector of unsigned bytes,
 * clamping every lane to 0..255 (negative lanes become 0, lanes above
 * 255 become 255). x supplies result bytes 0-7, y bytes 8-15.
 */
#define vec_packclp(x,y) \
    ((vector unsigned char) vec_packsu (x, y))
00241
00242
00243
00244
00245 static inline void cvtyuvtoRGB (SwsContext *c,
00246 vector signed short Y, vector signed short U, vector signed short V,
00247 vector signed short *R, vector signed short *G, vector signed short *B)
00248 {
00249 vector signed short vx,ux,uvx;
00250
00251 Y = vec_mradds (Y, c->CY, c->OY);
00252 U = vec_sub (U,(vector signed short)
00253 vec_splat((vector signed short){128},0));
00254 V = vec_sub (V,(vector signed short)
00255 vec_splat((vector signed short){128},0));
00256
00257
00258 ux = vec_sl (U, c->CSHIFT);
00259 *B = vec_mradds (ux, c->CBU, Y);
00260
00261
00262 vx = vec_sl (V, c->CSHIFT);
00263 *R = vec_mradds (vx, c->CRV, Y);
00264
00265
00266 uvx = vec_mradds (U, c->CGU, Y);
00267 *G = vec_mradds (V, c->CGV, uvx);
00268 }
00269
00270
00271
00272
00273
00274
00275
00276
00277
/*
 * DEFCSP420_CVT(name, out_pixels)
 *
 * Defines static int altivec_<name>(), a slice converter from planar YUV
 * with 2x2 subsampled chroma (in[0] = Y, in[1] = U, in[2] = V) to a packed
 * RGB layout. out_pixels(R, G, B, ptr) must store 16 pixels at ptr and
 * advance ptr past them.
 *
 * Two luma rows are processed per iteration so that each chroma row is
 * converted once and shared by both. Only whole groups of 16 pixels and
 * whole row pairs are converted (w/16 groups, h/2 pairs).
 *
 * Every row pointer is derived from the strides at the start of each row
 * pair. The previous version advanced the output pointers by one stride on
 * top of the bytes already written, which is only right when the output
 * stride equals the packed row width; padded destinations were corrupted.
 *
 * Output rows are written with vec_st, so they have to be 16-byte aligned.
 * Returns srcSliceH.
 */
#define DEFCSP420_CVT(name,out_pixels)                                        \
static int altivec_##name (SwsContext *c,                                     \
                           const unsigned char **in, int *instrides,          \
                           int srcSliceY, int srcSliceH,                      \
                           unsigned char **oplanes, int *outstrides)          \
{                                                                             \
    int w = c->srcW;                                                          \
    int h = srcSliceH;                                                        \
    int i,j;                                                                  \
    vector unsigned char y0,y1;                                               \
    vector signed char  u,v;                                                  \
    vector signed short Y0,Y1,Y2,Y3;                                          \
    vector signed short U,V;                                                  \
    vector signed short vx,ux,uvx;                                            \
    vector signed short vx0,ux0,uvx0;                                         \
    vector signed short vx1,ux1,uvx1;                                         \
    vector signed short R0,G0,B0;                                             \
    vector signed short R1,G1,B1;                                             \
    vector unsigned char R,G,B;                                               \
    vector unsigned char align_perm;                                          \
                                                                              \
    /* keep the conversion coefficients in locals */                          \
    vector signed short                                                       \
        lCY  = c->CY,                                                         \
        lOY  = c->OY,                                                         \
        lCRV = c->CRV,                                                        \
        lCBU = c->CBU,                                                        \
        lCGU = c->CGU,                                                        \
        lCGV = c->CGV;                                                        \
    vector unsigned short lCSHIFT = c->CSHIFT;                                \
                                                                              \
    const vector signed short zero_s16 = (vector signed short){0};            \
    const vector signed char chroma_bias =                                    \
        (vector signed char) vec_splat ((vector signed char){128},0);         \
                                                                              \
    /* first output row of this slice */                                      \
    unsigned char *dst = oplanes[0]+srcSliceY*outstrides[0];                  \
                                                                              \
    for (i=0;i<h/2;i++) {                                                     \
        /* even/odd luma rows and the chroma row they share */                \
        const ubyte *y1i = in[0]+(2*i)*instrides[0];                          \
        const ubyte *y2i = y1i+instrides[0];                                  \
        const ubyte *ui  = in[1]+i*instrides[1];                              \
        const ubyte *vi  = in[2]+i*instrides[2];                              \
        /* even/odd output rows */                                            \
        vector unsigned char *oute =                                          \
            (vector unsigned char *)(dst+(2*i)*outstrides[0]);                \
        vector unsigned char *outo =                                          \
            (vector unsigned char *)(dst+(2*i+1)*outstrides[0]);              \
                                                                              \
        /* data-stream store hints for the two output rows */                 \
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);                \
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);                \
                                                                              \
        for (j=0;j<w/16;j++) {                                                \
            const vector unsigned char *y1ivP =                               \
                (const vector unsigned char *)y1i;                            \
            const vector unsigned char *y2ivP =                               \
                (const vector unsigned char *)y2i;                            \
            const vector unsigned char *uivP =                                \
                (const vector unsigned char *)ui;                             \
            const vector unsigned char *vivP =                                \
                (const vector unsigned char *)vi;                             \
                                                                              \
            /* unaligned loads: two aligned vectors combined by vec_perm */   \
            align_perm = vec_lvsl (0, y1i);                                   \
            y0 = (vector unsigned char)                                       \
                 vec_perm (y1ivP[0], y1ivP[1], align_perm);                   \
                                                                              \
            align_perm = vec_lvsl (0, y2i);                                   \
            y1 = (vector unsigned char)                                       \
                 vec_perm (y2ivP[0], y2ivP[1], align_perm);                   \
                                                                              \
            align_perm = vec_lvsl (0, ui);                                    \
            u = (vector signed char)                                          \
                vec_perm (uivP[0], uivP[1], align_perm);                      \
                                                                              \
            align_perm = vec_lvsl (0, vi);                                    \
            v = (vector signed char)                                          \
                vec_perm (vivP[0], vivP[1], align_perm);                      \
                                                                              \
            /* centre chroma, then widen the first 8 samples to 16 bit */     \
            u = (vector signed char) vec_sub (u, chroma_bias);                \
            v = (vector signed char) vec_sub (v, chroma_bias);                \
                                                                              \
            U = vec_unpackh (u);                                              \
            V = vec_unpackh (v);                                              \
                                                                              \
            /* zero-extend luma: Y0/Y1 = even row, Y2/Y3 = odd row */         \
            Y0 = vec_unh (y0);                                                \
            Y1 = vec_unl (y0);                                                \
            Y2 = vec_unh (y1);                                                \
            Y3 = vec_unl (y1);                                                \
                                                                              \
            Y0 = vec_mradds (Y0, lCY, lOY);                                   \
            Y1 = vec_mradds (Y1, lCY, lOY);                                   \
            Y2 = vec_mradds (Y2, lCY, lOY);                                   \
            Y3 = vec_mradds (Y3, lCY, lOY);                                   \
                                                                              \
            /* blue chroma term, duplicated for each pixel of a pair */       \
            ux = vec_sl (U, lCSHIFT);                                         \
            ux = vec_mradds (ux, lCBU, zero_s16);                             \
            ux0 = vec_mergeh (ux,ux);                                         \
            ux1 = vec_mergel (ux,ux);                                         \
                                                                              \
            /* red chroma term */                                             \
            vx = vec_sl (V, lCSHIFT);                                         \
            vx = vec_mradds (vx, lCRV, zero_s16);                             \
            vx0 = vec_mergeh (vx,vx);                                         \
            vx1 = vec_mergel (vx,vx);                                         \
                                                                              \
            /* green chroma term */                                           \
            uvx = vec_mradds (U, lCGU, zero_s16);                             \
            uvx = vec_mradds (V, lCGV, uvx);                                  \
            uvx0 = vec_mergeh (uvx,uvx);                                      \
            uvx1 = vec_mergel (uvx,uvx);                                      \
                                                                              \
            /* even row */                                                    \
            R0 = vec_add (Y0,vx0);                                            \
            G0 = vec_add (Y0,uvx0);                                           \
            B0 = vec_add (Y0,ux0);                                            \
            R1 = vec_add (Y1,vx1);                                            \
            G1 = vec_add (Y1,uvx1);                                           \
            B1 = vec_add (Y1,ux1);                                            \
                                                                              \
            R = vec_packclp (R0,R1);                                          \
            G = vec_packclp (G0,G1);                                          \
            B = vec_packclp (B0,B1);                                          \
                                                                              \
            out_pixels(R,G,B,oute);                                           \
                                                                              \
            /* odd row, same chroma terms */                                  \
            R0 = vec_add (Y2,vx0);                                            \
            G0 = vec_add (Y2,uvx0);                                           \
            B0 = vec_add (Y2,ux0);                                            \
            R1 = vec_add (Y3,vx1);                                            \
            G1 = vec_add (Y3,uvx1);                                           \
            B1 = vec_add (Y3,ux1);                                            \
                                                                              \
            R = vec_packclp (R0,R1);                                          \
            G = vec_packclp (G0,G1);                                          \
            B = vec_packclp (B0,B1);                                          \
                                                                              \
            out_pixels(R,G,B,outo);                                           \
                                                                              \
            y1i += 16;                                                        \
            y2i += 16;                                                        \
            ui  += 8;                                                         \
            vi  += 8;                                                         \
        }                                                                     \
    }                                                                         \
    return srcSliceH;                                                         \
}
00441
00442
00443 #define out_abgr(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),c,b,a,ptr)
00444 #define out_bgra(a,b,c,ptr) vec_mstrgb32(__typeof__(a),c,b,a,((__typeof__ (a)){255}),ptr)
00445 #define out_rgba(a,b,c,ptr) vec_mstrgb32(__typeof__(a),a,b,c,((__typeof__ (a)){255}),ptr)
00446 #define out_argb(a,b,c,ptr) vec_mstrgb32(__typeof__(a),((__typeof__ (a)){255}),a,b,c,ptr)
00447 #define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
00448 #define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)
00449
00450 DEFCSP420_CVT (yuv2_abgr, out_abgr)
00451 DEFCSP420_CVT (yuv2_bgra, out_bgra)
00452 DEFCSP420_CVT (yuv2_rgba, out_rgba)
00453 DEFCSP420_CVT (yuv2_argb, out_argb)
00454 DEFCSP420_CVT (yuv2_rgb24, out_rgb24)
00455 DEFCSP420_CVT (yuv2_bgr24, out_bgr24)
00456
00457
00458
00459
00460 static
00461 const vector unsigned char
00462 demux_u = {0x10,0x00,0x10,0x00,
00463 0x10,0x04,0x10,0x04,
00464 0x10,0x08,0x10,0x08,
00465 0x10,0x0c,0x10,0x0c},
00466 demux_v = {0x10,0x02,0x10,0x02,
00467 0x10,0x06,0x10,0x06,
00468 0x10,0x0A,0x10,0x0A,
00469 0x10,0x0E,0x10,0x0E},
00470 demux_y = {0x10,0x01,0x10,0x03,
00471 0x10,0x05,0x10,0x07,
00472 0x10,0x09,0x10,0x0B,
00473 0x10,0x0D,0x10,0x0F};
00474
00475
00476
00477
00478 static int altivec_uyvy_rgb32 (SwsContext *c,
00479 const unsigned char **in, int *instrides,
00480 int srcSliceY, int srcSliceH,
00481 unsigned char **oplanes, int *outstrides)
00482 {
00483 int w = c->srcW;
00484 int h = srcSliceH;
00485 int i,j;
00486 vector unsigned char uyvy;
00487 vector signed short Y,U,V;
00488 vector signed short R0,G0,B0,R1,G1,B1;
00489 vector unsigned char R,G,B;
00490 vector unsigned char *out;
00491 const ubyte *img;
00492
00493 img = in[0];
00494 out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
00495
00496 for (i=0;i<h;i++) {
00497 for (j=0;j<w/16;j++) {
00498 uyvy = vec_ld (0, img);
00499 U = (vector signed short)
00500 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00501
00502 V = (vector signed short)
00503 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00504
00505 Y = (vector signed short)
00506 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00507
00508 cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
00509
00510 uyvy = vec_ld (16, img);
00511 U = (vector signed short)
00512 vec_perm (uyvy, (vector unsigned char){0}, demux_u);
00513
00514 V = (vector signed short)
00515 vec_perm (uyvy, (vector unsigned char){0}, demux_v);
00516
00517 Y = (vector signed short)
00518 vec_perm (uyvy, (vector unsigned char){0}, demux_y);
00519
00520 cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
00521
00522 R = vec_packclp (R0,R1);
00523 G = vec_packclp (G0,G1);
00524 B = vec_packclp (B0,B1);
00525
00526
00527 out_rgba (R,G,B,out);
00528
00529 img += 32;
00530 }
00531 }
00532 return srcSliceH;
00533 }
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543 SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
00544 {
00545 if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
00546 return NULL;
00547
00548
00549
00550
00551
00552
00553
00554
00555 if ((c->srcW & 0xf) != 0) return NULL;
00556
00557 switch (c->srcFormat) {
00558 case PIX_FMT_YUV410P:
00559 case PIX_FMT_YUV420P:
00560
00561 case PIX_FMT_GRAY8:
00562 case PIX_FMT_NV12:
00563 case PIX_FMT_NV21:
00564 if ((c->srcH & 0x1) != 0)
00565 return NULL;
00566
00567 switch(c->dstFormat) {
00568 case PIX_FMT_RGB24:
00569 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
00570 return altivec_yuv2_rgb24;
00571 case PIX_FMT_BGR24:
00572 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
00573 return altivec_yuv2_bgr24;
00574 case PIX_FMT_ARGB:
00575 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
00576 return altivec_yuv2_argb;
00577 case PIX_FMT_ABGR:
00578 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
00579 return altivec_yuv2_abgr;
00580 case PIX_FMT_RGBA:
00581 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
00582 return altivec_yuv2_rgba;
00583 case PIX_FMT_BGRA:
00584 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
00585 return altivec_yuv2_bgra;
00586 default: return NULL;
00587 }
00588 break;
00589
00590 case PIX_FMT_UYVY422:
00591 switch(c->dstFormat) {
00592 case PIX_FMT_BGR32:
00593 av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
00594 return altivec_uyvy_rgb32;
00595 default: return NULL;
00596 }
00597 break;
00598
00599 }
00600 return NULL;
00601 }
00602
00603 void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int brightness, int contrast, int saturation)
00604 {
00605 union {
00606 DECLARE_ALIGNED(16, signed short, tmp)[8];
00607 vector signed short vec;
00608 } buf;
00609
00610 buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9;
00611 buf.tmp[1] = -256*brightness;
00612 buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16);
00613 buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16);
00614 buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16));
00615 buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16));
00616
00617
00618 c->CSHIFT = (vector unsigned short)vec_splat_u16(2);
00619 c->CY = vec_splat ((vector signed short)buf.vec, 0);
00620 c->OY = vec_splat ((vector signed short)buf.vec, 1);
00621 c->CRV = vec_splat ((vector signed short)buf.vec, 2);
00622 c->CBU = vec_splat ((vector signed short)buf.vec, 3);
00623 c->CGU = vec_splat ((vector signed short)buf.vec, 4);
00624 c->CGV = vec_splat ((vector signed short)buf.vec, 5);
00625 return;
00626 }
00627
00628
00629 static av_always_inline void
00630 ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter,
00631 const int16_t **lumSrc, int lumFilterSize,
00632 const int16_t *chrFilter, const int16_t **chrUSrc,
00633 const int16_t **chrVSrc, int chrFilterSize,
00634 const int16_t **alpSrc, uint8_t *dest,
00635 int dstW, int dstY, enum PixelFormat target)
00636 {
00637 int i,j;
00638 vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
00639 vector signed short R0,G0,B0,R1,G1,B1;
00640
00641 vector unsigned char R,G,B;
00642 vector unsigned char *out,*nout;
00643
00644 vector signed short RND = vec_splat_s16(1<<3);
00645 vector unsigned short SCL = vec_splat_u16(4);
00646 DECLARE_ALIGNED(16, unsigned int, scratch)[16];
00647
00648 vector signed short *YCoeffs, *CCoeffs;
00649
00650 YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
00651 CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
00652
00653 out = (vector unsigned char *)dest;
00654
00655 for (i=0; i<dstW; i+=16) {
00656 Y0 = RND;
00657 Y1 = RND;
00658
00659 for (j=0; j<lumFilterSize; j++) {
00660 X0 = vec_ld (0, &lumSrc[j][i]);
00661 X1 = vec_ld (16, &lumSrc[j][i]);
00662 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00663 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00664 }
00665
00666 U = RND;
00667 V = RND;
00668
00669 for (j=0; j<chrFilterSize; j++) {
00670 X = vec_ld (0, &chrUSrc[j][i/2]);
00671 U = vec_mradds (X, CCoeffs[j], U);
00672 X = vec_ld (0, &chrVSrc[j][i/2]);
00673 V = vec_mradds (X, CCoeffs[j], V);
00674 }
00675
00676
00677 Y0 = vec_sra (Y0, SCL);
00678 Y1 = vec_sra (Y1, SCL);
00679 U = vec_sra (U, SCL);
00680 V = vec_sra (V, SCL);
00681
00682 Y0 = vec_clip_s16 (Y0);
00683 Y1 = vec_clip_s16 (Y1);
00684 U = vec_clip_s16 (U);
00685 V = vec_clip_s16 (V);
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696 U0 = vec_mergeh (U,U);
00697 V0 = vec_mergeh (V,V);
00698
00699 U1 = vec_mergel (U,U);
00700 V1 = vec_mergel (V,V);
00701
00702 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00703 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00704
00705 R = vec_packclp (R0,R1);
00706 G = vec_packclp (G0,G1);
00707 B = vec_packclp (B0,B1);
00708
00709 switch(target) {
00710 case PIX_FMT_ABGR: out_abgr (R,G,B,out); break;
00711 case PIX_FMT_BGRA: out_bgra (R,G,B,out); break;
00712 case PIX_FMT_RGBA: out_rgba (R,G,B,out); break;
00713 case PIX_FMT_ARGB: out_argb (R,G,B,out); break;
00714 case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
00715 case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
00716 default:
00717 {
00718
00719
00720 static int printed_error_message;
00721 if (!printed_error_message) {
00722 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00723 sws_format_name(c->dstFormat));
00724 printed_error_message=1;
00725 }
00726 return;
00727 }
00728 }
00729 }
00730
00731 if (i < dstW) {
00732 i -= 16;
00733
00734 Y0 = RND;
00735 Y1 = RND;
00736
00737 for (j=0; j<lumFilterSize; j++) {
00738 X0 = vec_ld (0, &lumSrc[j][i]);
00739 X1 = vec_ld (16, &lumSrc[j][i]);
00740 Y0 = vec_mradds (X0, YCoeffs[j], Y0);
00741 Y1 = vec_mradds (X1, YCoeffs[j], Y1);
00742 }
00743
00744 U = RND;
00745 V = RND;
00746
00747 for (j=0; j<chrFilterSize; j++) {
00748 X = vec_ld (0, &chrUSrc[j][i/2]);
00749 U = vec_mradds (X, CCoeffs[j], U);
00750 X = vec_ld (0, &chrVSrc[j][i/2]);
00751 V = vec_mradds (X, CCoeffs[j], V);
00752 }
00753
00754
00755 Y0 = vec_sra (Y0, SCL);
00756 Y1 = vec_sra (Y1, SCL);
00757 U = vec_sra (U, SCL);
00758 V = vec_sra (V, SCL);
00759
00760 Y0 = vec_clip_s16 (Y0);
00761 Y1 = vec_clip_s16 (Y1);
00762 U = vec_clip_s16 (U);
00763 V = vec_clip_s16 (V);
00764
00765
00766
00767
00768
00769
00770
00771
00772
00773
00774 U0 = vec_mergeh (U,U);
00775 V0 = vec_mergeh (V,V);
00776
00777 U1 = vec_mergel (U,U);
00778 V1 = vec_mergel (V,V);
00779
00780 cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
00781 cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
00782
00783 R = vec_packclp (R0,R1);
00784 G = vec_packclp (G0,G1);
00785 B = vec_packclp (B0,B1);
00786
00787 nout = (vector unsigned char *)scratch;
00788 switch(target) {
00789 case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break;
00790 case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break;
00791 case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break;
00792 case PIX_FMT_ARGB: out_argb (R,G,B,nout); break;
00793 case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
00794 case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
00795 default:
00796
00797 av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
00798 sws_format_name(c->dstFormat));
00799 return;
00800 }
00801
00802 memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
00803 }
00804
00805 }
00806
00807 #define YUV2PACKEDX_WRAPPER(suffix, pixfmt) \
00808 void ff_yuv2 ## suffix ## _X_altivec(SwsContext *c, const int16_t *lumFilter, \
00809 const int16_t **lumSrc, int lumFilterSize, \
00810 const int16_t *chrFilter, const int16_t **chrUSrc, \
00811 const int16_t **chrVSrc, int chrFilterSize, \
00812 const int16_t **alpSrc, uint8_t *dest, \
00813 int dstW, int dstY) \
00814 { \
00815 ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, \
00816 chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
00817 alpSrc, dest, dstW, dstY, pixfmt); \
00818 }
00819
00820 YUV2PACKEDX_WRAPPER(abgr, PIX_FMT_ABGR);
00821 YUV2PACKEDX_WRAPPER(bgra, PIX_FMT_BGRA);
00822 YUV2PACKEDX_WRAPPER(argb, PIX_FMT_ARGB);
00823 YUV2PACKEDX_WRAPPER(rgba, PIX_FMT_RGBA);
00824 YUV2PACKEDX_WRAPPER(rgb24, PIX_FMT_RGB24);
00825 YUV2PACKEDX_WRAPPER(bgr24, PIX_FMT_BGR24);