39 { 36, 68, 60, 92, 34, 66, 58, 90, },
40 { 100, 4, 124, 28, 98, 2, 122, 26, },
41 { 52, 84, 44, 76, 50, 82, 42, 74, },
42 { 116, 20, 108, 12, 114, 18, 106, 10, },
43 { 32, 64, 56, 88, 38, 70, 62, 94, },
44 { 96, 0, 120, 24, 102, 6, 126, 30, },
45 { 48, 80, 40, 72, 54, 86, 46, 78, },
46 { 112, 16, 104, 8, 118, 22, 110, 14, },
50 64, 64, 64, 64, 64, 64, 64, 64
57 uint8_t *ptr = plane + stride * y;
58 for (i = 0; i <
height; i++) {
59 memset(ptr, val, width);
66 const int dst_depth,
const int big_endian)
69 uint16_t *dst = (uint16_t *) (plane + stride * y);
70 #define FILL8TO9_OR_10(wfunc) \
71 for (i = 0; i < height; i++) { \
72 for (j = 0; j < width; j++) { \
73 wfunc(&dst[j], (val << (dst_depth - 8)) | \
74 (val >> (16 - dst_depth))); \
88 const int32_t *filterPos,
int filterSize)
93 const uint16_t *src = (
const uint16_t *) _src;
97 for (i = 0; i <
dstW; i++) {
99 int srcPos = filterPos[i];
102 for (j = 0; j < filterSize; j++) {
103 val += src[srcPos + j] * filter[filterSize * i + j];
106 dst[i] =
FFMIN(val >> sh, (1 << 19) - 1);
112 const int32_t *filterPos,
int filterSize)
116 const uint16_t *src = (
const uint16_t *) _src;
119 for (i = 0; i <
dstW; i++) {
121 int srcPos = filterPos[i];
124 for (j = 0; j < filterSize; j++) {
125 val += src[srcPos + j] * filter[filterSize * i + j];
128 dst[i] =
FFMIN(val >> sh, (1 << 15) - 1);
135 const int32_t *filterPos,
int filterSize)
138 for (i = 0; i <
dstW; i++) {
140 int srcPos = filterPos[i];
142 for (j = 0; j < filterSize; j++) {
143 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
145 dst[i] =
FFMIN(val >> 7, (1 << 15) - 1);
151 const int32_t *filterPos,
int filterSize)
155 for (i = 0; i <
dstW; i++) {
157 int srcPos = filterPos[i];
159 for (j = 0; j < filterSize; j++) {
160 val += ((int)src[srcPos + j]) * filter[filterSize * i + j];
162 dst[i] =
FFMIN(val >> 3, (1 << 19) - 1);
171 for (i = 0; i <
width; i++) {
172 dstU[i] = (
FFMIN(dstU[i], 30775) * 4663 - 9289992) >> 12;
173 dstV[i] = (
FFMIN(dstV[i], 30775) * 4663 - 9289992) >> 12;
180 for (i = 0; i <
width; i++) {
181 dstU[i] = (dstU[i] * 1799 + 4081085) >> 11;
182 dstV[i] = (dstV[i] * 1799 + 4081085) >> 11;
189 for (i = 0; i <
width; i++)
190 dst[i] = (
FFMIN(dst[i], 30189) * 19077 - 39057361) >> 14;
196 for (i = 0; i <
width; i++)
197 dst[i] = (dst[i] * 14071 + 33561947) >> 14;
205 for (i = 0; i <
width; i++) {
206 dstU[i] = (
FFMIN(dstU[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
207 dstV[i] = (
FFMIN(dstV[i], 30775 << 4) * 4663 - (9289992 << 4)) >> 12;
216 for (i = 0; i <
width; i++) {
217 dstU[i] = (dstU[i] * 1799 + (4081085 << 4)) >> 11;
218 dstV[i] = (dstV[i] * 1799 + (4081085 << 4)) >> 11;
226 for (i = 0; i <
width; i++)
227 dst[i] = (
FFMIN(dst[i], 30189 << 4) * 4769 - (39057361 << 2)) >> 12;
234 for (i = 0; i <
width; i++)
235 dst[i] = (dst[i] * 14071 + (33561947 << 4)) >> 14;
242 unsigned int xpos = 0;
243 for (i = 0; i < dstWidth; i++) {
244 register unsigned int xx = xpos >> 16;
245 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
246 dst[i] = (src[xx] << 7) + (src[xx + 1] - src[xx]) * xalpha;
259 uint32_t *pal,
int isAlpha)
264 const uint8_t *src = src_in[isAlpha ? 3 : 0];
267 toYV12(formatConvBuffer, src, srcW, pal);
275 c->
hyScale(c, dst, dstWidth, src, hLumFilter,
276 hLumFilterPos, hLumFilterSize);
282 convertRange(dst, dstWidth);
286 int dstWidth,
const uint8_t *src1,
290 unsigned int xpos = 0;
291 for (i = 0; i < dstWidth; i++) {
292 register unsigned int xx = xpos >> 16;
293 register unsigned int xalpha = (xpos & 0xFFFF) >> 9;
294 dst1[i] = (src1[xx] * (xalpha ^ 127) + src1[xx + 1] * xalpha);
295 dst2[i] = (src2[xx] * (xalpha ^ 127) + src2[xx + 1] * xalpha);
301 int16_t *dst2,
int dstWidth,
309 const uint8_t *src1 = src_in[1], *src2 = src_in[2];
311 uint8_t *buf2 = formatConvBuffer +
313 c->
chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal);
317 uint8_t *buf2 = formatConvBuffer +
325 c->
hcScale(c, dst1, dstWidth, src1, hChrFilter, hChrFilterPos, hChrFilterSize);
326 c->
hcScale(c, dst2, dstWidth, src2, hChrFilter, hChrFilterPos, hChrFilterSize);
328 c->
hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc);
/* Compile-time switch for the buffer-tracing messages emitted through
 * DEBUG_BUFFERS(); the messages are only logged when this is non-zero. */
#define DEBUG_SWSCALE_BUFFERS 0

/*
 * Log a printf-style message at AV_LOG_DEBUG level when
 * DEBUG_SWSCALE_BUFFERS is non-zero.
 *
 * The expansion passes an identifier named `c` as the first argument of
 * av_log(), so a variable of that name must be in scope at the call site.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as one
 * ordinary statement: a bare `if` expansion would capture the `else` branch
 * of an enclosing unbraced if/else.
 */
#define DEBUG_BUFFERS(...)                          \
    do {                                            \
        if (DEBUG_SWSCALE_BUFFERS)                  \
            av_log(c, AV_LOG_DEBUG, __VA_ARGS__);   \
    } while (0)
341 int srcStride[],
int srcSliceY,
342 int srcSliceH,
uint8_t *dst[],
int dstStride[])
404 srcStride[3] = srcStride[0];
409 DEBUG_BUFFERS(
"swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
410 src[0], srcStride[0], src[1], srcStride[1],
411 src[2], srcStride[2], src[3], srcStride[3],
412 dst[0], dstStride[0], dst[1], dstStride[1],
413 dst[2], dstStride[2], dst[3], dstStride[3]);
414 DEBUG_BUFFERS(
"srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
415 srcSliceY, srcSliceH, dstY, dstH);
416 DEBUG_BUFFERS(
"vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
417 vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
419 if (dstStride[0] % 8 != 0 || dstStride[1] % 8 != 0 ||
420 dstStride[2] % 8 != 0 || dstStride[3] % 8 != 0) {
421 static int warnedAlready = 0;
424 "Warning: dstStride is not aligned!\n"
425 " ->cannot do aligned memory accesses anymore\n");
433 if (srcSliceY == 0) {
441 if (!should_dither) {
446 for (; dstY <
dstH; dstY++) {
449 dst[0] + dstStride[0] *
dstY,
450 dst[1] + dstStride[1] * chrDstY,
451 dst[2] + dstStride[2] * chrDstY,
456 const int firstLumSrcY =
FFMAX(1 - vLumFilterSize, vLumFilterPos[dstY]);
459 const int firstChrSrcY =
FFMAX(1 - vChrFilterSize, vChrFilterPos[chrDstY]);
462 int lastLumSrcY =
FFMIN(c->
srcH, firstLumSrcY + vLumFilterSize) - 1;
463 int lastLumSrcY2 =
FFMIN(c->
srcH, firstLumSrcY2 + vLumFilterSize) - 1;
464 int lastChrSrcY =
FFMIN(c->
chrSrcH, firstChrSrcY + vChrFilterSize) - 1;
468 if (firstLumSrcY > lastInLumBuf)
469 lastInLumBuf = firstLumSrcY - 1;
470 if (firstChrSrcY > lastInChrBuf)
471 lastInChrBuf = firstChrSrcY - 1;
472 assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
473 assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
476 DEBUG_BUFFERS(
"\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
477 firstLumSrcY, lastLumSrcY, lastInLumBuf);
478 DEBUG_BUFFERS(
"\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
479 firstChrSrcY, lastChrSrcY, lastInChrBuf);
482 enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH &&
486 lastLumSrcY = srcSliceY + srcSliceH - 1;
487 lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
488 DEBUG_BUFFERS(
"buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
489 lastLumSrcY, lastChrSrcY);
493 while (lastInLumBuf < lastLumSrcY) {
495 src[0] + (lastInLumBuf + 1 - srcSliceY) * srcStride[0],
496 src[1] + (lastInLumBuf + 1 - srcSliceY) * srcStride[1],
497 src[2] + (lastInLumBuf + 1 - srcSliceY) * srcStride[2],
498 src[3] + (lastInLumBuf + 1 - srcSliceY) * srcStride[3],
501 assert(lumBufIndex < 2 * vLumBufSize);
502 assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
503 assert(lastInLumBuf + 1 - srcSliceY >= 0);
504 hyscale(c, lumPixBuf[lumBufIndex], dstW, src1, srcW, lumXInc,
505 hLumFilter, hLumFilterPos, hLumFilterSize,
506 formatConvBuffer, pal, 0);
508 hyscale(c, alpPixBuf[lumBufIndex], dstW, src1, srcW,
509 lumXInc, hLumFilter, hLumFilterPos, hLumFilterSize,
510 formatConvBuffer, pal, 1);
513 lumBufIndex, lastInLumBuf);
515 while (lastInChrBuf < lastChrSrcY) {
517 src[0] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[0],
518 src[1] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[1],
519 src[2] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[2],
520 src[3] + (lastInChrBuf + 1 - chrSrcSliceY) * srcStride[3],
523 assert(chrBufIndex < 2 * vChrBufSize);
524 assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
525 assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
529 hcscale(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex],
530 chrDstW, src1, chrSrcW, chrXInc,
531 hChrFilter, hChrFilterPos, hChrFilterSize,
532 formatConvBuffer, pal);
535 chrBufIndex, lastInChrBuf);
538 if (lumBufIndex >= vLumBufSize)
540 if (chrBufIndex >= vChrBufSize)
547 lastInLumBuf, lastInChrBuf);
553 if (dstY >= dstH - 2) {
557 &yuv2packed1, &yuv2packed2, &yuv2packedX);
561 const int16_t **lumSrcPtr = (
const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
562 const int16_t **chrUSrcPtr = (
const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
563 const int16_t **chrVSrcPtr = (
const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
565 (
const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize :
NULL;
567 if (firstLumSrcY < 0 || firstLumSrcY + vLumFilterSize > c->
srcH) {
568 const int16_t **tmpY = (
const int16_t **)lumPixBuf +
570 int neg = -firstLumSrcY, i;
571 int end =
FFMIN(c->
srcH - firstLumSrcY, vLumFilterSize);
572 for (i = 0; i < neg; i++)
573 tmpY[i] = lumSrcPtr[neg];
575 tmpY[i] = lumSrcPtr[i];
577 tmpY[i] = tmpY[i - 1];
581 const int16_t **tmpA = (
const int16_t **)alpPixBuf +
583 for (i = 0; i < neg; i++)
584 tmpA[i] = alpSrcPtr[neg];
586 tmpA[i] = alpSrcPtr[i];
588 tmpA[i] = tmpA[i - 1];
592 if (firstChrSrcY < 0 ||
593 firstChrSrcY + vChrFilterSize > c->
chrSrcH) {
594 const int16_t **tmpU = (
const int16_t **)chrUPixBuf + 2 * vChrBufSize,
595 **tmpV = (
const int16_t **)chrVPixBuf + 2 *
vChrBufSize;
596 int neg = -firstChrSrcY, i;
597 int end =
FFMIN(c->
chrSrcH - firstChrSrcY, vChrFilterSize);
598 for (i = 0; i < neg; i++) {
599 tmpU[i] = chrUSrcPtr[neg];
600 tmpV[i] = chrVSrcPtr[neg];
602 for (; i < end; i++) {
603 tmpU[i] = chrUSrcPtr[i];
604 tmpV[i] = chrVSrcPtr[i];
607 tmpU[i] = tmpU[i - 1];
608 tmpV[i] = tmpV[i - 1];
618 if (vLumFilterSize == 1) {
621 yuv2planeX(vLumFilter + dstY * vLumFilterSize,
622 vLumFilterSize, lumSrcPtr, dest[0],
626 if (!((dstY & chrSkipMask) ||
isGray(dstFormat))) {
628 yuv2nv12cX(c, vChrFilter + chrDstY * vChrFilterSize,
629 vChrFilterSize, chrUSrcPtr, chrVSrcPtr,
631 }
else if (vChrFilterSize == 1) {
635 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
636 vChrFilterSize, chrUSrcPtr, dest[1],
638 yuv2planeX(vChrFilter + chrDstY * vChrFilterSize,
639 vChrFilterSize, chrVSrcPtr, dest[2],
645 if (vLumFilterSize == 1) {
649 yuv2planeX(vLumFilter + dstY * vLumFilterSize,
650 vLumFilterSize, alpSrcPtr, dest[3],
656 vChrFilterSize <= 2) {
657 int chrAlpha = vChrFilterSize == 1 ? 0 : vChrFilter[2 * dstY + 1];
659 alpPixBuf ? *alpSrcPtr : NULL,
660 dest[0], dstW, chrAlpha, dstY);
661 }
else if (c->
yuv2packed2 && vLumFilterSize == 2 &&
662 vChrFilterSize == 2) {
663 int lumAlpha = vLumFilter[2 * dstY + 1];
664 int chrAlpha = vChrFilter[2 * dstY + 1];
666 lumMmxFilter[3] = vLumFilter[2 *
dstY] * 0x10001;
668 chrMmxFilter[3] = vChrFilter[2 * chrDstY] * 0x10001;
670 alpPixBuf ? alpSrcPtr : NULL,
671 dest[0], dstW, lumAlpha, chrAlpha, dstY);
674 lumSrcPtr, vLumFilterSize,
675 vChrFilter + dstY * vChrFilterSize,
676 chrUSrcPtr, chrVSrcPtr, vChrFilterSize,
677 alpSrcPtr, dest[0], dstW, dstY);
685 int height = dstY - lastDstY;
695 fillPlane(dst[3], dstStride[3], length, height, lastDstY, 255);
698 #if HAVE_MMXEXT_INLINE
700 __asm__
volatile (
"sfence" :::
"memory");
711 return dstY - lastDstY;