Skip to content

Commit

Permalink
postproc/postprocess_template: Fix alignment
Browse files Browse the repository at this point in the history
postProcess in postprocess_template.c copies a PPContext
to the stack, works with this copy and then copies
it back again. Said local copy uses a hardcoded alignment
of eight, although PPContext has alignment 32 since
cbe2700
(this commit was in anticipation of AVX2 code that never landed).
This leads to misalignment in the filter-(pp|pp1|pp2|pp3|qp)
FATE-tests which UBSan complains about. So avoid the local copy.

Signed-off-by: Andreas Rheinhardt <[email protected]>
  • Loading branch information
mkver committed Nov 9, 2022
1 parent b616b04 commit f0ee387
Showing 1 changed file with 77 additions and 79 deletions.
156 changes: 77 additions & 79 deletions libpostproc/postprocess_template.c
Original file line number Diff line number Diff line change
Expand Up @@ -2860,14 +2860,13 @@ static inline void RENAME(prefetcht2)(const void *p)
* Filter array of bytes (Y or U or V values)
*/
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
const int8_t QPs[], int QPStride, int isColor, PPContext *c2)
const int8_t QPs[], int QPStride, int isColor, PPContext *c)
{
DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access
int x,y;
#ifdef TEMPLATE_PP_TIME_MODE
const int mode= TEMPLATE_PP_TIME_MODE;
#else
const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
const int mode = isColor ? c->ppMode.chromMode : c->ppMode.lumMode;
#endif
int black=0, white=255; // blackest black and whitest white in the picture
int QPCorrecture= 256*256;
Expand All @@ -2877,29 +2876,29 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
int i;
#endif

const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
const int qpHShift = isColor ? 4 - c->hChromaSubSample : 4;
const int qpVShift = isColor ? 4 - c->vChromaSubSample : 4;

//FIXME remove
uint64_t * const yHistogram= c.yHistogram;
uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
uint8_t * const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32;
uint64_t * const yHistogram= c->yHistogram;
uint8_t * const tempSrc = srcStride > 0 ? c->tempSrc : c->tempSrc - 23*srcStride;
uint8_t * const tempDst = (dstStride > 0 ? c->tempDst : c->tempDst - 23*dstStride) + 32;
//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;

if (mode & VISUALIZE){
if(!(mode & (V_A_DEBLOCK | H_A_DEBLOCK)) || TEMPLATE_PP_MMX) {
av_log(c2, AV_LOG_WARNING, "Visualization is currently only supported with the accurate deblock filter without SIMD\n");
av_log(c, AV_LOG_WARNING, "Visualization is currently only supported with the accurate deblock filter without SIMD\n");
}
}

#if TEMPLATE_PP_MMX
for(i=0; i<57; i++){
int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
int offset = ((i * c->ppMode.baseDcDiff) >> 8) + 1;
int threshold= offset*2 + 1;
c.mmxDcOffset[i]= 0x7F - offset;
c.mmxDcThreshold[i]= 0x7F - threshold;
c.mmxDcOffset[i]*= 0x0101010101010101LL;
c.mmxDcThreshold[i]*= 0x0101010101010101LL;
c->mmxDcOffset[i] = 0x7F - offset;
c->mmxDcThreshold[i] = 0x7F - threshold;
c->mmxDcOffset[i] *= 0x0101010101010101LL;
c->mmxDcThreshold[i] *= 0x0101010101010101LL;
}
#endif

Expand All @@ -2925,16 +2924,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
uint64_t clipped;
AVRational scale;

c.frameNum++;
c->frameNum++;
// first frame is fscked so we ignore it
if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256;
if (c->frameNum == 1)
yHistogram[0] = width * (uint64_t)height/64*15/256;

for(i=0; i<256; i++){
sum+= yHistogram[i];
}

/* We always get a completely black picture first. */
maxClipped= av_rescale(sum, c.ppMode.maxClippedThreshold.num, c.ppMode.maxClippedThreshold.den);
maxClipped = av_rescale(sum, c->ppMode.maxClippedThreshold.num,
c->ppMode.maxClippedThreshold.den);

clipped= sum;
for(black=255; black>0; black--){
Expand All @@ -2948,27 +2949,27 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
clipped-= yHistogram[white];
}

scale = (AVRational){c.ppMode.maxAllowedY - c.ppMode.minAllowedY, white - black};
scale = (AVRational){c->ppMode.maxAllowedY - c->ppMode.minAllowedY, white - black};

#if TEMPLATE_PP_MMXEXT
c.packedYScale = (uint16_t)av_rescale(scale.num, 256, scale.den);
c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
c->packedYScale = (uint16_t)av_rescale(scale.num, 256, scale.den);
c->packedYOffset = (((black*c->packedYScale)>>8) - c->ppMode.minAllowedY) & 0xFFFF;
#else
c.packedYScale = (uint16_t)av_rescale(scale.num, 1024, scale.den);
c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
c->packedYScale = (uint16_t)av_rescale(scale.num, 1024, scale.den);
c->packedYOffset = (black - c->ppMode.minAllowedY) & 0xFFFF;
#endif

c.packedYOffset|= c.packedYOffset<<32;
c.packedYOffset|= c.packedYOffset<<16;
c->packedYOffset |= c->packedYOffset<<32;
c->packedYOffset |= c->packedYOffset<<16;

c.packedYScale|= c.packedYScale<<32;
c.packedYScale|= c.packedYScale<<16;
c->packedYScale |= c->packedYScale<<32;
c->packedYScale |= c->packedYScale<<16;

if(mode & LEVEL_FIX) QPCorrecture= (int)av_rescale(scale.num, 256*256, scale.den);
else QPCorrecture= 256*256;
}else{
c.packedYScale= 0x0100010001000100LL;
c.packedYOffset= 0;
c->packedYScale = 0x0100010001000100LL;
c->packedYOffset = 0;
QPCorrecture= 256*256;
}

Expand All @@ -2988,22 +2989,22 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);

RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c->packedYOffset);

RENAME(duplicate)(dstBlock + dstStride*8, dstStride);

if(mode & LINEAR_IPOL_DEINT_FILTER)
RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
else if(mode & LINEAR_BLEND_DEINT_FILTER)
RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c->deintTemp + x);
else if(mode & MEDIAN_DEINT_FILTER)
RENAME(deInterlaceMedian)(dstBlock, dstStride);
else if(mode & CUBIC_IPOL_DEINT_FILTER)
RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
else if(mode & FFMPEG_DEINT_FILTER)
RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
RENAME(deInterlaceFF)(dstBlock, dstStride, c->deintTemp + x);
else if(mode & LOWPASS5_DEINT_FILTER)
RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
RENAME(deInterlaceL5)(dstBlock, dstStride, c->deintTemp + x, c->deintTemp + width + x);
/* else if(mode & CUBIC_BLEND_DEINT_FILTER)
RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
*/
Expand All @@ -3025,11 +3026,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
const uint8_t *srcBlock= &(src[y*srcStride]);
uint8_t *dstBlock= &(dst[y*dstStride]);
#if TEMPLATE_PP_MMX
uint8_t *tempBlock1= c.tempBlocks;
uint8_t *tempBlock2= c.tempBlocks + 8;
uint8_t *tempBlock1 = c->tempBlocks;
uint8_t *tempBlock2 = c->tempBlocks + 8;
#endif
const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
int8_t *nonBQPptr = &c->nonBQPTable[(y>>qpVShift)*FFABS(QPStride)];
int QP=0, nonBQP=0;
/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
if not than use a temporary buffer */
Expand Down Expand Up @@ -3072,16 +3073,16 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
nonBQP= (nonBQP* QPCorrecture + 256*128)>>16;
yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++;
}
c.QP_block[qp_index] = QP;
c.nonBQP_block[qp_index] = nonBQP;
c->QP_block[qp_index] = QP;
c->nonBQP_block[qp_index] = nonBQP;
#if TEMPLATE_PP_MMX
__asm__ volatile(
"movd %1, %%mm7 \n\t"
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
"packuswb %%mm7, %%mm7 \n\t" // QP,..., QP
"movq %%mm7, %0 \n\t"
: "=m" (c.pQPb_block[qp_index])
: "=m" (c->pQPb_block[qp_index])
: "r" (QP)
);
#endif
Expand All @@ -3093,20 +3094,20 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32);

RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c->packedYOffset);

if(mode & LINEAR_IPOL_DEINT_FILTER)
RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
else if(mode & LINEAR_BLEND_DEINT_FILTER)
RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c->deintTemp + x);
else if(mode & MEDIAN_DEINT_FILTER)
RENAME(deInterlaceMedian)(dstBlock, dstStride);
else if(mode & CUBIC_IPOL_DEINT_FILTER)
RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
else if(mode & FFMPEG_DEINT_FILTER)
RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
RENAME(deInterlaceFF)(dstBlock, dstStride, c->deintTemp + x);
else if(mode & LOWPASS5_DEINT_FILTER)
RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
RENAME(deInterlaceL5)(dstBlock, dstStride, c->deintTemp + x, c->deintTemp + width + x);
/* else if(mode & CUBIC_BLEND_DEINT_FILTER)
RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
*/
Expand All @@ -3121,24 +3122,24 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
const int stride= dstStride;
//temporary while changing QP stuff to make things continue to work
//eventually QP,nonBQP,etc will be arrays and this will be unnecessary
c.QP = c.QP_block[qp_index];
c.nonBQP = c.nonBQP_block[qp_index];
c.pQPb = c.pQPb_block[qp_index];
c.pQPb2 = c.pQPb2_block[qp_index];
c->QP = c->QP_block[qp_index];
c->nonBQP = c->nonBQP_block[qp_index];
c->pQPb = c->pQPb_block[qp_index];
c->pQPb2 = c->pQPb2_block[qp_index];

/* only deblock if we have 2 blocks */
if(y + 8 < height){
if(mode & V_X1_FILTER)
RENAME(vertX1Filter)(dstBlock, stride, &c);
RENAME(vertX1Filter)(dstBlock, stride, c);
else if(mode & V_DEBLOCK){
const int t= RENAME(vertClassify)(dstBlock, stride, &c);
const int t = RENAME(vertClassify)(dstBlock, stride, c);

if(t==1)
RENAME(doVertLowPass)(dstBlock, stride, &c);
RENAME(doVertLowPass)(dstBlock, stride, c);
else if(t==2)
RENAME(doVertDefFilter)(dstBlock, stride, &c);
RENAME(doVertDefFilter)(dstBlock, stride, c);
}else if(mode & V_A_DEBLOCK){
RENAME(do_a_deblock)(dstBlock, stride, 1, &c, mode);
RENAME(do_a_deblock)(dstBlock, stride, 1, c, mode);
}
}

Expand All @@ -3151,71 +3152,71 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[

for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){
const int stride= dstStride;
c.QP = c.QP_block[qp_index];
c.nonBQP = c.nonBQP_block[qp_index];
c.pQPb = c.pQPb_block[qp_index];
c.pQPb2 = c.pQPb2_block[qp_index];
c->QP = c->QP_block[qp_index];
c->nonBQP = c->nonBQP_block[qp_index];
c->pQPb = c->pQPb_block[qp_index];
c->pQPb2 = c->pQPb2_block[qp_index];
#if TEMPLATE_PP_MMX
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
#endif
/* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0){
#if TEMPLATE_PP_MMX
if(mode & H_X1_FILTER)
RENAME(vertX1Filter)(tempBlock1, 16, &c);
RENAME(vertX1Filter)(tempBlock1, 16, c);
else if(mode & H_DEBLOCK){
const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
const int t= RENAME(vertClassify)(tempBlock1, 16, c);
if(t==1)
RENAME(doVertLowPass)(tempBlock1, 16, &c);
RENAME(doVertLowPass)(tempBlock1, 16, c);
else if(t==2)
RENAME(doVertDefFilter)(tempBlock1, 16, &c);
RENAME(doVertDefFilter)(tempBlock1, 16, c);
}else if(mode & H_A_DEBLOCK){
RENAME(do_a_deblock)(tempBlock1, 16, 1, &c, mode);
RENAME(do_a_deblock)(tempBlock1, 16, 1, c, mode);
}

RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);

#else
if(mode & H_X1_FILTER)
horizX1Filter(dstBlock-4, stride, c.QP);
horizX1Filter(dstBlock-4, stride, c->QP);
else if(mode & H_DEBLOCK){
#if TEMPLATE_PP_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
int t;
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);

t = vertClassify_altivec(tempBlock-48, 16, &c);
t = vertClassify_altivec(tempBlock-48, 16, c);
if(t==1) {
doVertLowPass_altivec(tempBlock-48, 16, &c);
doVertLowPass_altivec(tempBlock-48, 16, c);
transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
}
else if(t==2) {
doVertDefFilter_altivec(tempBlock-48, 16, &c);
doVertDefFilter_altivec(tempBlock-48, 16, c);
transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
}
#else
const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
const int t= RENAME(horizClassify)(dstBlock-4, stride, c);

if(t==1)
RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
RENAME(doHorizLowPass)(dstBlock-4, stride, c);
else if(t==2)
RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
RENAME(doHorizDefFilter)(dstBlock-4, stride, c);
#endif
}else if(mode & H_A_DEBLOCK){
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c, mode);
RENAME(do_a_deblock)(dstBlock-8, 1, stride, c, mode);
}
#endif //TEMPLATE_PP_MMX
if(mode & DERING){
//FIXME filter first line
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, c);
}

if(mode & TEMP_NOISE_FILTER)
{
RENAME(tempNoiseReducer)(dstBlock-8, stride,
c.tempBlurred[isColor] + y*dstStride + x,
c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
c.ppMode.maxTmpNoise);
c->tempBlurred[isColor] + y*dstStride + x,
c->tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
c->ppMode.maxTmpNoise);
}
}

Expand All @@ -3229,14 +3230,14 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
}

if(mode & DERING){
if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, c);
}

if((mode & TEMP_NOISE_FILTER)){
RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
c.tempBlurred[isColor] + y*dstStride + x,
c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
c.ppMode.maxTmpNoise);
c->tempBlurred[isColor] + y*dstStride + x,
c->tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
c->ppMode.maxTmpNoise);
}

/* did we use a tmp buffer for the last lines*/
Expand Down Expand Up @@ -3278,9 +3279,6 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
}
}
#endif

*c2= c; //copy local context back

}

#undef RENAME
Expand Down

0 comments on commit f0ee387

Please sign in to comment.