avcodec/vvc/inter: Combine offsets early

For bi-predicted weighted averages, only the sum of the two offsets
is ever used, so add them once in the caller and pass a single
combined offset to the w_avg functions.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2026-02-20 18:24:24 +01:00
parent fc7c38f9da
commit dc65dcec22
9 changed files with 18 additions and 21 deletions

View File

@@ -107,10 +107,10 @@ void ff_vvc_w_avg_12_neon(uint8_t *_dst, ptrdiff_t _dst_stride,
#define W_AVG_FUN(bit_depth) \
static void vvc_w_avg_ ## bit_depth(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height, \
int denom, int w0, int w1, int o0, int o1) \
int denom, int w0, int w1, int o) \
{ \
int shift = denom + FFMAX(3, 15 - bit_depth); \
int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
int offset = (o * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
uintptr_t w0_w1 = ((uintptr_t)w0 << 32) | (uint32_t)w1; \
uintptr_t offset_shift = ((uintptr_t)offset << 32) | (uint32_t)shift; \
ff_vvc_w_avg_ ## bit_depth ## _neon(dst, dst_stride, src0, src1, width, height, w0_w1, offset_shift); \

View File

@@ -34,7 +34,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height); \
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height, \
int denom, int w0, int w1, int o0, int o1);
int denom, int w0, int w1, int o);
AVG_PROTOTYPES(8, rvv_128)
AVG_PROTOTYPES(8, rvv_256)

View File

@@ -163,9 +163,7 @@ func ff_vvc_w_avg_8_rvv_\vlen\(), zve32x, zbb, zba
addi t6, a6, 7
ld t3, (sp)
ld t4, 8(sp)
ld t5, 16(sp)
addi t4, t4, 1 // o0 + o1 + 1
add t4, t4, t5
addi t5, t6, -1 // shift - 1
sll t4, t4, t5
POW2_J \vlen, 2, a4

View File

@@ -75,7 +75,7 @@ typedef struct VVCInterDSPContext {
void (*w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height,
int denom, int w0, int w1, int o0, int o1);
int denom, int w0, int w1, int o);
void (*put_ciip)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height,
const uint8_t *inter, ptrdiff_t inter_stride, int inter_weight);

View File

@@ -232,22 +232,22 @@ static void apply_averaging(uint8_t *dst, const ptrdiff_t dst_stride,
return;
}
int denom, w0, w1, o1, o2;
int denom, w0, w1, o;
if (bcw_idx) {
denom = 2;
w1 = bcw_w_lut[bcw_idx];
w0 = 8 - w1;
o1 = o2 = 0;
o = 0;
} else {
const PredWeightTable *w = pps->r->pps_wp_info_in_ph_flag ? &fc->ps.ph.pwt : &sh->pwt;
denom = w->log2_denom[c_idx > 0];
w0 = w->weight[L0][c_idx][mvf->ref_idx[L0]];
w1 = w->weight[L1][c_idx][mvf->ref_idx[L1]];
o1 = w->offset[L0][c_idx][mvf->ref_idx[L0]];
o2 = w->offset[L1][c_idx][mvf->ref_idx[L1]];
o = w->offset[L0][c_idx][mvf->ref_idx[L0]]
+ w->offset[L1][c_idx][mvf->ref_idx[L1]];
}
fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o1, o2);
fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o);
}
#define INTER_FILTER(t, frac) (is_chroma ? ff_vvc_inter_chroma_filters[t][frac] : ff_vvc_inter_luma_filters[t][frac])

View File

@@ -201,12 +201,12 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
const int16_t *src0, const int16_t *src1, const int width, const int height,
const int denom, const int w0, const int w1, const int o0, const int o1)
const int denom, const int w0, const int w1, const int o)
{
pixel *dst = (pixel*)_dst;
const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
const int offset = (o * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
for (int y = 0; y < height; y++) {
for (int x = 0; x < width; x++)

View File

@@ -231,7 +231,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height);\
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
const int16_t *src0, const int16_t *src1, int width, int height, \
int denom, int w0, int w1, int o0, int o1); \
int denom, int w0, int w1, int o); \
c->inter.avg = bf(ff_vvc_avg, bd, opt); \
c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
} while (0)

View File

@@ -244,7 +244,7 @@ cglobal vvc_avg_%2, 4, 7, 5, dst, stride, src0, src1, w, h
;void ff_vvc_w_avg_%2_avx(uint8_t *dst, ptrdiff_t dst_stride,
; const int16_t *src0, const int16_t *src1, int width, int height,
; int denom, intptr_t w0, int w1, int o0, int o1);
; int denom, intptr_t w0, int w1, int o);
%macro VVC_W_AVG_AVX2 3
cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w, h
%if UNIX64
@@ -256,8 +256,7 @@ cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w,
%endif
mov t1d, r6m ; denom
mov t0d, r9m ; o0
add t0d, r10m ; o1
mov t0d, r9m ; o0 + o1
movifnidn t2d, r8m ; w1
add t1d, 15-%2
%if %2 != 8

View File

@@ -306,7 +306,7 @@ static void check_avg(void)
{
declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *src0, const int16_t *src1, int width, int height,
int denom, int w0, int w1, int o0, int o1);
int denom, int w0, int w1, int o);
{
const int denom = rnd() % 8;
const int w0 = rnd() % 256 - 128;
@@ -317,12 +317,12 @@ static void check_avg(void)
memset(dst0, 0, AVG_DST_BUF_SIZE);
memset(dst1, 0, AVG_DST_BUF_SIZE);
call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0 + o1);
if (memcmp(dst0, dst1, DST_BUF_SIZE))
fail();
if (w == h)
bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
}
}
}