mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avcodec/vvc/inter: Combine offsets early
For bi-predicted weighted averages, only the sum of the two offsets is ever used, so add the two early.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -107,10 +107,10 @@ void ff_vvc_w_avg_12_neon(uint8_t *_dst, ptrdiff_t _dst_stride,
|
||||
#define W_AVG_FUN(bit_depth) \
|
||||
static void vvc_w_avg_ ## bit_depth(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||
const int16_t *src0, const int16_t *src1, int width, int height, \
|
||||
int denom, int w0, int w1, int o0, int o1) \
|
||||
int denom, int w0, int w1, int o) \
|
||||
{ \
|
||||
int shift = denom + FFMAX(3, 15 - bit_depth); \
|
||||
int offset = ((o0 + o1) * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
|
||||
int offset = (o * (1 << (bit_depth - 8)) + 1) * (1 << (shift - 1)); \
|
||||
uintptr_t w0_w1 = ((uintptr_t)w0 << 32) | (uint32_t)w1; \
|
||||
uintptr_t offset_shift = ((uintptr_t)offset << 32) | (uint32_t)shift; \
|
||||
ff_vvc_w_avg_ ## bit_depth ## _neon(dst, dst_stride, src0, src1, width, height, w0_w1, offset_shift); \
|
||||
|
||||
@@ -34,7 +34,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *src0, const int16_t *src1, int width, int height); \
|
||||
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||
const int16_t *src0, const int16_t *src1, int width, int height, \
|
||||
int denom, int w0, int w1, int o0, int o1);
|
||||
int denom, int w0, int w1, int o);
|
||||
|
||||
AVG_PROTOTYPES(8, rvv_128)
|
||||
AVG_PROTOTYPES(8, rvv_256)
|
||||
|
||||
@@ -163,9 +163,7 @@ func ff_vvc_w_avg_8_rvv_\vlen\(), zve32x, zbb, zba
|
||||
addi t6, a6, 7
|
||||
ld t3, (sp)
|
||||
ld t4, 8(sp)
|
||||
ld t5, 16(sp)
|
||||
addi t4, t4, 1 // o0 + o1 + 1
|
||||
add t4, t4, t5
|
||||
addi t5, t6, -1 // shift - 1
|
||||
sll t4, t4, t5
|
||||
POW2_J \vlen, 2, a4
|
||||
|
||||
@@ -75,7 +75,7 @@ typedef struct VVCInterDSPContext {
|
||||
|
||||
void (*w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
|
||||
const int16_t *src0, const int16_t *src1, int width, int height,
|
||||
int denom, int w0, int w1, int o0, int o1);
|
||||
int denom, int w0, int w1, int o);
|
||||
|
||||
void (*put_ciip)(uint8_t *dst, ptrdiff_t dst_stride, int width, int height,
|
||||
const uint8_t *inter, ptrdiff_t inter_stride, int inter_weight);
|
||||
|
||||
@@ -232,22 +232,22 @@ static void apply_averaging(uint8_t *dst, const ptrdiff_t dst_stride,
|
||||
return;
|
||||
}
|
||||
|
||||
int denom, w0, w1, o1, o2;
|
||||
int denom, w0, w1, o;
|
||||
if (bcw_idx) {
|
||||
denom = 2;
|
||||
w1 = bcw_w_lut[bcw_idx];
|
||||
w0 = 8 - w1;
|
||||
o1 = o2 = 0;
|
||||
o = 0;
|
||||
} else {
|
||||
const PredWeightTable *w = pps->r->pps_wp_info_in_ph_flag ? &fc->ps.ph.pwt : &sh->pwt;
|
||||
|
||||
denom = w->log2_denom[c_idx > 0];
|
||||
w0 = w->weight[L0][c_idx][mvf->ref_idx[L0]];
|
||||
w1 = w->weight[L1][c_idx][mvf->ref_idx[L1]];
|
||||
o1 = w->offset[L0][c_idx][mvf->ref_idx[L0]];
|
||||
o2 = w->offset[L1][c_idx][mvf->ref_idx[L1]];
|
||||
o = w->offset[L0][c_idx][mvf->ref_idx[L0]]
|
||||
+ w->offset[L1][c_idx][mvf->ref_idx[L1]];
|
||||
}
|
||||
fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o1, o2);
|
||||
fc->vvcdsp.inter.w_avg(dst, dst_stride, src0, src1, width, height, denom, w0, w1, o);
|
||||
}
|
||||
|
||||
#define INTER_FILTER(t, frac) (is_chroma ? ff_vvc_inter_chroma_filters[t][frac] : ff_vvc_inter_luma_filters[t][frac])
|
||||
|
||||
@@ -201,12 +201,12 @@ static void FUNC(avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
|
||||
|
||||
static void FUNC(w_avg)(uint8_t *_dst, const ptrdiff_t _dst_stride,
|
||||
const int16_t *src0, const int16_t *src1, const int width, const int height,
|
||||
const int denom, const int w0, const int w1, const int o0, const int o1)
|
||||
const int denom, const int w0, const int w1, const int o)
|
||||
{
|
||||
pixel *dst = (pixel*)_dst;
|
||||
const ptrdiff_t dst_stride = _dst_stride / sizeof(pixel);
|
||||
const int shift = denom + FFMAX(3, 15 - BIT_DEPTH);
|
||||
const int offset = ((o0 + o1) * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
|
||||
const int offset = (o * (1 << (BIT_DEPTH - 8)) + 1) * (1 << (shift - 1));
|
||||
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++)
|
||||
|
||||
@@ -231,7 +231,7 @@ void bf(ff_vvc_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||
const int16_t *src0, const int16_t *src1, int width, int height);\
|
||||
void bf(ff_vvc_w_avg, bd, opt)(uint8_t *dst, ptrdiff_t dst_stride, \
|
||||
const int16_t *src0, const int16_t *src1, int width, int height, \
|
||||
int denom, int w0, int w1, int o0, int o1); \
|
||||
int denom, int w0, int w1, int o); \
|
||||
c->inter.avg = bf(ff_vvc_avg, bd, opt); \
|
||||
c->inter.w_avg = bf(ff_vvc_w_avg, bd, opt); \
|
||||
} while (0)
|
||||
|
||||
@@ -244,7 +244,7 @@ cglobal vvc_avg_%2, 4, 7, 5, dst, stride, src0, src1, w, h
|
||||
|
||||
;void ff_vvc_w_avg_%2_avx(uint8_t *dst, ptrdiff_t dst_stride,
|
||||
; const int16_t *src0, const int16_t *src1, int width, int height,
|
||||
; int denom, intptr_t w0, int w1, int o0, int o1);
|
||||
; int denom, intptr_t w0, int w1, int o);
|
||||
%macro VVC_W_AVG_AVX2 3
|
||||
cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w, h
|
||||
%if UNIX64
|
||||
@@ -256,8 +256,7 @@ cglobal vvc_w_avg_%2, 4, 7+2*UNIX64, 6+2*(%1 != 8), dst, stride, src0, src1, w,
|
||||
%endif
|
||||
|
||||
mov t1d, r6m ; denom
|
||||
mov t0d, r9m ; o0
|
||||
add t0d, r10m ; o1
|
||||
mov t0d, r9m ; o0 + o1
|
||||
movifnidn t2d, r8m ; w1
|
||||
add t1d, 15-%2
|
||||
%if %2 != 8
|
||||
|
||||
@@ -306,7 +306,7 @@ static void check_avg(void)
|
||||
{
|
||||
declare_func(void, uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *src0, const int16_t *src1, int width, int height,
|
||||
int denom, int w0, int w1, int o0, int o1);
|
||||
int denom, int w0, int w1, int o);
|
||||
{
|
||||
const int denom = rnd() % 8;
|
||||
const int w0 = rnd() % 256 - 128;
|
||||
@@ -317,12 +317,12 @@ static void check_avg(void)
|
||||
memset(dst0, 0, AVG_DST_BUF_SIZE);
|
||||
memset(dst1, 0, AVG_DST_BUF_SIZE);
|
||||
|
||||
call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
|
||||
call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0, o1);
|
||||
call_ref(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
|
||||
call_new(dst1, MAX_CTU_SIZE * SIZEOF_PIXEL, src10, src11, w, h, denom, w0, w1, o0 + o1);
|
||||
if (memcmp(dst0, dst1, DST_BUF_SIZE))
|
||||
fail();
|
||||
if (w == h)
|
||||
bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0, o1);
|
||||
bench_new(dst0, MAX_CTU_SIZE * SIZEOF_PIXEL, src00, src01, w, h, denom, w0, w1, o0 + o1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user