mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-05-01 06:13:08 +08:00
avcodec/aarch64/vvc: Implement dmvr_h_8
A78: dmvr_h_8_12x20_neon: 76.6 ( 4.31x) dmvr_h_8_20x12_neon: 65.8 ( 3.49x) dmvr_h_8_20x20_neon: 106.6 ( 3.62x) A72: dmvr_h_8_12x20_neon: 190.6 ( 4.40x) dmvr_h_8_20x12_neon: 171.1 ( 4.31x) dmvr_h_8_20x20_neon: 275.1 ( 4.50x)
This commit is contained in:
committed by
jianhuaw
parent
1b97966199
commit
189e841cfd
@@ -95,6 +95,7 @@ W_AVG_FUN(12)
|
||||
|
||||
DMVR_FUN(, 8)
|
||||
DMVR_FUN(, 12)
|
||||
DMVR_FUN(h_, 8)
|
||||
DMVR_FUN(hv_, 8)
|
||||
DMVR_FUN(hv_, 10)
|
||||
DMVR_FUN(hv_, 12)
|
||||
@@ -188,6 +189,7 @@ void ff_vvc_dsp_init_aarch64(VVCDSPContext *const c, const int bd)
|
||||
c->inter.avg = ff_vvc_avg_8_neon;
|
||||
c->inter.w_avg = vvc_w_avg_8;
|
||||
c->inter.dmvr[0][0] = ff_vvc_dmvr_8_neon;
|
||||
c->inter.dmvr[0][1] = ff_vvc_dmvr_h_8_neon;
|
||||
c->inter.dmvr[1][1] = ff_vvc_dmvr_hv_8_neon;
|
||||
c->inter.apply_bdof = ff_vvc_apply_bdof_8_neon;
|
||||
|
||||
|
||||
@@ -385,6 +385,58 @@ function ff_vvc_dmvr_12_neon, export=1
|
||||
ret
|
||||
endfunc
|
||||
|
||||
// ff_vvc_dmvr_h_8_neon: horizontal 2-tap filter over 8-bit samples, producing
// 16-bit results. For every output position i in a row:
//     dst[i] = (c0 * src[i] + c1 * src[i + 1] + 2) >> 2
// where (c0, c1) is the byte pair at ff_vvc_inter_luma_dmvr_filters + 2 * x4.
//
// Register use, as read from the code below:
//   x0  destination pointer; 16-bit results are stored and the pointer is
//       stepped VVC_MAX_PB_SIZE * 2 bytes per row in total
//   x1  source pointer (bytes); advanced by x2 after each row
//   x2  source row stride in bytes
//   w3  row counter; decremented before it is tested, so it must be
//       non-zero on entry
//   x4  filter index (scaled by 2 to address a coefficient pair)
//   w6  width selector: bit 4 set -> 20 outputs per row, clear -> 12
//   x7  scratch (filter table address)
//   v0-v7, v16, v17  scratch vectors
// NOTE(review): presumably matches the C prototype
// (dst, src, src_stride, height, mx, my, width) with x5 (my) unused here --
// confirm against the C declaration.
// Each row reads one source byte more than the number of outputs
// (21 bytes on the 20-wide path, 13 bytes on the 12-wide path).
function ff_vvc_dmvr_h_8_neon, export=1
|
||||
movrel x7, X(ff_vvc_inter_luma_dmvr_filters) // x7 = base address of the filter table
|
||||
add x7, x7, x4, lsl #1 // x7 += 2 * x4: select the two-byte coefficient pair
|
||||
ld2r {v0.16b, v1.16b}, [x7] // v0 = c0 in every byte lane, v1 = c1 in every byte lane
|
||||
tbz w6, #4, 12f // bit 4 of w6 clear: take the 12-wide loop; else fall through to 20-wide
|
||||
20: // 20-wide loop: one row (20 outputs) per iteration
|
||||
ldur q3, [x1, #1] // v3 = src[1..16] (right-hand taps for outputs 0..15)
|
||||
|
||||
||||
ldr q2, [x1] // v2 = src[0..15] (left-hand taps for outputs 0..15)
|
||||
umull v4.8h, v0.8b, v2.8b // v4 = c0 * src[0..7], widened to 16 bits
|
||||
umull2 v5.8h, v0.16b, v2.16b // v5 = c0 * src[8..15]
|
||||
ldur s17, [x1, #17] // v17 = src[17..20]; remaining lanes are zeroed by the scalar load
|
||||
umull v6.8h, v1.8b, v3.8b // v6 = c1 * src[1..8]
|
||||
ldr s16, [x1, #16] // v16 = src[16..19]; remaining lanes are zeroed by the scalar load
|
||||
umull2 v7.8h, v1.16b, v3.16b // v7 = c1 * src[9..16]
|
||||
add v4.8h, v4.8h, v6.8h // filter sums for outputs 0..7
|
||||
umull v17.8h, v1.8b, v17.8b // v17 = c1 * src[17..20] (only the low 4 lanes are used)
|
||||
add v5.8h, v5.8h, v7.8h // filter sums for outputs 8..15
|
||||
umull v16.8h, v0.8b, v16.8b // v16 = c0 * src[16..19] (only the low 4 lanes are used)
|
||||
srshr v4.8h, v4.8h, #2 // rounding shift: (sum + 2) >> 2
|
||||
add v16.4h, v16.4h, v17.4h // filter sums for outputs 16..19
|
||||
srshr v5.8h, v5.8h, #2 // rounding shift: (sum + 2) >> 2
|
||||
srshr v16.4h, v16.4h, #2 // rounding shift: (sum + 2) >> 2
|
||||
st1 {v4.8h, v5.8h}, [x0], #32 // store outputs 0..15, x0 += 32
|
||||
subs w3, w3, #1 // one row done; flags are consumed by b.ne below (the st1/add in between leave them intact)
|
||||
st1 {v16.4h}, [x0], #8 // store outputs 16..19, x0 += 8
|
||||
add x1, x1, x2 // advance source pointer to the next row
|
||||
add x0, x0, #(VVC_MAX_PB_SIZE * 2 - 32 - 8) // complete the VVC_MAX_PB_SIZE * 2 byte row step (40 bytes already advanced by the stores)
|
||||
b.ne 20b // repeat until the row counter reaches zero
|
||||
ret
|
||||
|
||||
12: // 12-wide loop: one row (12 outputs) per iteration
|
||||
ldur d3, [x1, #1] // v3 = src[1..8] (right-hand taps for outputs 0..7)
|
||||
|
||||
||||
ldr d2, [x1] // v2 = src[0..7] (left-hand taps for outputs 0..7)
|
||||
umull v4.8h, v0.8b, v2.8b // v4 = c0 * src[0..7], widened to 16 bits
|
||||
ldur s17, [x1, #9] // v17 = src[9..12]; remaining lanes are zeroed by the scalar load
|
||||
umull v6.8h, v1.8b, v3.8b // v6 = c1 * src[1..8]
|
||||
ldr s16, [x1, #8] // v16 = src[8..11]; remaining lanes are zeroed by the scalar load
|
||||
add v4.8h, v4.8h, v6.8h // filter sums for outputs 0..7
|
||||
umull v17.8h, v1.8b, v17.8b // v17 = c1 * src[9..12] (only the low 4 lanes are used)
|
||||
umull v16.8h, v0.8b, v16.8b // v16 = c0 * src[8..11] (only the low 4 lanes are used)
|
||||
srshr v4.8h, v4.8h, #2 // rounding shift: (sum + 2) >> 2
|
||||
add v16.4h, v16.4h, v17.4h // filter sums for outputs 8..11
|
||||
srshr v16.4h, v16.4h, #2 // rounding shift: (sum + 2) >> 2
|
||||
st1 {v4.8h}, [x0], #16 // store outputs 0..7, x0 += 16
|
||||
subs w3, w3, #1 // one row done; flags are consumed by b.ne below (the st1/add in between leave them intact)
|
||||
st1 {v16.4h}, [x0], #8 // store outputs 8..11, x0 += 8
|
||||
add x1, x1, x2 // advance source pointer to the next row
|
||||
add x0, x0, #(VVC_MAX_PB_SIZE * 2 - 16 - 8) // complete the VVC_MAX_PB_SIZE * 2 byte row step (24 bytes already advanced by the stores)
|
||||
b.ne 12b // repeat until the row counter reaches zero
|
||||
ret
|
||||
endfunc
|
||||
|
||||
function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
tmp0 .req x7
|
||||
tmp1 .req x8
|
||||
|
||||
Reference in New Issue
Block a user