mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-30 22:00:51 +08:00
Replace uxtl with umull in dmvr_hv_8
Before and after on A78:

Before:
dmvr_hv_8_12x20_neon: 205.3 ( 5.21x)
dmvr_hv_8_20x12_neon: 171.8 ( 3.15x)
dmvr_hv_8_20x20_neon: 282.7 ( 3.11x)

After:
dmvr_hv_8_12x20_neon: 172.7 ( 5.58x)
dmvr_hv_8_20x12_neon: 133.3 ( 3.36x)
dmvr_hv_8_20x20_neon: 214.6 ( 3.40x)
This commit is contained in:
committed by
Martin Storsjö
parent
85b5d4adf2
commit
fb4407797e
@@ -393,13 +393,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
|
||||
movrel x9, X(ff_vvc_inter_luma_dmvr_filters)
|
||||
add x12, x9, mx, lsl #1
|
||||
ldrb w10, [x12]
|
||||
ldrb w11, [x12, #1]
|
||||
mov tmp0, sp
|
||||
add tmp1, tmp0, #(VVC_MAX_PB_SIZE * 2)
|
||||
// We know the values are positive
|
||||
dup v0.8h, w10 // filter_x[0]
|
||||
dup v1.8h, w11 // filter_x[1]
|
||||
ld2r {v0.16b, v1.16b}, [x12]
|
||||
|
||||
add x12, x9, my, lsl #1
|
||||
ldrb w10, [x12]
|
||||
@@ -424,14 +421,10 @@ function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
// width > 16
|
||||
ldur q5, [src, #1]
|
||||
ldr q4, [src], #16
|
||||
uxtl v7.8h, v5.8b
|
||||
uxtl2 v17.8h, v5.16b
|
||||
uxtl v6.8h, v4.8b
|
||||
uxtl2 v16.8h, v4.16b
|
||||
mul v6.8h, v6.8h, v0.8h
|
||||
mul v16.8h, v16.8h, v0.8h
|
||||
mla v6.8h, v7.8h, v1.8h
|
||||
mla v16.8h, v17.8h, v1.8h
|
||||
umull v6.8h, v4.8b, v0.8b
|
||||
umull2 v16.8h, v4.16b, v0.16b
|
||||
umlal v6.8h, v5.8b, v1.8b
|
||||
umlal2 v16.8h, v5.16b, v1.16b
|
||||
urshr v6.8h, v6.8h, #(8 - 6)
|
||||
urshr v7.8h, v16.8h, #(8 - 6)
|
||||
stp q6, q7, [x13], #32
|
||||
@@ -451,10 +444,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
// width > 8
|
||||
ldur d5, [src, #1]
|
||||
ldr d4, [src], #8
|
||||
uxtl v7.8h, v5.8b
|
||||
uxtl v6.8h, v4.8b
|
||||
mul v6.8h, v6.8h, v0.8h
|
||||
mla v6.8h, v7.8h, v1.8h
|
||||
umull v6.8h, v4.8b, v0.8b
|
||||
umlal v6.8h, v5.8b, v1.8b
|
||||
urshr v6.8h, v6.8h, #(8 - 6)
|
||||
str q6, [x13], #16
|
||||
|
||||
@@ -468,10 +459,8 @@ function ff_vvc_dmvr_hv_8_neon, export=1
|
||||
3:
|
||||
ldur s5, [src, #1]
|
||||
ldr s4, [src], #4
|
||||
uxtl v7.8h, v5.8b
|
||||
uxtl v6.8h, v4.8b
|
||||
mul v6.4h, v6.4h, v0.4h
|
||||
mla v6.4h, v7.4h, v1.4h
|
||||
umull v6.8h, v4.8b, v0.8b
|
||||
umlal v6.8h, v5.8b, v1.8b
|
||||
urshr v6.4h, v6.4h, #(8 - 6)
|
||||
str d6, [x13], #8
|
||||
|
||||
|
||||
Reference in New Issue
Block a user