aarch64: Add PAC sign/validation of the link register

Whenever the link register is stored on the stack, sign it
before storing it and validate at a symmetrical point (with the
stack at the same level as when it was signed).

These macros only have an effect if built with PAC enabled (e.g.
through -mbranch-protection=standard), otherwise they don't
generate any extra instructions.

None of these cases were present when PAC support was added
in 248986a0db in 2022.

Without these changes, PAC still had an effect in the compiler-generated
code and in the existing cases where these macros were used - but this
commit makes it apply to the remaining cases where the link register is
stored on the stack.
This commit is contained in:
Martin Storsjö
2026-03-16 12:58:10 +02:00
parent dbf7354d98
commit f72f692afa
6 changed files with 203 additions and 20 deletions

View File

@@ -1276,6 +1276,7 @@ function ff_hevc_put_hevc_epel_bi_v32_8_neon, export=1
endfunc
function ff_hevc_put_hevc_epel_bi_v48_8_neon, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -1292,10 +1293,12 @@ function ff_hevc_put_hevc_epel_bi_v48_8_neon, export=1
bl X(ff_hevc_put_hevc_epel_bi_v24_8_neon)
ldr x30, [sp, #8]
add sp, sp, #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_bi_v64_8_neon, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -1312,6 +1315,7 @@ function ff_hevc_put_hevc_epel_bi_v64_8_neon, export=1
bl X(ff_hevc_put_hevc_epel_bi_v32_8_neon)
ldr x30, [sp, #8]
add sp, sp, #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -3216,6 +3220,7 @@ function ff_hevc_put_hevc_epel_hv4_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3224,6 +3229,7 @@ function ff_hevc_put_hevc_epel_hv4_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_h4_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_hv4_8_end_neon
endfunc
@@ -3231,6 +3237,7 @@ function ff_vvc_put_epel_hv4_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #8
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3239,6 +3246,7 @@ function ff_vvc_put_epel_hv4_8_\suffix, export=1
bl X(ff_vvc_put_epel_h4_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_epel_hv4_8_end_neon
endfunc
@@ -3246,6 +3254,7 @@ function ff_hevc_put_hevc_epel_hv6_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3254,6 +3263,7 @@ function ff_hevc_put_hevc_epel_hv6_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_h6_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_hv6_8_end_neon
endfunc
@@ -3261,6 +3271,7 @@ function ff_hevc_put_hevc_epel_hv8_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3269,6 +3280,7 @@ function ff_hevc_put_hevc_epel_hv8_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_h8_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_hv8_8_end_neon
endfunc
@@ -3276,6 +3288,7 @@ function ff_vvc_put_epel_hv8_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #8
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3284,6 +3297,7 @@ function ff_vvc_put_epel_hv8_8_\suffix, export=1
bl X(ff_vvc_put_epel_h8_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_epel_hv8_8_end_neon
endfunc
@@ -3291,6 +3305,7 @@ function ff_hevc_put_hevc_epel_hv12_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3299,6 +3314,7 @@ function ff_hevc_put_hevc_epel_hv12_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_h12_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_hv12_8_end_neon
endfunc
@@ -3306,6 +3322,7 @@ function ff_hevc_put_hevc_epel_hv16_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3314,6 +3331,7 @@ function ff_hevc_put_hevc_epel_hv16_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_h16_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_hv16_8_end_neon
endfunc
@@ -3321,6 +3339,7 @@ function ff_vvc_put_epel_hv16_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #8
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3329,6 +3348,7 @@ function ff_vvc_put_epel_hv16_8_\suffix, export=1
bl X(ff_vvc_put_epel_h16_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_epel_hv16_8_end_neon
endfunc
@@ -3336,6 +3356,7 @@ function ff_hevc_put_hevc_epel_hv24_8_\suffix, export=1
add w10, w3, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-32]!
stp x0, x3, [sp, #16]
add x0, sp, #32
@@ -3344,10 +3365,12 @@ function ff_hevc_put_hevc_epel_hv24_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_h24_8_\suffix)
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_hv24_8_end_neon
endfunc
function ff_hevc_put_hevc_epel_hv32_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3362,10 +3385,12 @@ function ff_hevc_put_hevc_epel_hv32_8_\suffix, export=1
mov x6, #16
bl X(ff_hevc_put_hevc_epel_hv16_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_vvc_put_epel_hv32_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3380,10 +3405,12 @@ function ff_vvc_put_epel_hv32_8_\suffix, export=1
mov x6, #16
bl X(ff_vvc_put_epel_hv16_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_hv48_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3398,10 +3425,12 @@ function ff_hevc_put_hevc_epel_hv48_8_\suffix, export=1
mov x6, #24
bl X(ff_hevc_put_hevc_epel_hv24_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3430,10 +3459,12 @@ function ff_hevc_put_hevc_epel_hv64_8_\suffix, export=1
mov x6, #16
bl X(ff_hevc_put_hevc_epel_hv16_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_vvc_put_epel_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3448,10 +3479,12 @@ function ff_vvc_put_epel_hv64_8_\suffix, export=1
mov x6, #32
bl X(ff_vvc_put_epel_hv32_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_vvc_put_epel_hv128_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3466,6 +3499,7 @@ function ff_vvc_put_epel_hv128_8_\suffix, export=1
mov x6, #64
bl X(ff_vvc_put_epel_hv64_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -3606,6 +3640,7 @@ function ff_hevc_put_hevc_epel_uni_hv4_8_\suffix, export=1
add w10, w4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3618,6 +3653,7 @@ function ff_hevc_put_hevc_epel_uni_hv4_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_hv4_8_end_neon
endfunc
@@ -3625,6 +3661,7 @@ function ff_hevc_put_hevc_epel_uni_hv6_8_\suffix, export=1
add w10, w4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3637,6 +3674,7 @@ function ff_hevc_put_hevc_epel_uni_hv6_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_hv6_8_end_neon
endfunc
@@ -3644,6 +3682,7 @@ function ff_hevc_put_hevc_epel_uni_hv8_8_\suffix, export=1
add w10, w4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3656,6 +3695,7 @@ function ff_hevc_put_hevc_epel_uni_hv8_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_hv8_8_end_neon
endfunc
@@ -3663,6 +3703,7 @@ function ff_hevc_put_hevc_epel_uni_hv12_8_\suffix, export=1
add w10, w4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3675,6 +3716,7 @@ function ff_hevc_put_hevc_epel_uni_hv12_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_hv12_8_end_neon
endfunc
@@ -3682,6 +3724,7 @@ function ff_hevc_put_hevc_epel_uni_hv16_8_\suffix, export=1
add w10, w4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3694,6 +3737,7 @@ function ff_hevc_put_hevc_epel_uni_hv16_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_hv16_8_end_neon
endfunc
@@ -3701,6 +3745,7 @@ function ff_hevc_put_hevc_epel_uni_hv24_8_\suffix, export=1
add w10, w4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3713,10 +3758,12 @@ function ff_hevc_put_hevc_epel_uni_hv24_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_hv24_8_end_neon
endfunc
function ff_hevc_put_hevc_epel_uni_hv32_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x5, x6, [sp, #-64]!
stp x3, x4, [sp, #16]
stp x1, x2, [sp, #32]
@@ -3733,10 +3780,12 @@ function ff_hevc_put_hevc_epel_uni_hv32_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_uni_hv16_8_\suffix)
ldr x30, [sp, #56]
add sp, sp, #64
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_uni_hv48_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x5, x6, [sp, #-64]!
stp x3, x4, [sp, #16]
stp x1, x2, [sp, #32]
@@ -3753,10 +3802,12 @@ function ff_hevc_put_hevc_epel_uni_hv48_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_uni_hv24_8_\suffix)
ldr x30, [sp, #56]
add sp, sp, #64
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_uni_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x5, x6, [sp, #-64]!
stp x3, x4, [sp, #16]
stp x1, x2, [sp, #32]
@@ -3789,6 +3840,7 @@ function ff_hevc_put_hevc_epel_uni_hv64_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_uni_hv16_8_\suffix)
ldr x30, [sp, #56]
add sp, sp, #64
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
.endm
@@ -4594,6 +4646,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv4_8_\suffix, export=1
add x10, x4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -4606,6 +4659,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv4_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_w_hv4_8_end_neon
endfunc
@@ -4616,6 +4670,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv6_8_\suffix, export=1
add x10, x4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -4628,6 +4683,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv6_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_w_hv6_8_end_neon
endfunc
@@ -4638,6 +4694,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv8_8_\suffix, export=1
add x10, x4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -4650,6 +4707,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv8_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_w_hv8_8_end_neon
endfunc
@@ -4660,6 +4718,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv12_8_\suffix, export=1
add x10, x4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -4672,6 +4731,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv12_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_w_hv12_8_end_neon
endfunc
@@ -4682,6 +4742,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv16_8_\suffix, export=1
add x10, x4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -4694,6 +4755,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv16_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_w_hv16_8_end_neon
endfunc
@@ -4704,6 +4766,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv24_8_\suffix, export=1
add x10, x4, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -4716,10 +4779,12 @@ function ff_hevc_put_hevc_epel_uni_w_hv24_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldr x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_uni_w_hv24_8_end_neon
endfunc
function ff_hevc_put_hevc_epel_uni_w_hv32_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
ldp x15, x16, [sp]
mov x17, #16
stp x15, x16, [sp, #-96]!
@@ -4744,10 +4809,12 @@ function ff_hevc_put_hevc_epel_uni_w_hv32_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_uni_w_hv16_8_\suffix)
ldp x17, x30, [sp, #16]
ldp x15, x16, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_uni_w_hv48_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
ldp x15, x16, [sp]
mov x17, #24
stp x15, x16, [sp, #-96]!
@@ -4771,10 +4838,12 @@ function ff_hevc_put_hevc_epel_uni_w_hv48_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_uni_w_hv24_8_\suffix)
ldp x17, x30, [sp, #16]
ldp x15, x16, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_uni_w_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
ldp x15, x16, [sp]
mov x17, #32
stp x15, x16, [sp, #-96]!
@@ -4799,6 +4868,7 @@ function ff_hevc_put_hevc_epel_uni_w_hv64_8_\suffix, export=1
bl X(ff_hevc_put_hevc_epel_uni_w_hv32_8_\suffix)
ldp x17, x30, [sp, #16]
ldp x15, x16, [sp], #32
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
.endm
@@ -4989,6 +5059,7 @@ function ff_hevc_put_hevc_epel_bi_hv4_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5002,6 +5073,7 @@ function ff_hevc_put_hevc_epel_bi_hv4_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv4_8_end_neon
endfunc
@@ -5009,6 +5081,7 @@ function ff_hevc_put_hevc_epel_bi_hv6_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5022,6 +5095,7 @@ function ff_hevc_put_hevc_epel_bi_hv6_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv6_8_end_neon
endfunc
@@ -5029,6 +5103,7 @@ function ff_hevc_put_hevc_epel_bi_hv8_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5042,6 +5117,7 @@ function ff_hevc_put_hevc_epel_bi_hv8_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv8_8_end_neon
endfunc
@@ -5049,6 +5125,7 @@ function ff_hevc_put_hevc_epel_bi_hv12_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5062,6 +5139,7 @@ function ff_hevc_put_hevc_epel_bi_hv12_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv12_8_end_neon
endfunc
@@ -5069,6 +5147,7 @@ function ff_hevc_put_hevc_epel_bi_hv16_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5082,6 +5161,7 @@ function ff_hevc_put_hevc_epel_bi_hv16_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv16_8_end_neon
endfunc
@@ -5089,6 +5169,7 @@ function ff_hevc_put_hevc_epel_bi_hv24_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5102,6 +5183,7 @@ function ff_hevc_put_hevc_epel_bi_hv24_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv24_8_end_neon
endfunc
@@ -5110,6 +5192,7 @@ function ff_hevc_put_hevc_epel_bi_hv32_8_\suffix, export=1
add w10, w5, #3
lsl x10, x10, #7
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-48]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5124,10 +5207,12 @@ function ff_hevc_put_hevc_epel_bi_hv32_8_\suffix, export=1
ldp x4, x5, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_epel_bi_hv32_8_end_neon
endfunc
function ff_hevc_put_hevc_epel_bi_hv48_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x6, x7, [sp, #-80]!
stp x4, x5, [sp, #16]
stp x2, x3, [sp, #32]
@@ -5143,10 +5228,12 @@ function ff_hevc_put_hevc_epel_bi_hv48_8_\suffix, export=1
add x4, x4, #48
bl X(ff_hevc_put_hevc_epel_bi_hv24_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_epel_bi_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x6, x7, [sp, #-80]!
stp x4, x5, [sp, #16]
stp x2, x3, [sp, #32]
@@ -5162,6 +5249,7 @@ function ff_hevc_put_hevc_epel_bi_hv64_8_\suffix, export=1
add x4, x4, #64
bl X(ff_hevc_put_hevc_epel_bi_hv32_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
.endm

View File

@@ -630,6 +630,7 @@ function ff_hevc_put_hevc_\type\()_h24_8_neon, export=1
0:
.ifc \type, qpel
// Preserve filter index (mx) and lr so h16 gets correct mx; ret uses lr
AARCH64_SIGN_LINK_REGISTER
stp x4, x30, [sp, #-16]!
ldr x4, [sp]
.endif
@@ -700,6 +701,7 @@ function ff_hevc_put_hevc_\type\()_h24_8_neon, export=1
sub x12, x12, #16
.ifc \type, qpel
ldp x4, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
.endif
b.gt 0b
.ifc \type, qpel
@@ -1156,6 +1158,7 @@ function ff_hevc_put_hevc_qpel_v32_8_neon, export=1
endfunc
function ff_hevc_put_hevc_qpel_v48_8_neon, export=1
AARCH64_SIGN_LINK_REGISTER
stp x2, x3, [sp, #-48]!
stp x0, x1, [sp, #16]
stp x5, x30, [sp, #32]
@@ -1168,6 +1171,7 @@ function ff_hevc_put_hevc_qpel_v48_8_neon, export=1
bl X(ff_hevc_put_hevc_qpel_v24_8_neon)
ldr x30, [sp, #8]
add sp, sp, #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -1446,6 +1450,7 @@ function ff_hevc_put_hevc_qpel_bi_v16_8_neon, export=1
endfunc
function ff_hevc_put_hevc_qpel_bi_v24_8_neon, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -1461,6 +1466,7 @@ function ff_hevc_put_hevc_qpel_bi_v24_8_neon, export=1
bl X(ff_hevc_put_hevc_qpel_bi_v8_8_neon)
ldr x30, [sp, #8]
add sp, sp, #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -1522,6 +1528,7 @@ function ff_hevc_put_hevc_qpel_bi_v32_8_neon, export=1
endfunc
function ff_hevc_put_hevc_qpel_bi_v48_8_neon, export=1
AARCH64_SIGN_LINK_REGISTER
mov x8, #32
str x8, [sp, #-80]!
stp x4, x5, [sp, #16]
@@ -1540,6 +1547,7 @@ function ff_hevc_put_hevc_qpel_bi_v48_8_neon, export=1
bl X(ff_hevc_put_hevc_qpel_bi_v16_8_neon)
ldr x30, [sp, #8]
add sp, sp, #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -3280,6 +3288,7 @@ function ff_hevc_put_hevc_qpel_uni_hv4_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x30, x14,[sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3293,6 +3302,7 @@ function ff_hevc_put_hevc_qpel_uni_hv4_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x30, x14, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv4_8_end_neon
endfunc
@@ -3301,6 +3311,7 @@ function ff_hevc_put_hevc_qpel_uni_hv6_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x30, x14,[sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3314,6 +3325,7 @@ function ff_hevc_put_hevc_qpel_uni_hv6_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x30, x14, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv6_8_end_neon
endfunc
@@ -3322,6 +3334,7 @@ function ff_hevc_put_hevc_qpel_uni_hv8_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x30, x14,[sp, #-48]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3335,6 +3348,7 @@ function ff_hevc_put_hevc_qpel_uni_hv8_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x30, x14, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv8_8_end_neon
endfunc
@@ -3343,6 +3357,7 @@ function ff_hevc_put_hevc_qpel_uni_hv12_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3359,6 +3374,7 @@ function ff_hevc_put_hevc_qpel_uni_hv12_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv12_8_end_neon
endfunc
@@ -3367,6 +3383,7 @@ function ff_hevc_put_hevc_qpel_uni_hv16_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3382,10 +3399,12 @@ function ff_hevc_put_hevc_qpel_uni_hv16_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv16_8_end_neon
endfunc
function ff_hevc_put_hevc_qpel_uni_hv24_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3402,6 +3421,7 @@ function ff_hevc_put_hevc_qpel_uni_hv24_8_\suffix, export=1
bl X(ff_hevc_put_hevc_qpel_uni_hv8_8_\suffix)
ldr x30, [sp, #8]
add sp, sp, #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -3410,6 +3430,7 @@ function ff_hevc_put_hevc_qpel_uni_hv32_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3426,6 +3447,7 @@ function ff_hevc_put_hevc_qpel_uni_hv32_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv16_8_end_neon
endfunc
@@ -3434,6 +3456,7 @@ function ff_hevc_put_hevc_qpel_uni_hv48_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3454,6 +3477,7 @@ function ff_hevc_put_hevc_qpel_uni_hv48_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv16_8_end_neon
endfunc
@@ -3462,6 +3486,7 @@ function ff_hevc_put_hevc_qpel_uni_hv64_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x6, [sp, #16]
stp x0, x1, [sp, #32]
@@ -3482,6 +3507,7 @@ function ff_hevc_put_hevc_qpel_uni_hv64_8_\suffix, export=1
ldp x4, x6, [sp, #16]
ldp x0, x1, [sp, #32]
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_uni_hv16_8_end_neon
endfunc
.endm
@@ -5055,6 +5081,7 @@ function ff_hevc_put_hevc_qpel_hv4_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5066,6 +5093,7 @@ function ff_hevc_put_hevc_qpel_hv4_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_hv4_8_end_neon
endfunc
@@ -5074,6 +5102,7 @@ function ff_vvc_put_qpel_hv4_8_\suffix, export=1
lsl x10, x10, #8
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5085,6 +5114,7 @@ function ff_vvc_put_qpel_hv4_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_qpel_hv4_8_end_neon
endfunc
@@ -5094,6 +5124,7 @@ function ff_hevc_put_hevc_qpel_hv6_8_\suffix, export=1
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5105,6 +5136,7 @@ function ff_hevc_put_hevc_qpel_hv6_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_hv6_8_end_neon
endfunc
@@ -5114,6 +5146,7 @@ function ff_hevc_put_hevc_qpel_hv8_8_\suffix, export=1
sub x1, x1, x2, lsl #1
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5124,6 +5157,7 @@ function ff_hevc_put_hevc_qpel_hv8_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_hv8_8_end_neon
endfunc
@@ -5133,6 +5167,7 @@ function ff_vvc_put_qpel_hv8_8_\suffix, export=1
sub x1, x1, x2, lsl #1
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5143,6 +5178,7 @@ function ff_vvc_put_qpel_hv8_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_qpel_hv8_8_end_neon
endfunc
@@ -5152,6 +5188,7 @@ function ff_hevc_put_hevc_qpel_hv12_8_\suffix, export=1
sub x1, x1, x2, lsl #1
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5163,6 +5200,7 @@ function ff_hevc_put_hevc_qpel_hv12_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_hv12_8_end_neon
endfunc
@@ -5172,6 +5210,7 @@ function ff_hevc_put_hevc_qpel_hv16_8_\suffix, export=1
sub x1, x1, x2, lsl #1
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5182,6 +5221,7 @@ function ff_hevc_put_hevc_qpel_hv16_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_hv16_8_end_neon
endfunc
@@ -5191,6 +5231,7 @@ function ff_vvc_put_qpel_hv16_8_\suffix, export=1
sub x1, x1, x2, lsl #1
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5201,10 +5242,12 @@ function ff_vvc_put_qpel_hv16_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_qpel_hv16_8_end_neon
endfunc
function ff_hevc_put_hevc_qpel_hv24_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5217,6 +5260,7 @@ function ff_hevc_put_hevc_qpel_hv24_8_\suffix, export=1
add x0, x0, #24
bl X(ff_hevc_put_hevc_qpel_hv12_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -5227,6 +5271,7 @@ function ff_hevc_put_hevc_qpel_hv32_8_\suffix, export=1
sub x1, x1, x2
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5237,6 +5282,7 @@ function ff_hevc_put_hevc_qpel_hv32_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b hevc_put_hevc_qpel_hv32_8_end_neon
endfunc
@@ -5247,6 +5293,7 @@ function ff_vvc_put_qpel_hv32_8_\suffix, export=1
sub x1, x1, x2
mov x14, sp
sub sp, sp, x10 // tmp_array
AARCH64_SIGN_LINK_REGISTER
stp x5, x30, [sp, #-48]!
stp x0, x3, [sp, #16]
str x14, [sp, #32]
@@ -5257,10 +5304,12 @@ function ff_vvc_put_qpel_hv32_8_\suffix, export=1
ldr x14, [sp, #32]
ldp x0, x3, [sp, #16]
ldp x5, x30, [sp], #48
AARCH64_VALIDATE_LINK_REGISTER
b vvc_put_qpel_hv32_8_end_neon
endfunc
function ff_hevc_put_hevc_qpel_hv48_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5273,10 +5322,12 @@ function ff_hevc_put_hevc_qpel_hv48_8_\suffix, export=1
add x0, x0, #48
bl X(ff_hevc_put_hevc_qpel_hv24_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_hevc_put_hevc_qpel_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5291,10 +5342,12 @@ function ff_hevc_put_hevc_qpel_hv64_8_\suffix, export=1
mov x6, #32
bl X(ff_hevc_put_hevc_qpel_hv32_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_vvc_put_qpel_hv64_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5309,10 +5362,12 @@ function ff_vvc_put_qpel_hv64_8_\suffix, export=1
mov x6, #32
bl X(ff_vvc_put_qpel_hv32_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
function ff_vvc_put_qpel_hv128_8_\suffix, export=1
AARCH64_SIGN_LINK_REGISTER
stp x4, x5, [sp, #-64]!
stp x2, x3, [sp, #16]
stp x0, x1, [sp, #32]
@@ -5327,6 +5382,7 @@ function ff_vvc_put_qpel_hv128_8_\suffix, export=1
mov x6, #64
bl X(ff_vvc_put_qpel_hv64_8_\suffix)
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -5345,6 +5401,7 @@ DISABLE_I8MM
.macro QPEL_UNI_W_HV_HEADER width, suffix
ldp x14, x15, [sp] // mx, my
ldr w13, [sp, #16] // width
AARCH64_SIGN_LINK_REGISTER
stp x19, x30, [sp, #-80]!
stp x20, x21, [sp, #16]
stp x22, x23, [sp, #32]
@@ -5398,6 +5455,7 @@ DISABLE_I8MM
ldp x24, x25, [sp, #48]
ldp x26, x27, [sp, #64]
ldp x19, x30, [sp], #80
AARCH64_VALIDATE_LINK_REGISTER
.endm
.macro QPEL_UNI_W_HV_4
@@ -6176,6 +6234,7 @@ function ff_hevc_put_hevc_qpel_bi_hv4_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 only after tmp_array has been carved out, directly before the
// push, so that signing and validation see the same sp.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6191,6 +6250,7 @@ function ff_hevc_put_hevc_qpel_bi_hv4_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// The #64 post-increment undoes the #-64 pre-decrement above; validate x30
// right after it has been reloaded.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
// Continue in the shared end routine via a plain branch (no new link).
b hevc_put_hevc_qpel_bi_hv4_8_end_neon
endfunc
@@ -6199,6 +6259,7 @@ function ff_hevc_put_hevc_qpel_bi_hv6_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 after the tmp_array allocation and directly before the push.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6214,6 +6275,7 @@ function ff_hevc_put_hevc_qpel_bi_hv6_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// Pop the 64-byte frame (sp is back at the signing level) and validate x30.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
// Continue in the shared end routine via a plain branch (no new link).
b hevc_put_hevc_qpel_bi_hv6_8_end_neon
endfunc
@@ -6222,6 +6284,7 @@ function ff_hevc_put_hevc_qpel_bi_hv8_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 after the tmp_array allocation and directly before the push.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6237,10 +6300,12 @@ function ff_hevc_put_hevc_qpel_bi_hv8_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// Pop the 64-byte frame (sp is back at the signing level) and validate x30.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
// Continue in the shared end routine via a plain branch (no new link).
b hevc_put_hevc_qpel_bi_hv8_8_end_neon
endfunc
// Wrapper whose visible call goes to the bi_hv4 variant after x0 has been
// advanced by 8. The middle of the body is elided at the hunk marker below.
function ff_hevc_put_hevc_qpel_bi_hv12_8_\suffix, export=1
// Sign the link register at entry, before the 80-byte frame is pushed.
AARCH64_SIGN_LINK_REGISTER
stp x6, x7, [sp, #-80]!
stp x4, x5, [sp, #16]
stp x2, x3, [sp, #32]
@@ -6256,6 +6321,7 @@ function ff_hevc_put_hevc_qpel_bi_hv12_8_\suffix, export=1
add x0, x0, #8
bl X(ff_hevc_put_hevc_qpel_bi_hv4_8_\suffix)
// Reload x30 (16-byte post-increment of sp) and validate it before ret.
// NOTE(review): the other 64 bytes of the frame are presumably popped in
// the elided part, leaving sp at its entry level here - confirm.
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -6264,6 +6330,7 @@ function ff_hevc_put_hevc_qpel_bi_hv16_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 after the tmp_array allocation and directly before the push.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6279,11 +6346,13 @@ function ff_hevc_put_hevc_qpel_bi_hv16_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// Pop the 64-byte frame (sp is back at the signing level) and validate x30
// before anything else is done.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
mov x6, #16 // width
// Continue in the shared end routine via a plain branch (no new link).
b hevc_put_hevc_qpel_bi_hv16_8_end_neon
endfunc
// Wrapper whose visible call goes to the bi_hv8 variant after x0 has been
// advanced by 16. The middle of the body is elided at the hunk marker below.
function ff_hevc_put_hevc_qpel_bi_hv24_8_\suffix, export=1
// Sign the link register at entry, before the 80-byte frame is pushed.
AARCH64_SIGN_LINK_REGISTER
stp x6, x7, [sp, #-80]!
stp x4, x5, [sp, #16]
stp x2, x3, [sp, #32]
@@ -6299,6 +6368,7 @@ function ff_hevc_put_hevc_qpel_bi_hv24_8_\suffix, export=1
add x0, x0, #16
bl X(ff_hevc_put_hevc_qpel_bi_hv8_8_\suffix)
// Reload x30 (16-byte post-increment of sp) and validate it before ret.
ldr x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc
@@ -6307,6 +6377,7 @@ function ff_hevc_put_hevc_qpel_bi_hv32_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 after the tmp_array allocation and directly before the push.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6323,6 +6394,7 @@ function ff_hevc_put_hevc_qpel_bi_hv32_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// Pop the 64-byte frame (sp is back at the signing level) and validate x30.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
mov x6, #32 // width
// The hv16 end routine is reused here, with the width passed in x6.
b hevc_put_hevc_qpel_bi_hv16_8_end_neon
endfunc
@@ -6332,6 +6404,7 @@ function ff_hevc_put_hevc_qpel_bi_hv48_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 after the tmp_array allocation and directly before the push.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6352,6 +6425,7 @@ function ff_hevc_put_hevc_qpel_bi_hv48_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// Pop the 64-byte frame (sp is back at the signing level) and validate x30.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
mov x6, #48 // width
// The hv16 end routine is reused here, with the width passed in x6.
b hevc_put_hevc_qpel_bi_hv16_8_end_neon
endfunc
@@ -6361,6 +6435,7 @@ function ff_hevc_put_hevc_qpel_bi_hv64_8_\suffix, export=1
// x10 is scaled by 128 to give the tmp_array size; x14 keeps the sp value
// from before that allocation.
lsl x10, x10, #7
mov x14, sp
sub sp, sp, x10 // tmp_array
// Sign x30 after the tmp_array allocation and directly before the push.
AARCH64_SIGN_LINK_REGISTER
stp x7, x30, [sp, #-64]!
stp x4, x5, [sp, #16]
stp x0, x1, [sp, #32]
@@ -6381,6 +6456,7 @@ function ff_hevc_put_hevc_qpel_bi_hv64_8_\suffix, export=1
ldp x0, x1, [sp, #32]
ldr x14, [sp, #48]
// Pop the 64-byte frame (sp is back at the signing level) and validate x30.
ldp x7, x30, [sp], #64
AARCH64_VALIDATE_LINK_REGISTER
mov x6, #64 // width
// The hv16 end routine is reused here, with the width passed in x6.
b hevc_put_hevc_qpel_bi_hv16_8_end_neon
endfunc

View File

@@ -1169,9 +1169,11 @@ function nsse16_neon, export=1
// Allocate a 0x40-byte frame and save x0-x4 across the call to sse16_neon.
str x0, [sp, #-0x40]!
stp x1, x2, [sp, #0x10]
stp x3, x4, [sp, #0x20]
// Sign x30 right before it is stored. sp is already at its lowered level
// here and is not changed again before the reload, so the validation below
// sees the same sp.
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #0x30]
bl X(sse16_neon)
// Reload and validate x30 immediately after the call, while the frame is
// still allocated.
ldr x30, [sp, #0x30]
AARCH64_VALIDATE_LINK_REGISTER
mov w9, w0 // here we store score1
ldp x1, x2, [sp, #0x10]
ldp x3, x4, [sp, #0x20]
@@ -1290,9 +1292,11 @@ function nsse8_neon, export=1
// Allocate a 0x40-byte frame and save x0-x4 across the call to sse8_neon.
str x0, [sp, #-0x40]!
stp x1, x2, [sp, #0x10]
stp x3, x4, [sp, #0x20]
// Sign x30 right before it is stored. sp is already at its lowered level
// here and is not changed again before the reload, so the validation below
// sees the same sp.
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #0x30]
bl X(sse8_neon)
// Reload and validate x30 immediately after the call, while the frame is
// still allocated.
ldr x30, [sp, #0x30]
AARCH64_VALIDATE_LINK_REGISTER
mov w9, w0 // here we store score1
ldp x1, x2, [sp, #0x10]
ldp x3, x4, [sp, #0x20]

View File

@@ -1611,6 +1611,7 @@ endfunc
// Entry for the variant that sets w6 to 8 before the common body at label 0.
// Most of the body is elided at the hunk marker below.
function ff_vvc_apply_bdof_8_neon, export=1
mov w6, #8
0:
// The signing is placed after the local label, so every path that reaches
// label 0 signs its own x30 before the frame is pushed.
// NOTE(review): presumably a sibling entry point branches to this label
// with a different w6 - confirm against the full file.
AARCH64_SIGN_LINK_REGISTER
stp x19, x20, [sp, #-0x40]!
stp x21, x22, [sp, #0x10]
stp x23, x24, [sp, #0x20]
@@ -1703,6 +1704,7 @@ function ff_vvc_apply_bdof_8_neon, export=1
ldp x23, x24, [sp, #0x20]
ldp x21, x22, [sp, #0x10]
// The #0x40 post-increment undoes the #-0x40 pre-decrement above; x30 is
// validated once sp is back at its entry level.
ldp x19, x20, [sp], #0x40
AARCH64_VALIDATE_LINK_REGISTER
ret
endfunc

View File

@@ -131,26 +131,6 @@ DISABLE_SME
DISABLE_SME_I16I64
DISABLE_SME2
#if HAVE_SME
// NOTE(review): the hunk header above (-131,26 +131,6) shrinks this region,
// so this appears to be the copy of sme_entry that is dropped from this
// position; a second definition follows later in the listing.
// This copy pushes x29/x30 without signing the link register first.
.macro sme_entry
stp x29, x30, [sp, #-80]!
mov x29, sp
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
smstart
.endm
// NOTE(review): counterpart of the sme_entry copy above; per the hunk header
// (-131,26 +131,6) it appears to be dropped from this position as well.
// This copy reloads x30 without validating it afterwards.
.macro sme_exit
smstop
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
ldp x29, x30, [sp], #80
.endm
#endif
/* Support macros for
* - Armv8.3-A Pointer Authentication and
@@ -356,3 +336,26 @@ ELF .size \name, . - \name
#define x18 do_not_use_x18
#define w18 do_not_use_w18
#if HAVE_SME
// sme_entry: prologue macro for SME functions.
// Signs the link register, pushes an 80-byte frame holding x29/x30 and
// d8-d15, points x29 at the new frame and finally executes smstart.
// NOTE(review): this definition sits after the PAC support macros in the
// file, presumably so that AARCH64_SIGN_LINK_REGISTER is already defined
// when the macro is assembled - confirm.
.macro sme_entry
// Sign before the push, while sp is still at the caller's level.
AARCH64_SIGN_LINK_REGISTER
stp x29, x30, [sp, #-80]!
mov x29, sp
// d8-d15 are callee-saved (low 64 bits) under AAPCS64.
stp d8, d9, [sp, #16]
stp d10, d11, [sp, #32]
stp d12, d13, [sp, #48]
stp d14, d15, [sp, #64]
smstart
.endm
// sme_exit: epilogue macro matching sme_entry.
// Executes smstop, restores d8-d15 and x29/x30, pops the 80-byte frame and
// validates the link register. It does not emit ret itself.
.macro sme_exit
smstop
ldp d8, d9, [sp, #16]
ldp d10, d11, [sp, #32]
ldp d12, d13, [sp, #48]
ldp d14, d15, [sp, #64]
// The #80 post-increment mirrors the #-80 pre-decrement in sme_entry, so
// x30 is validated with sp at the level it had when it was signed.
ldp x29, x30, [sp], #80
AARCH64_VALIDATE_LINK_REGISTER
.endm
#endif

View File

@@ -917,6 +917,7 @@ FFT32_FN ns_float, 1
// SR_TRANSFORM_DEF: emits the code for one transform length under a local
// numeric label equal to len. Only the lines around the x20/x30 save and
// restore are visible here; the rest of the macro is elided.
.macro SR_TRANSFORM_DEF len, next=0
\len:
// The bl instructions below overwrite x30, so it is signed and then pushed
// together with x20.
AARCH64_SIGN_LINK_REGISTER
stp x20, x30, [sp, #-16]!
mov w20, #(\len/4)
mov x5, #((\len*4) - (\len/1))
@@ -926,6 +927,7 @@ FFT32_FN ns_float, 1
add x1, x1, x5
bl 32b
// Pop x20/x30 (sp is back at the signing level) and validate x30 before
// the recombination code continues.
ldp x20, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
ldr w5, =(\len*6 + \len/2)
sub x1, x1, x5
@@ -1061,6 +1063,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
b.gt 128f
ret
// Stage for length 128.
128:
// The bl instructions below overwrite x30, so it is signed and then pushed
// together with x20.
AARCH64_SIGN_LINK_REGISTER
stp x20, x30, [sp, #-16]!
mov w20, #32
add x1, x1, #16*32
@@ -1068,6 +1071,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
add x1, x1, #8*32
bl 32b
// Pop x20/x30 (sp is back at the signing level) and validate x30.
ldp x20, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
// 24*32 equals the sum of the two visible advances of x1 (16*32 + 8*32).
sub x1, x1, #24*32
SETUP_SR_RECOMB 128, x7, x8, x9
@@ -1081,6 +1085,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
// Go on to the 256 stage if the (elided) compare was greater-than,
// otherwise return.
b.gt 256f
ret
// Stage for length 256.
256:
// The bl instructions below overwrite x30, so it is signed and then pushed
// together with x20.
AARCH64_SIGN_LINK_REGISTER
stp x20, x30, [sp, #-16]!
mov w20, #64
add x1, x1, #32*32
@@ -1088,6 +1093,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
add x1, x1, #16*32
bl 32b
// Pop x20/x30 (sp is back at the signing level) and validate x30.
ldp x20, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
// 48*32 equals the sum of the two visible advances of x1 (32*32 + 16*32).
sub x1, x1, #48*32
SETUP_SR_RECOMB 256, x7, x8, x9
@@ -1102,6 +1108,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
// Go on to the 512 stage if the (elided) compare was greater-than,
// otherwise return.
b.gt 512f
ret
// Stage for length 512.
512:
// The bl instructions below overwrite x30, so it is signed and then pushed
// together with x20.
AARCH64_SIGN_LINK_REGISTER
stp x20, x30, [sp, #-16]!
mov w20, #128
add x1, x1, #64*32
@@ -1109,6 +1116,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
add x1, x1, #32*32
bl 32b
// Pop x20/x30 (sp is back at the signing level) and validate x30.
ldp x20, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
// 96*32 equals the sum of the two visible advances of x1 (64*32 + 32*32).
sub x1, x1, #96*32
SETUP_SR_RECOMB 512, x7, x8, x9
@@ -1128,6 +1136,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
ret
1024:
AARCH64_SIGN_LINK_REGISTER
stp x20, x30, [sp, #-16]!
mov w20, #256
add x1, x1, #96*32
@@ -1135,6 +1144,7 @@ function ff_tx_fft_sr_\name\()_neon, export=1
add x1, x1, #64*32
bl 32b
ldp x20, x30, [sp], #16
AARCH64_VALIDATE_LINK_REGISTER
mov x5, #192*32
sub x1, x1, x5