avcodec/x86/bswapdsp: Avoid aligned vs unaligned codepaths for AVX2

On modern CPUs (such as those supporting AVX2), loads and stores
using the unaligned versions of instructions are as fast
as the aligned ones when the address is actually aligned, so remove
the aligned AVX2 version (and the alignment check) and just
use the unaligned one.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2026-02-27 13:54:21 +01:00
parent 55afe49dd0
commit aa483bc422

View File

@@ -100,10 +100,15 @@ SECTION .text
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
%macro BSWAP32_BUF 0
%if cpuflag(ssse3)||cpuflag(avx2)
%if cpuflag(avx2)
cglobal bswap32_buf, 3,4,3
vbroadcasti128 m2, [pb_bswap32]
BSWAP_LOOPS u
%else
%if cpuflag(ssse3)
cglobal bswap32_buf, 3,4,3
mova m2, [pb_bswap32]
mov r3, r1
VBROADCASTI128 m2, [pb_bswap32]
%else
cglobal bswap32_buf, 3,4,5
mov r3, r1
@@ -115,6 +120,7 @@ cglobal bswap32_buf, 3,4,5
jmp .left
.start_align:
BSWAP_LOOPS a
%endif
.left:
%if cpuflag(ssse3)
test r2d, 2