From aa483bc42201e673cf7172be5205c14ada26bdbd Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Fri, 27 Feb 2026 13:54:21 +0100 Subject: [PATCH] avcodec/x86/bswapdsp: Avoid aligned vs unaligned codepaths for AVX2 For modern cpus (like those supporting AVX2) loads and stores using the unaligned versions of instructions are as fast as aligned ones if the address is aligned, so remove the aligned AVX2 version (and the alignment check) and just use the unaligned one. Reviewed-by: Lynne Signed-off-by: Andreas Rheinhardt --- libavcodec/x86/bswapdsp.asm | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm index f89ca76cf1..06f77c5921 100644 --- a/libavcodec/x86/bswapdsp.asm +++ b/libavcodec/x86/bswapdsp.asm @@ -100,10 +100,15 @@ SECTION .text ; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w); %macro BSWAP32_BUF 0 -%if cpuflag(ssse3)||cpuflag(avx2) +%if cpuflag(avx2) cglobal bswap32_buf, 3,4,3 + vbroadcasti128 m2, [pb_bswap32] + BSWAP_LOOPS u +%else +%if cpuflag(ssse3) +cglobal bswap32_buf, 3,4,3 + mova m2, [pb_bswap32] mov r3, r1 - VBROADCASTI128 m2, [pb_bswap32] %else cglobal bswap32_buf, 3,4,5 mov r3, r1 @@ -115,6 +120,7 @@ cglobal bswap32_buf, 3,4,5 jmp .left .start_align: BSWAP_LOOPS a +%endif .left: %if cpuflag(ssse3) test r2d, 2