mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avcodec/x86/bswapdsp: Avoid aligned vs unaligned codepaths for AVX2
For modern CPUs (such as those supporting AVX2), the unaligned versions of load and store instructions are as fast as the aligned ones when the address is actually aligned. Therefore remove the aligned AVX2 version (and the alignment check) and always use the unaligned one.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -100,10 +100,15 @@ SECTION .text
|
||||
|
||||
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
|
||||
%macro BSWAP32_BUF 0
|
||||
%if cpuflag(ssse3)||cpuflag(avx2)
|
||||
%if cpuflag(avx2)
|
||||
cglobal bswap32_buf, 3,4,3
|
||||
vbroadcasti128 m2, [pb_bswap32]
|
||||
BSWAP_LOOPS u
|
||||
%else
|
||||
%if cpuflag(ssse3)
|
||||
cglobal bswap32_buf, 3,4,3
|
||||
mova m2, [pb_bswap32]
|
||||
mov r3, r1
|
||||
VBROADCASTI128 m2, [pb_bswap32]
|
||||
%else
|
||||
cglobal bswap32_buf, 3,4,5
|
||||
mov r3, r1
|
||||
@@ -115,6 +120,7 @@ cglobal bswap32_buf, 3,4,5
|
||||
jmp .left
|
||||
.start_align:
|
||||
BSWAP_LOOPS a
|
||||
%endif
|
||||
.left:
|
||||
%if cpuflag(ssse3)
|
||||
test r2d, 2
|
||||
|
||||
Reference in New Issue
Block a user