avfilter/x86/vf_fspp: Port mul_thrmat to SSE2

This fixes an ABI violation, as mul_thrmat did not issue emms. It seems that this ABI violation could reach the user, namely if ff_get_video_buffer() fails. Notice that ff_get_video_buffer() itself could fail because of this, namely if the allocator uses floating point registers.

On x64 (where GCC already used SSE2 in the C version):
mul_thrmat_c:     4.4 ( 1.00x)
mul_thrmat_mmx:   8.6 ( 0.52x)
mul_thrmat_sse2:  4.4 ( 1.00x)

On 32bit (where SSE2 is not known to be available):
mul_thrmat_c:    56.0 ( 1.00x)
mul_thrmat_sse2:  6.0 ( 9.40x)

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2026-04-20 21:00:41 +08:00 · 2025-11-09 19:10:30 +01:00
parent 70eb8a76a9
commit 52ba2ac7bd
5 changed files with 45 additions and 61 deletions
--- a/tests/checkasm/vf_fspp.c
+++ b/tests/checkasm/vf_fspp.c
@@ -18,6 +18,7 @@

 #include "checkasm.h"
 #include "libavfilter/vf_fsppdsp.h"
+#include "libavutil/mem_internal.h"

 #define randomize_buffers(buf)                           \
    do {                                                 \
@@ -29,10 +30,11 @@
 static void check_mul_thrmat(void)
 {
    FSPPDSPContext fspp;
-    int16_t src[64];
-    int16_t dst_ref[64], dst_new[64];
+    DECLARE_ALIGNED(16, int16_t, src)[64];
+    DECLARE_ALIGNED(16, int16_t, dst_ref)[64];
+    DECLARE_ALIGNED(16, int16_t, dst_new)[64];
    const int q = (uint8_t)rnd();
-    declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *thr_adr_noq, int16_t *thr_adr, int q);
+    declare_func(void, int16_t *thr_adr_noq, int16_t *thr_adr, int q);

    ff_fsppdsp_init(&fspp);