avcodec/x86/rv34dsp: Port ff_rv34_idct_dc_noround_mmxext to sse2

No change in benchmarks here.

Reviewed-by: Lynne <dev@lynne.ee>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2026-03-18 10:39:33 +01:00
parent b50cbdc04f
commit c90cf2aa1f
3 changed files with 10 additions and 12 deletions

View File

@@ -44,16 +44,14 @@ SECTION .text
sar %1, 10
%endmacro
INIT_MMX mmxext
cglobal rv34_idct_dc_noround, 1, 2, 0
INIT_XMM sse2
cglobal rv34_idct_dc_noround, 1, 2, 1
movsx r1, word [r0]
IDCT_DC_NOROUND r1
movd m0, r1d
pshufw m0, m0, 0
movq [r0+ 0], m0
movq [r0+ 8], m0
movq [r0+16], m0
movq [r0+24], m0
SPLATW m0, m0
mova [r0+ 0], m0
mova [r0+16], m0
RET
; Load coeffs and perform row transform

View File

@@ -24,8 +24,7 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/rv34dsp.h"
void ff_rv34_idct_dc_mmxext(int16_t *block);
void ff_rv34_idct_dc_noround_mmxext(int16_t *block);
void ff_rv34_idct_dc_noround_sse2(int16_t *block);
void ff_rv34_idct_dc_add_sse2(uint8_t *dst, ptrdiff_t stride, int dc);
void ff_rv34_idct_dc_add_sse4(uint8_t *dst, ptrdiff_t stride, int dc);
void ff_rv34_idct_add_mmxext(uint8_t *dst, ptrdiff_t stride, int16_t *block);
@@ -35,11 +34,12 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c)
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_mmxext;
c->rv34_idct_add = ff_rv34_idct_add_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags))
if (EXTERNAL_SSE2(cpu_flags)) {
c->rv34_inv_transform_dc = ff_rv34_idct_dc_noround_sse2;
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse2;
}
if (EXTERNAL_SSE4(cpu_flags))
c->rv34_idct_dc_add = ff_rv34_idct_dc_add_sse4;
}

View File

@@ -34,7 +34,7 @@
} while (0)
static void test_rv34_inv_transform_dc(RV34DSPContext *s) {
declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block);
declare_func(void, int16_t *block);
if (check_func(s->rv34_inv_transform_dc, "rv34_inv_transform_dc")) {
LOCAL_ALIGNED_16(int16_t, p1, [BUF_SIZE]);