diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index 7c896db179..32564058ba 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -36,24 +36,32 @@ cextern rv40_bias SECTION .text -%macro mv0_pixels_mc8 0 +%macro mv0_pixels_mc8 1 lea r4, [r2*2 ] .next4rows: - movq mm0, [r1 ] - movq mm1, [r1+r2] + movq m0, [r1 ] + movq m1, [r1+r2] add r1, r4 - CHROMAMC_AVG mm0, [r0 ] - CHROMAMC_AVG mm1, [r0+r2] - movq [r0 ], mm0 - movq [r0+r2], mm1 +%ifidn %1,avg + movq m2, [r0] + movq m3, [r0+r2] + pavgb m0, m2 + pavgb m1, m3 +%endif + movq [r0 ], m0 + movq [r0+r2], m1 add r0, r4 - movq mm0, [r1 ] - movq mm1, [r1+r2] + movq m0, [r1 ] + movq m1, [r1+r2] add r1, r4 - CHROMAMC_AVG mm0, [r0 ] - CHROMAMC_AVG mm1, [r0+r2] - movq [r0 ], mm0 - movq [r0+r2], mm1 +%ifidn %1,avg + movq m2, [r0] + movq m3, [r0+r2] + pavgb m0, m2 + pavgb m1, m3 +%endif + movq [r0 ], m0 + movq [r0+r2], m1 add r0, r4 sub r3d, 4 jne .next4rows @@ -133,7 +141,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7+UNIX64, 8 jne .at_least_one_non_zero ; mx == 0 AND my == 0 - no filter needed ..@%1_%2_chroma_mc8_no_filter_ %+ cpuname: - mv0_pixels_mc8 + mv0_pixels_mc8 %1 RET .at_least_one_non_zero: diff --git a/tests/checkasm/rv40dsp.c b/tests/checkasm/rv40dsp.c index a5364d0a60..e76ec03da9 100644 --- a/tests/checkasm/rv40dsp.c +++ b/tests/checkasm/rv40dsp.c @@ -37,8 +37,8 @@ static void check_chroma_mc(void) LOCAL_ALIGNED_32(uint8_t, dst0, [16 * 18 * 2]); LOCAL_ALIGNED_32(uint8_t, dst1, [16 * 18 * 2]); - declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, const uint8_t *src, - ptrdiff_t stride, int h, int x, int y); + declare_func(void, uint8_t *dst, const uint8_t *src, + ptrdiff_t stride, int h, int x, int y); ff_rv40dsp_init(&h); randomize_buffers();