avcodec/x86/cfhdencdsp: Avoid unnecessary constants

Up until now, cfhdencdsp used constants consisting
of -1, 1, ..., -1, 1 words and 1, -1, ..., 1, -1 words
as pmaddwd operands. But one can use
the same constants if one shuffles the words in
a dword in the opposite order. Similarly for some other
constants. This also made it possible to avoid a register in
cfhdenc_vert_filter.

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
Andreas Rheinhardt
2026-01-25 20:16:57 +01:00
parent cd3d8116fb
commit 91c7710412

View File

@@ -24,11 +24,8 @@
SECTION_RODATA
pw_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1
pw_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1
pw_p5_n11: dw 5, -11, 5, -11, 5, -11, 5, -11
pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11
pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5
pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5
pd_4: times 4 dd 4
pw_n4: times 8 dw -4
cextern pw_1
@@ -44,7 +41,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, istride, lwidth, hwid
shl hwidthq, 1
mova m7, [pd_4]
mova m8, [pw_1]
pcmpeqw m9, m9 ; -1
pcmpeqw m9, m9 ; -1
mova m10,[pw_p1_n1]
movsxdifnidn yq, yd
movsxdifnidn widthq, widthd
@@ -196,7 +193,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, istride, lwidth, hwid
%if ARCH_X86_64
INIT_XMM sse2
cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidth, width, height, x, y, pos
cglobal cfhdenc_vert_filter, 8, 11, 13, input, low, high, istride, lwidth, hwidth, width, height, x, y, pos
shl istrideq, 1
shl widthd, 1
@@ -208,9 +205,8 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
mova m8, [pw_1]
pcmpeqw m9, m9 ; -1
mova m10,[pw_p1_n1]
mova m11,[pw_n1_p1]
mova m12,[pw_4]
mova m13,[pw_n4]
mova m11, [pw_4]
mova m12, [pw_n4]
.loopw:
mov yq, 2
@@ -237,9 +233,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
add posq, istrideq
movu m5, [inputq + posq]
mova m6, m0
punpcklwd m0, m1
punpckhwd m1, m6
SBUTTERFLY wd, 0, 1, 6
mova m6, m2
punpcklwd m2, m3
@@ -250,9 +244,9 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
punpckhwd m5, m6
pmaddwd m0, [pw_p5_n11]
pmaddwd m1, [pw_n11_p5]
pmaddwd m2, m12
pmaddwd m3, m12
pmaddwd m1, [pw_p5_n11]
pmaddwd m2, m11
pmaddwd m3, m11
pmaddwd m4, m9
pmaddwd m5, m9
@@ -313,9 +307,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
punpcklwd m0, m1
punpckhwd m1, m6
mova m6, m2
punpcklwd m2, m3
punpckhwd m3, m6
SBUTTERFLY wd, 2, 3, 6
mova m6, m4
punpcklwd m4, m5
@@ -324,7 +316,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
pmaddwd m0, m9
pmaddwd m1, m9
pmaddwd m2, m10
pmaddwd m3, m11
pmaddwd m3, m10
pmaddwd m4, m8
pmaddwd m5, m8
@@ -394,16 +386,14 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
punpcklwd m2, m3
punpckhwd m3, m6
mova m6, m4
punpcklwd m4, m5
punpckhwd m5, m6
SBUTTERFLY wd, 4, 5, 6
pmaddwd m0, m8
pmaddwd m1, m8
pmaddwd m2, m13
pmaddwd m3, m13
pmaddwd m2, m12
pmaddwd m3, m12
pmaddwd m4, [pw_p11_n5]
pmaddwd m5, [pw_n5_p11]
pmaddwd m5, [pw_p11_n5]
paddd m4, m2
paddd m5, m3