mirror of
https://mirror.skon.top/https://github.com/FFmpeg/FFmpeg
synced 2026-04-20 21:00:41 +08:00
avcodec/x86/cfhdencdsp: Avoid unnecessary constants
Up until now, cfhdencdsp used constants consisting of -1, 1, ..., -1, 1 words and 1, -1, ..., 1, -1 words for use as constants in pmaddwd. But one can use the same constant for both if one shuffles the words in a dword in the opposite order. The same applies to some other constants. This also made it possible to avoid a register in cfhdenc_vert_filter. Reviewed-by: James Almer <jamrial@gmail.com> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
This commit is contained in:
@@ -24,11 +24,8 @@
|
||||
SECTION_RODATA
|
||||
|
||||
pw_p1_n1: dw 1, -1, 1, -1, 1, -1, 1, -1
|
||||
pw_n1_p1: dw -1, 1, -1, 1, -1, 1, -1, 1
|
||||
pw_p5_n11: dw 5, -11, 5, -11, 5, -11, 5, -11
|
||||
pw_n5_p11: dw -5, 11, -5, 11, -5, 11, -5, 11
|
||||
pw_p11_n5: dw 11, -5, 11, -5, 11, -5, 11, -5
|
||||
pw_n11_p5: dw -11, 5, -11, 5, -11, 5, -11, 5
|
||||
pd_4: times 4 dd 4
|
||||
pw_n4: times 8 dw -4
|
||||
cextern pw_1
|
||||
@@ -44,7 +41,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, istride, lwidth, hwid
|
||||
shl hwidthq, 1
|
||||
mova m7, [pd_4]
|
||||
mova m8, [pw_1]
|
||||
pcmpeqw m9, m9 ; -1
|
||||
pcmpeqw m9, m9 ; -1
|
||||
mova m10,[pw_p1_n1]
|
||||
movsxdifnidn yq, yd
|
||||
movsxdifnidn widthq, widthd
|
||||
@@ -196,7 +193,7 @@ cglobal cfhdenc_horiz_filter, 8, 10, 11, input, low, high, istride, lwidth, hwid
|
||||
|
||||
%if ARCH_X86_64
|
||||
INIT_XMM sse2
|
||||
cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidth, width, height, x, y, pos
|
||||
cglobal cfhdenc_vert_filter, 8, 11, 13, input, low, high, istride, lwidth, hwidth, width, height, x, y, pos
|
||||
shl istrideq, 1
|
||||
|
||||
shl widthd, 1
|
||||
@@ -208,9 +205,8 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
|
||||
mova m8, [pw_1]
|
||||
pcmpeqw m9, m9 ; -1
|
||||
mova m10,[pw_p1_n1]
|
||||
mova m11,[pw_n1_p1]
|
||||
mova m12,[pw_4]
|
||||
mova m13,[pw_n4]
|
||||
mova m11, [pw_4]
|
||||
mova m12, [pw_n4]
|
||||
.loopw:
|
||||
mov yq, 2
|
||||
|
||||
@@ -237,9 +233,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
|
||||
add posq, istrideq
|
||||
movu m5, [inputq + posq]
|
||||
|
||||
mova m6, m0
|
||||
punpcklwd m0, m1
|
||||
punpckhwd m1, m6
|
||||
SBUTTERFLY wd, 0, 1, 6
|
||||
|
||||
mova m6, m2
|
||||
punpcklwd m2, m3
|
||||
@@ -250,9 +244,9 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
|
||||
punpckhwd m5, m6
|
||||
|
||||
pmaddwd m0, [pw_p5_n11]
|
||||
pmaddwd m1, [pw_n11_p5]
|
||||
pmaddwd m2, m12
|
||||
pmaddwd m3, m12
|
||||
pmaddwd m1, [pw_p5_n11]
|
||||
pmaddwd m2, m11
|
||||
pmaddwd m3, m11
|
||||
pmaddwd m4, m9
|
||||
pmaddwd m5, m9
|
||||
|
||||
@@ -313,9 +307,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
|
||||
punpcklwd m0, m1
|
||||
punpckhwd m1, m6
|
||||
|
||||
mova m6, m2
|
||||
punpcklwd m2, m3
|
||||
punpckhwd m3, m6
|
||||
SBUTTERFLY wd, 2, 3, 6
|
||||
|
||||
mova m6, m4
|
||||
punpcklwd m4, m5
|
||||
@@ -324,7 +316,7 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
|
||||
pmaddwd m0, m9
|
||||
pmaddwd m1, m9
|
||||
pmaddwd m2, m10
|
||||
pmaddwd m3, m11
|
||||
pmaddwd m3, m10
|
||||
pmaddwd m4, m8
|
||||
pmaddwd m5, m8
|
||||
|
||||
@@ -394,16 +386,14 @@ cglobal cfhdenc_vert_filter, 8, 11, 14, input, low, high, istride, lwidth, hwidt
|
||||
punpcklwd m2, m3
|
||||
punpckhwd m3, m6
|
||||
|
||||
mova m6, m4
|
||||
punpcklwd m4, m5
|
||||
punpckhwd m5, m6
|
||||
SBUTTERFLY wd, 4, 5, 6
|
||||
|
||||
pmaddwd m0, m8
|
||||
pmaddwd m1, m8
|
||||
pmaddwd m2, m13
|
||||
pmaddwd m3, m13
|
||||
pmaddwd m2, m12
|
||||
pmaddwd m3, m12
|
||||
pmaddwd m4, [pw_p11_n5]
|
||||
pmaddwd m5, [pw_n5_p11]
|
||||
pmaddwd m5, [pw_p11_n5]
|
||||
|
||||
paddd m4, m2
|
||||
paddd m5, m3
|
||||
|
||||
Reference in New Issue
Block a user