[PATCH v2 2/3] sbc/sbc_primitives_sse: Optimize sbc_analyze_4s

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Luiz Augusto von Dentz <luiz.von.dentz@xxxxxxxxx>

This makes use 128 bit XMM registers whenever possible.

$ time src/sbcenc_mmx -s 4 sin_4m.au > /dev/null
real    0m1.073s
user    0m1.039s
sys     0m0.030s

===  After ====

$ time src/sbcenc -s 4 sin_4m.au > /dev/null
real    0m1.049s
user    0m1.000s
sys     0m0.047s
---
 sbc/sbc_primitives_sse.c | 58 +++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/sbc/sbc_primitives_sse.c b/sbc/sbc_primitives_sse.c
index 42cdb03..2a903e1 100644
--- a/sbc/sbc_primitives_sse.c
+++ b/sbc/sbc_primitives_sse.c
@@ -38,48 +38,40 @@
 static inline void sbc_analyze_four_sse(const int16_t *in, int32_t *out,
 					const FIXED_T *consts)
 {
-	static const SBC_ALIGNED int32_t round_c[2] = {
+	static const SBC_ALIGNED int32_t round_c[4] = {
+		1 << (SBC_PROTO_FIXED4_SCALE - 1),
+		1 << (SBC_PROTO_FIXED4_SCALE - 1),
 		1 << (SBC_PROTO_FIXED4_SCALE - 1),
 		1 << (SBC_PROTO_FIXED4_SCALE - 1),
 	};
 	__asm__ volatile (
-		"movq        (%0), %%mm0\n"
-		"movq       8(%0), %%mm1\n"
-		"pmaddwd     (%1), %%mm0\n"
-		"pmaddwd    8(%1), %%mm1\n"
-		"paddd       (%2), %%mm0\n"
-		"paddd       (%2), %%mm1\n"
+		"movdqu      (%0), %%xmm0\n"
+		"pmaddwd     (%1), %%xmm0\n"
+		"paddd       (%2), %%xmm0\n"
 		"\n"
-		"movq      16(%0), %%mm2\n"
-		"movq      24(%0), %%mm3\n"
-		"pmaddwd   16(%1), %%mm2\n"
-		"pmaddwd   24(%1), %%mm3\n"
-		"paddd      %%mm2, %%mm0\n"
-		"paddd      %%mm3, %%mm1\n"
+		"movdqu    16(%0), %%xmm1\n"
+		"pmaddwd   16(%1), %%xmm1\n"
+		"paddd     %%xmm1, %%xmm0\n"
 		"\n"
-		"movq      32(%0), %%mm2\n"
-		"movq      40(%0), %%mm3\n"
-		"pmaddwd   32(%1), %%mm2\n"
-		"pmaddwd   40(%1), %%mm3\n"
-		"paddd      %%mm2, %%mm0\n"
-		"paddd      %%mm3, %%mm1\n"
+		"movdqu    32(%0), %%xmm1\n"
+		"pmaddwd   32(%1), %%xmm1\n"
+		"paddd     %%xmm1, %%xmm0\n"
 		"\n"
-		"movq      48(%0), %%mm2\n"
-		"movq      56(%0), %%mm3\n"
-		"pmaddwd   48(%1), %%mm2\n"
-		"pmaddwd   56(%1), %%mm3\n"
-		"paddd      %%mm2, %%mm0\n"
-		"paddd      %%mm3, %%mm1\n"
+		"movdqu    48(%0), %%xmm1\n"
+		"pmaddwd   48(%1), %%xmm1\n"
+		"paddd     %%xmm1, %%xmm0\n"
 		"\n"
-		"movq      64(%0), %%mm2\n"
-		"movq      72(%0), %%mm3\n"
-		"pmaddwd   64(%1), %%mm2\n"
-		"pmaddwd   72(%1), %%mm3\n"
-		"paddd      %%mm2, %%mm0\n"
-		"paddd      %%mm3, %%mm1\n"
+		"movdqu    64(%0), %%xmm1\n"
+		"pmaddwd   64(%1), %%xmm1\n"
+		"paddd     %%xmm1, %%xmm0\n"
+		"\n"
+		"psrad         %4, %%xmm0\n"
+		"\n"
+		"movdqa    %%xmm0, %%xmm1\n"
+		"punpckhqdq %%xmm1, %%xmm1\n"
+		"movdq2q   %%xmm0, %%mm0\n"
+		"movdq2q   %%xmm1, %%mm1\n"
 		"\n"
-		"psrad         %4, %%mm0\n"
-		"psrad         %4, %%mm1\n"
 		"packssdw   %%mm0, %%mm0\n"
 		"packssdw   %%mm1, %%mm1\n"
 		"\n"
-- 
2.26.2




[Index of Archives]     [Bluez Devel]     [Linux Wireless Networking]     [Linux Wireless Personal Area Networking]     [Linux ATH6KL]     [Linux USB Devel]     [Linux Media Drivers]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [Big List of Linux Books]

  Powered by Linux