[PATCH v2 14/18] amd64: saner handling of odd address in csum_partial()

Al Viro <viro@xxxxxxxxxxxxxxxxxx> · Tue, 5 Dec 2023 02:24:14 +0000

all we want there is to have return value congruent to
result * 256 modulo 0xffff; no need to convert from
32bit to 16bit (i.e. take it modulo 0xffff) first -
cyclic shift of 32bit value by 8 bits (in either direction)
will work.

Kills the from32to16() helper and yields better code...

Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
 arch/x86/lib/csum-partial_64.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index 5e877592a7b3..192d4772c2a3 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -11,25 +11,13 @@
 #include <net/checksum.h>
 #include <asm/word-at-a-time.h>
 
-static inline unsigned short from32to16(unsigned a)
-{
-	unsigned short b = a >> 16;
-	asm("addw %w2,%w0\n\t"
-	    "adcw $0,%w0\n"
-	    : "=r" (b)
-	    : "0" (b), "r" (a));
-	return b;
-}
-
 static inline __wsum csum_tail(u64 temp64, int odd)
 {
 	unsigned int result;
 
 	result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
-	if (unlikely(odd)) {
-		result = from32to16(result);
-		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-	}
+	if (unlikely(odd))
+		result = rol32(result, 8);
 	return (__force __wsum)result;
 }
 
-- 
2.39.2