All we want there is to have the return value congruent to result * 256
modulo 0xffff; there is no need to convert from 32bit to 16bit (i.e. take
it modulo 0xffff) first - a cyclic shift of the 32bit value by 8 bits (in
either direction) will work, since 2^32 is congruent to 1 modulo 0xffff.

Kills the from32to16() helper and yields better code...

Signed-off-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
---
 arch/x86/lib/csum-partial_64.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c
index 5e877592a7b3..192d4772c2a3 100644
--- a/arch/x86/lib/csum-partial_64.c
+++ b/arch/x86/lib/csum-partial_64.c
@@ -11,25 +11,13 @@
 #include <net/checksum.h>
 #include <asm/word-at-a-time.h>
 
-static inline unsigned short from32to16(unsigned a)
-{
-	unsigned short b = a >> 16;
-	asm("addw %w2,%w0\n\t"
-	    "adcw $0,%w0\n"
-	    : "=r" (b)
-	    : "0" (b), "r" (a));
-	return b;
-}
-
 static inline __wsum csum_tail(u64 temp64, int odd)
 {
 	unsigned int result;
 
 	result = add32_with_carry(temp64 >> 32, temp64 & 0xffffffff);
-	if (unlikely(odd)) {
-		result = from32to16(result);
-		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-	}
+	if (unlikely(odd))
+		result = rol32(result, 8);
 	return (__force __wsum)result;
 }
 
-- 
2.39.2