Re: [PATCH] MIPS checksum fix

On Sep 19, 2008, at 5:07 AM, Ralf Baechle wrote:

On Fri, Sep 19, 2008 at 01:47:43PM +0200, Ralf Baechle wrote:

I'm interested in test reports of this on all sorts of configurations -
32-bit, 64-bit, big / little endian, R2 processors and pre-R2.  In
particular, Cavium, being the only MIPS64 R2 implementation, would be
interesting.  This definitely is stuff which should go upstream for 2.6.27.

There was a trivial bug in the R2 code.

From 97ad23f4696a322cb3bc379a25a8c0f6526751d6 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Date: Fri, 19 Sep 2008 14:05:53 +0200
Subject: [PATCH] [MIPS] Fix 64-bit csum_partial, __csum_partial_copy_user and csum_partial_copy

On 64-bit machines it wouldn't handle a possible carry when adding the
32-bit folded checksum and checksum argument.

While at it, add a few trivial optimizations, also for R2 processors.
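
In C terms, the missing end-around carry on that final 32-bit add looks
roughly like this (a sketch of the idea only, not the kernel's code):

	/*
	 * Adding the 32-bit checksum argument to the folded sum can wrap
	 * around 32 bits; the carry has to be folded back in, which is
	 * what the ADDC32 macro below does with sltu.
	 */
	static unsigned int add_with_carry32(unsigned int sum, unsigned int addend)
	{
		sum += addend;
		if (sum < addend)	/* the addition wrapped */
			sum += 1;
		return sum;
	}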

I tried this patch (with and without Atsushi's addition, shown below) on a MIPS64 today and the checksums were all bad (i.e. worse than the original problem).

Note that I had to manually create the diff, because of "malformed patch" errors at line 21 (second hunk).

If anyone would like to send me an updated unified diff for this issue, I can re-test within the next day.

--
-bp

Signed-off-by: Ralf Baechle <ralf@xxxxxxxxxxxxxx>

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 8d77841..c77a7a0 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -53,12 +53,14 @@
#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)						\
-	.set	push;						\
-	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
-	.set	pop
+
+#define ADDC32(sum,reg)						\
+	addu	sum, reg;					\
+	sltu	v1, sum, reg;					\
+	addu	sum, v1;					\

#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
@@ -254,8 +256,6 @@ LEAF(csum_partial)
1:	ADDC(sum, t1)

	/* fold checksum */
-	.set	push
-	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
@@ -263,24 +263,25 @@ LEAF(csum_partial)
	dsra32	sum, sum, 0
	addu	sum, v1
#endif
-	sll	v1, sum, 16
-	addu	sum, v1
-	sltu	v1, sum, v1
-	srl	sum, sum, 16
-	addu	sum, v1

	/* odd buffer alignment? */
-	beqz	t7, 1f
-	 nop
-	sll	v1, sum, 8
+#ifdef CPU_MIPSR2
+	wsbh	v1, sum	
+	movn	sum, v1, t7
+#else
+	beqz	t7, 1f			/* odd buffer alignment? */
+	 lui	v1, 0x00ff
+	addu	v1, 0x00ff
+	and	t0, sum, v1
+	sll	t0, t0, 8
	srl	sum, sum, 8
-	or	sum, v1
-	andi	sum, 0xffff
-	.set	pop
+	and	sum, sum, v1
+	or	sum, sum, t0
1:
+#endif
	.set	reorder
	/* Add the passed partial csum.  */
-	ADDC(sum, a2)
+	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)
@@ -656,8 +657,6 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
	ADDC(sum, t2)
.Ldone:
	/* fold checksum */
-	.set	push
-	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
@@ -665,23 +664,23 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
	dsra32	sum, sum, 0
	addu	sum, v1
#endif
-	sll	v1, sum, 16
-	addu	sum, v1
-	sltu	v1, sum, v1
-	srl	sum, sum, 16
-	addu	sum, v1

-	/* odd buffer alignment? */
-	beqz	odd, 1f
-	 nop
-	sll	v1, sum, 8
+#ifdef CPU_MIPSR2
+	wsbh	v1, sum
+	movn	sum, v1, odd
+#else
+	beqz	odd, 1f			/* odd buffer alignment? */
+	 lui	v1, 0x00ff
+	addu	v1, 0x00ff
+	and	t0, sum, v1
+	sll	t0, t0, 8
	srl	sum, sum, 8
-	or	sum, v1
-	andi	sum, 0xffff
-	.set	pop
+	and	sum, sum, v1
+	or	sum, sum, t0
1:
+#endif
	.set reorder
-	ADDC(sum, psum)
+	ADDC32(sum, psum)
	jr	ra
	.set noreorder





Begin forwarded message:

From: Atsushi Nemoto <anemo@xxxxxxxxxxxxx>
Date: September 19, 2008 8:43:19 AM PDT
To: u1@xxxxxxxxxx
Cc: macro@xxxxxxxxxxxxxx, linux-mips@xxxxxxxxxxxxxx
Subject: Re: MIPS checksum bug

On Fri, 19 Sep 2008 01:17:04 +0900 (JST), Atsushi Nemoto <anemo@xxxxxxxxxxxxx> wrote:
Thank you for testing.  Though this patch did not fix your problem,
I still have doubts about the 64-bit optimization.

If your hardware can run a 32-bit kernel, could you confirm whether the
problem happens in 32-bit mode as well?

I think I found a possible breakage in the 64-bit path.

There are some "lw" (and "ulw") instructions used in the 64-bit path, and
they should be "lwu" (and "ulwu" ... but there is no such pseudo-instruction)
to avoid sign-extension.
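
The effect is easy to see with a 64-bit accumulator; a minimal sketch in
C of the difference between the two loads (not the kernel's code):

	#include <stdint.h>

	/*
	 * For a word with the top bit set, e.g. 0x80000000:
	 *   lw  yields 0xffffffff80000000 (sign-extended) and corrupts
	 *       the upper half of the 64-bit running sum;
	 *   lwu yields 0x0000000080000000 (zero-extended), which is what
	 *       the checksum code wants until the final fold.
	 */
	static uint64_t add_word(uint64_t sum, const uint32_t *p)
	{
		return sum + (uint64_t)*p;	/* zero-extend, as lwu does */
	}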

Here is a completely untested patch.

diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 8d77841..40f9174 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -39,12 +39,14 @@
#ifdef USE_DOUBLE

#define LOAD   ld
+#define LOAD32 lwu
#define ADD    daddu
#define NBYTES 8

#else

#define LOAD   lw
+#define LOAD32 lw
#define ADD    addu
#define NBYTES 4

@@ -60,6 +62,14 @@
	ADD	sum, v1;					\
	.set	pop

+#define ADDC32(sum,reg)						\
+	.set	push;						\
+	.set	noat;						\
+	addu	sum, reg;					\
+	sltu	v1, sum, reg;					\
+	addu	sum, v1;					\
+	.set	pop
+
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
@@ -132,7 +142,7 @@ LEAF(csum_partial)
	beqz	t8, .Lqword_align
	 andi	t8, src, 0x8

-	lw	t0, 0x00(src)
+	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
@@ -211,7 +221,7 @@ LEAF(csum_partial)
	LONG_SRL	t8, t8, 0x2

.Lend_words:
-	lw	t0, (src)
+	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
@@ -229,6 +239,9 @@ LEAF(csum_partial)

	/* Still a full word to go  */
	ulw	t1, (src)
+#ifdef USE_DOUBLE
+	add	t1, zero	/* clear upper 32bit */
+#endif
	PTR_ADDIU	src, 4
	ADDC(sum, t1)

@@ -280,7 +293,7 @@ LEAF(csum_partial)
1:
	.set	reorder
	/* Add the passed partial csum.  */
-	ADDC(sum, a2)
+	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)
@@ -681,7 +694,7 @@ EXC(	sb	t0, NBYTES-2(dst), .Ls_exc)
	.set	pop
1:
	.set reorder
-	ADDC(sum, psum)
+	ADDC32(sum, psum)
	jr	ra
	.set noreorder


