On Fri, 19 Sep 2008 01:17:04 +0900 (JST), Atsushi Nemoto <anemo@xxxxxxxxxxxxx> wrote: > Thank you for testing. Though this patch did not fixed your problem, > I still have a doubt on 64-bit optimization. > > If your hardware could run 32-bit kernel, could you confirm the > problem can happens in 32-bit too or not? I think I found possible breakage on 64-bit path. There are some "lw" (and "ulw") used in 64-bit path and they should be "lwu" (and "ulwu" ... but there is no such pseudo insn) to avoid sign-extention. Here is a completely untested patch. diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index 8d77841..40f9174 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -39,12 +39,14 @@ #ifdef USE_DOUBLE #define LOAD ld +#define LOAD32 lwu #define ADD daddu #define NBYTES 8 #else #define LOAD lw +#define LOAD32 lw #define ADD addu #define NBYTES 4 @@ -60,6 +62,14 @@ ADD sum, v1; \ .set pop +#define ADDC32(sum,reg) \ + .set push; \ + .set noat; \ + addu sum, reg; \ + sltu v1, sum, reg; \ + addu sum, v1; \ + .set pop + #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ LOAD _t0, (offset + UNIT(0))(src); \ LOAD _t1, (offset + UNIT(1))(src); \ @@ -132,7 +142,7 @@ LEAF(csum_partial) beqz t8, .Lqword_align andi t8, src, 0x8 - lw t0, 0x00(src) + LOAD32 t0, 0x00(src) LONG_SUBU a1, a1, 0x4 ADDC(sum, t0) PTR_ADDU src, src, 0x4 @@ -211,7 +221,7 @@ LEAF(csum_partial) LONG_SRL t8, t8, 0x2 .Lend_words: - lw t0, (src) + LOAD32 t0, (src) LONG_SUBU t8, t8, 0x1 ADDC(sum, t0) .set reorder /* DADDI_WAR */ @@ -229,6 +239,9 @@ LEAF(csum_partial) /* Still a full word to go */ ulw t1, (src) +#ifdef USE_DOUBLE + add t1, zero /* clear upper 32bit */ +#endif PTR_ADDIU src, 4 ADDC(sum, t1) @@ -280,7 +293,7 @@ LEAF(csum_partial) 1: .set reorder /* Add the passed partial csum. */ - ADDC(sum, a2) + ADDC32(sum, a2) jr ra .set noreorder END(csum_partial) @@ -681,7 +694,7 @@ EXC( sb t0, NBYTES-2(dst), .Ls_exc) .set pop 1: .set reorder - ADDC(sum, psum) + ADDC32(sum, psum) jr ra .set noreorder