2014/1/21 Steven J. Hill <Steven.Hill@xxxxxxxxxx>: > From: Leonid Yegoshin <Leonid.Yegoshin@xxxxxxxxxx> > > Use the PREF instruction to optimize partial checksum operations. This does look like a nice feature. Do you have any performance benchmark results? > > Signed-off-by: Leonid Yegoshin <Leonid.Yegoshin@xxxxxxxxxx> > Signed-off-by: Steven J. Hill <Steven.Hill@xxxxxxxxxx> > --- > arch/mips/lib/csum_partial.S | 12 ++++++++++++ > 1 file changed, 12 insertions(+) > > diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S > index a6adffb..272820e 100644 > --- a/arch/mips/lib/csum_partial.S > +++ b/arch/mips/lib/csum_partial.S > @@ -417,13 +417,19 @@ FEXPORT(csum_partial_copy_nocheck) > * > * If len < NBYTES use byte operations. > */ > + PREF( 0, 0(src)) > + PREF( 1, 0(dst)) > sltu t2, len, NBYTES > and t1, dst, ADDRMASK > bnez t2, .Lcopy_bytes_checklen > + PREF( 0, 32(src)) > + PREF( 1, 32(dst)) > and t0, src, ADDRMASK > andi odd, dst, 0x1 /* odd buffer? */ > bnez t1, .Ldst_unaligned > nop > + PREF( 0, 2*32(src)) > + PREF( 1, 2*32(dst)) > bnez t0, .Lsrc_unaligned_dst_aligned > /* > * use delay slot for fall-through > @@ -434,6 +440,8 @@ FEXPORT(csum_partial_copy_nocheck) > beqz t0, .Lcleanup_both_aligned # len < 8*NBYTES > nop > SUB len, 8*NBYTES # subtract here for bgez loop > + PREF( 0, 3*32(src)) > + PREF( 1, 3*32(dst)) > .align 4 > 1: > EXC( LOAD t0, UNIT(0)(src), .Ll_exc) > @@ -464,6 +472,8 @@ EXC( STORE t7, UNIT(7)(dst), .Ls_exc) > ADDC(sum, t7) > .set reorder /* DADDI_WAR */ > ADD dst, dst, 8*NBYTES > + PREF( 0, 8*32(src)) > + PREF( 1, 8*32(dst)) > bgez len, 1b > .set noreorder > ADD len, 8*NBYTES # revert len (see above) > @@ -569,8 +579,10 @@ EXC( STFIRST t3, FIRST(0)(dst), .Ls_exc) > > .Lsrc_unaligned_dst_aligned: > SRL t0, len, LOG_NBYTES+2 # +2 for 4 units/iter > + PREF( 0, 3*32(src)) > beqz t0, .Lcleanup_src_unaligned > and rem, len, (4*NBYTES-1) # rem = len % 4*NBYTES > + PREF( 1, 3*32(dst)) > 1: > /* > * Avoid consecutive LD*'s to
the same register since some mips > -- > 1.8.3.2 > > -- Florian