[PATCH v1 1/3] sparc64: NG4 memset/memcpy 32 bits overflow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Early in boot, Linux patches memset and memcpy to branch to
platform-optimized versions of these routines. The NG4 (Niagara 4) versions
are currently used on all platforms starting from T4. Recently,
M7-optimized routines were added to UEK4, but they are not in mainline yet.
So, even with the M7-optimized routines, the NG4 versions are still going
to be used on T4, T5, M5, and M6 processors.

While investigating how to improve the initialization time of
dentry_hashtable, which is 8G long on an M6 ldom with 7T of main memory, I
noticed that memset() does not reset all the memory in this array. After
studying the code, I realized that the NG4memset() branches test the %icc
condition codes instead of %xcc, so if the length does not fit in 32 bits,
which is the case for an 8G array, these routines fail to work properly.

The fix is to replace all %icc with %xcc in these routines. (An alternative
is to use %ncc, but this would be misleading, as the code already uses
sparcv9-only instructions and cannot be built for 32-bit.)

It is important to fix this bug, because even an older T4-4 can have 2T of
memory, and there are large, memory-proportional data structures in the
kernel which can be larger than 4G in size. The failure of memset() is
silent, and the resulting corruption is hard to detect.

Signed-off-by: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx>
Reviewed-by: Babu Moger <babu.moger@xxxxxxxxxx>
---
 arch/sparc/lib/NG4memcpy.S |   71 ++++++++++++++++++++------------------------
 arch/sparc/lib/NG4memset.S |   26 ++++++++--------
 2 files changed, 45 insertions(+), 52 deletions(-)

diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S
index 75bb93b..60ccb46 100644
--- a/arch/sparc/lib/NG4memcpy.S
+++ b/arch/sparc/lib/NG4memcpy.S
@@ -18,7 +18,7 @@
 #define FPU_ENTER			\
 	rd	%fprs, %o5;		\
 	andcc	%o5, FPRS_FEF, %g0;	\
-	be,a,pn	%icc, 999f;		\
+	be,a,pn	%xcc, 999f;		\
 	 wr	%g0, FPRS_FEF, %fprs;	\
 	999:
 
@@ -84,10 +84,6 @@
 #define PREAMBLE
 #endif
 
-#ifndef XCC
-#define XCC xcc
-#endif
-
 	.register	%g2,#scratch
 	.register	%g3,#scratch
 
@@ -252,19 +248,16 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 #ifdef MEMCPY_DEBUG
 	wr		%g0, 0x80, %asi
 #endif
-	srlx		%o2, 31, %g2
-	cmp		%g2, 0
-	tne		%XCC, 5
 	PREAMBLE
 	mov		%o0, %o3
 	brz,pn		%o2, .Lexit
 	 cmp		%o2, 3
-	ble,pn		%icc, .Ltiny
+	ble,pn		%xcc, .Ltiny
 	 cmp		%o2, 19
-	ble,pn		%icc, .Lsmall
+	ble,pn		%xcc, .Lsmall
 	 or		%o0, %o1, %g2
 	cmp		%o2, 128
-	bl,pn		%icc, .Lmedium
+	bl,pn		%xcc, .Lmedium
 	 nop
 
 .Llarge:/* len >= 0x80 */
@@ -279,7 +272,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	add		%o1, 1, %o1
 	subcc		%g1, 1, %g1
 	add		%o0, 1, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 
 51:	LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
@@ -295,7 +288,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	 * loop, or we require the alignaddr/faligndata variant.
 	 */
 	andcc		%o1, 0x7, %o5
-	bne,pn		%icc, .Llarge_src_unaligned
+	bne,pn		%xcc, .Llarge_src_unaligned
 	 sub		%g0, %o0, %g1
 
 	/* Legitimize the use of initializing stores by getting dest
@@ -309,7 +302,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	add		%o1, 8, %o1
 	subcc		%g1, 8, %g1
 	add		%o0, 8, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8)
 
 .Llarge_aligned:
@@ -343,16 +336,16 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	add		%o0, 0x08, %o0
 	EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8)
 	add		%o0, 0x08, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
 
 	membar		#StoreLoad | #StoreStore
 
 	brz,pn		%o2, .Lexit
 	 cmp		%o2, 19
-	ble,pn		%icc, .Lsmall_unaligned
+	ble,pn		%xcc, .Lsmall_unaligned
 	 nop
-	ba,a,pt		%icc, .Lmedium_noprefetch
+	ba,a,pt		%xcc, .Lmedium_noprefetch
 
 .Lexit:	retl
 	 mov		EX_RETVAL(%o3), %o0
@@ -395,7 +388,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16)
 	EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8)
 	add		%o0, 0x40, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 LOAD(prefetch, %g1 + 0x200, #n_reads_strong)
 #ifdef NON_USER_COPY
 	VISExitHalfFast
@@ -404,9 +397,9 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 #endif
 	brz,pn		%o2, .Lexit
 	 cmp		%o2, 19
-	ble,pn		%icc, .Lsmall_unaligned
+	ble,pn		%xcc, .Lsmall_unaligned
 	 nop
-	ba,a,pt		%icc, .Lmedium_unaligned
+	ba,a,pt		%xcc, .Lmedium_unaligned
 
 #ifdef NON_USER_COPY
 .Lmedium_vis_entry_fail:
@@ -415,11 +408,11 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 .Lmedium:
 	LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
 	andcc		%g2, 0x7, %g0
-	bne,pn		%icc, .Lmedium_unaligned
+	bne,pn		%xcc, .Lmedium_unaligned
 	 nop
 .Lmedium_noprefetch:
 	andncc		%o2, 0x20 - 1, %o5
-	be,pn		%icc, 2f
+	be,pn		%xcc, 2f
 	 sub		%o2, %o5, %o2
 1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
 	EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5)
@@ -431,29 +424,29 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24)
 	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24)
 	EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8)
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 0x20, %o0
 2:	andcc		%o2, 0x18, %o5
-	be,pt		%icc, 3f
+	be,pt		%xcc, 3f
 	 sub		%o2, %o5, %o2
 
 1:	EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5)
 	add		%o1, 0x08, %o1
 	add		%o0, 0x08, %o0
 	subcc		%o5, 0x08, %o5
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8)
 3:	brz,pt		%o2, .Lexit
 	 cmp		%o2, 0x04
-	bl,pn		%icc, .Ltiny
+	bl,pn		%xcc, .Ltiny
 	 nop
 	EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2)
 	add		%o1, 0x04, %o1
 	add		%o0, 0x04, %o0
 	subcc		%o2, 0x04, %o2
-	bne,pn		%icc, .Ltiny
+	bne,pn		%xcc, .Ltiny
 	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4)
-	ba,a,pt		%icc, .Lexit
+	ba,a,pt		%xcc, .Lexit
 .Lmedium_unaligned:
 	/* First get dest 8 byte aligned.  */
 	sub		%g0, %o0, %g1
@@ -465,7 +458,7 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	add		%o1, 1, %o1
 	subcc		%g1, 1, %g1
 	add		%o0, 1, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1)
 2:
 	and		%o1, 0x7, %g1
@@ -485,30 +478,30 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	or		GLOBAL_SPARE, %o4, GLOBAL_SPARE
 	EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8)
 	add		%o0, 0x08, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 sllx		%g3, %g1, %o4
 	srl		%g1, 3, %g1
 	add		%o1, %g1, %o1
 	brz,pn		%o2, .Lexit
 	 nop
-	ba,pt		%icc, .Lsmall_unaligned
+	ba,pt		%xcc, .Lsmall_unaligned
 
 .Ltiny:
 	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
 	subcc		%o2, 1, %o2
-	be,pn		%icc, .Lexit
+	be,pn		%xcc, .Lexit
 	 EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1)
 	EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2)
 	subcc		%o2, 1, %o2
-	be,pn		%icc, .Lexit
+	be,pn		%xcc, .Lexit
 	 EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1)
 	EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2)
-	ba,pt		%icc, .Lexit
+	ba,pt		%xcc, .Lexit
 	 EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2)
 
 .Lsmall:
 	andcc		%g2, 0x3, %g0
-	bne,pn		%icc, .Lsmall_unaligned
+	bne,pn		%xcc, .Lsmall_unaligned
 	 andn		%o2, 0x4 - 1, %o5
 	sub		%o2, %o5, %o2
 1:
@@ -516,18 +509,18 @@ FUNC_NAME:	/* %o0=dst, %o1=src, %o2=len */
 	add		%o1, 0x04, %o1
 	subcc		%o5, 0x04, %o5
 	add		%o0, 0x04, %o0
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4)
 	brz,pt		%o2, .Lexit
 	 nop
-	ba,a,pt		%icc, .Ltiny
+	ba,a,pt		%xcc, .Ltiny
 
 .Lsmall_unaligned:
 1:	EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2)
 	add		%o1, 1, %o1
 	add		%o0, 1, %o0
 	subcc		%o2, 1, %o2
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1)
-	ba,a,pt		%icc, .Lexit
+	ba,a,pt		%xcc, .Lexit
 	.size		FUNC_NAME, .-FUNC_NAME
diff --git a/arch/sparc/lib/NG4memset.S b/arch/sparc/lib/NG4memset.S
index 41da4bd..e7c2e70 100644
--- a/arch/sparc/lib/NG4memset.S
+++ b/arch/sparc/lib/NG4memset.S
@@ -13,14 +13,14 @@
 	.globl		NG4memset
 NG4memset:
 	andcc		%o1, 0xff, %o4
-	be,pt		%icc, 1f
+	be,pt		%xcc, 1f
 	 mov		%o2, %o1
 	sllx		%o4, 8, %g1
 	or		%g1, %o4, %o2
 	sllx		%o2, 16, %g1
 	or		%g1, %o2, %o2
 	sllx		%o2, 32, %g1
-	ba,pt		%icc, 1f
+	ba,pt		%xcc, 1f
 	 or		%g1, %o2, %o4
 	.size		NG4memset,.-NG4memset
 
@@ -29,7 +29,7 @@ NG4memset:
 NG4bzero:
 	clr		%o4
 1:	cmp		%o1, 16
-	ble		%icc, .Ltiny
+	ble		%xcc, .Ltiny
 	 mov		%o0, %o3
 	sub		%g0, %o0, %g1
 	and		%g1, 0x7, %g1
@@ -37,7 +37,7 @@ NG4bzero:
 	 sub		%o1, %g1, %o1
 1:	stb		%o4, [%o0 + 0x00]
 	subcc		%g1, 1, %g1
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 1, %o0
 .Laligned8:
 	cmp		%o1, 64 + (64 - 8)
@@ -48,7 +48,7 @@ NG4bzero:
 	 sub		%o1, %g1, %o1
 1:	stx		%o4, [%o0 + 0x00]
 	subcc		%g1, 8, %g1
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 0x8, %o0
 .Laligned64:
 	andn		%o1, 64 - 1, %g1
@@ -58,30 +58,30 @@ NG4bzero:
 1:	stxa		%o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P
 	subcc		%g1, 0x40, %g1
 	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 0x40, %o0
 .Lpostloop:
 	cmp		%o1, 8
-	bl,pn		%icc, .Ltiny
+	bl,pn		%xcc, .Ltiny
 	 membar		#StoreStore|#StoreLoad
 .Lmedium:
 	andn		%o1, 0x7, %g1
 	sub		%o1, %g1, %o1
 1:	stx		%o4, [%o0 + 0x00]
 	subcc		%g1, 0x8, %g1
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 0x08, %o0
 	andcc		%o1, 0x4, %g1
-	be,pt		%icc, .Ltiny
+	be,pt		%xcc, .Ltiny
 	 sub		%o1, %g1, %o1
 	stw		%o4, [%o0 + 0x00]
 	add		%o0, 0x4, %o0
 .Ltiny:
 	cmp		%o1, 0
-	be,pn		%icc, .Lexit
+	be,pn		%xcc, .Lexit
 1:	 subcc		%o1, 1, %o1
 	stb		%o4, [%o0 + 0x00]
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 1, %o0
 .Lexit:
 	retl
@@ -99,7 +99,7 @@ NG4bzero:
 	stxa		%o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P
 	stxa		%o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P
 	stxa		%o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P
-	bne,pt		%icc, 1b
+	bne,pt		%xcc, 1b
 	 add		%o0, 0x30, %o0
-	ba,a,pt		%icc, .Lpostloop
+	ba,a,pt		%xcc, .Lpostloop
 	.size		NG4bzero,.-NG4bzero
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Development]     [DCCP]     [Linux ARM Development]     [Linux]     [Photo]     [Yosemite Help]     [Linux ARM Kernel]     [Linux SCSI]     [Linux x86_64]     [Linux Hams]

  Powered by Linux