[PATCH 3/7] MIPS: memcpy: Split __copy_user & memcpy

Up until now we have shared the same code for __copy_user() & memcpy(),
but this has the drawback that __copy_user() uses a non-standard ABI
and thus needs to be called via inline assembly rather than a simple
function call. In order to allow further patches to change this, split
the __copy_user() & memcpy() functions.

The resulting implementations of __copy_user() & memcpy() should
differ only in their existing difference in return value, and in that
memcpy() doesn't generate exception table entries or include exception
fixup code.

For octeon this involves introducing the __BUILD_COPY_USER macro and
renaming labels so that they remain unique, which makes the code match
the non-octeon memcpy implementation more closely.

Signed-off-by: Paul Burton <paul.burton@xxxxxxxxxx>
---
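A reviewer's note on the mechanism (below the fold, so it won't end up
in the commit message): the split leans on two GNU assembler features.
\@ expands to a counter unique to each macro expansion, which keeps
local labels such as .Ldone\@ distinct when the body is instantiated
for both memcpy() & __copy_user(), and .if/.endif assembles the
__ex_table entries & fixup code only for the user-copy instantiation.
Below is a minimal sketch of the pattern, using made-up names
(MODE_MEMCPY, MODE_USER, build_copy, .Lfault) rather than anything
from this patch; PTR is the asm/asm.h macro that expands to
.word/.dword as appropriate for the kernel's bitness:

	#define MODE_MEMCPY	1
	#define MODE_USER	2

		.set	noreorder
		.macro	build_copy mode
	9:	lb	$t0, 0($a1)		# the load that may fault
		.if	\mode == MODE_USER	# only user copies need fixup
		.section __ex_table, "a"
		PTR	9b, .Lfault		# record faulting PC -> handler
		.previous
		.endif
	.Ldone\@:			# \@ keeps this unique per expansion
		jr	$ra
		 nop
		.endm

		build_copy MODE_MEMCPY	# no __ex_table entry emitted
		build_copy MODE_USER	# entry for 9b emitted

	.Lfault:			# hypothetical fixup handler
		jr	$ra
		 move	$v0, $a2	# e.g. report bytes left uncopied

Assembling the MODE_MEMCPY expansion with the .if body elided is what
lets memcpy() drop its exception table entries without duplicating the
copy loops in the source.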

 arch/mips/cavium-octeon/octeon-memcpy.S | 141 +++++++++++++++++++-------------
 arch/mips/lib/memcpy.S                  |  74 ++++++++++-------
 2 files changed, 131 insertions(+), 84 deletions(-)

diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S
index 4336316..944f8f5 100644
--- a/arch/mips/cavium-octeon/octeon-memcpy.S
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@@ -18,6 +18,9 @@
 #include <asm/export.h>
 #include <asm/regdef.h>
 
+#define MEMCPY_MODE	1
+#define USER_COPY_MODE	2
+
 #define dst a0
 #define src a1
 #define len a2
@@ -70,9 +73,11 @@
 
 #define EXC(inst_reg,addr,handler)		\
 9:	inst_reg, addr;				\
-	.section __ex_table,"a";		\
-	PTR	9b, handler;			\
-	.previous
+	.if	\mode != MEMCPY_MODE;		\
+		.section __ex_table,"a";	\
+		PTR	9b, handler;		\
+		.previous;			\
+	.endif
 
 /*
  * Only on the 64-bit kernel can we make use of 64-bit registers.
@@ -136,30 +141,7 @@
 	.set	noreorder
 	.set	noat
 
-/*
- * t7 is used as a flag to note inatomic mode.
- */
-LEAF(__copy_user_inatomic)
-EXPORT_SYMBOL(__copy_user_inatomic)
-	b	__copy_user_common
-	 li	t7, 1
-	END(__copy_user_inatomic)
-
-/*
- * A combined memcpy/__copy_user
- * __copy_user sets len to 0 for success; else to an upper bound of
- * the number of uncopied bytes.
- * memcpy sets v0 to dst.
- */
-	.align	5
-LEAF(memcpy)					/* a0=dst a1=src a2=len */
-EXPORT_SYMBOL(memcpy)
-	move	v0, dst				/* return value */
-__memcpy:
-FEXPORT(__copy_user)
-EXPORT_SYMBOL(__copy_user)
-	li	t7, 0				/* not inatomic */
-__copy_user_common:
+	.macro __BUILD_COPY_USER mode
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
 	 * Temps
@@ -170,15 +152,15 @@ __copy_user_common:
 	#
 	pref	0, 0(src)
 	sltu	t0, len, NBYTES		# Check if < 1 word
-	bnez	t0, copy_bytes_checklen
+	bnez	t0, .Lcopy_bytes_checklen\@
 	 and	t0, src, ADDRMASK	# Check if src unaligned
-	bnez	t0, src_unaligned
+	bnez	t0, .Lsrc_unaligned\@
 	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units\@
 	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
-	bnez	t0, less_than_8units
+	bnez	t0, .Lless_than_8units\@
 	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
-	bnez	t0, cleanup_both_aligned
+	bnez	t0, .Lcleanup_both_aligned\@
 	 sltu	t0, len, 128+1		# Check if len < 129
 	bnez	t0, 1f			# Skip prefetch if len is too short
 	 sltu	t0, len, 256+1		# Check if len < 257
@@ -233,10 +215,10 @@ EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
 	#
 	# Jump here if there are less than 16*NBYTES left.
 	#
-cleanup_both_aligned:
-	beqz	len, done
+.Lcleanup_both_aligned\@:
+	beqz	len, .Ldone\@
 	 sltu	t0, len, 8*NBYTES
-	bnez	t0, less_than_8units
+	bnez	t0, .Lless_than_8units\@
 	 nop
 EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
 EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
@@ -256,14 +238,14 @@ EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
 EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
 	ADD	src, src, 8*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone\@
 	 ADD	dst, dst, 8*NBYTES
 	#
 	# Jump here if there are less than 8*NBYTES left.
 	#
-less_than_8units:
+.Lless_than_8units\@:
 	sltu	t0, len, 4*NBYTES
-	bnez	t0, less_than_4units
+	bnez	t0, .Lless_than_4units\@
 	 nop
 EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
 EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
@@ -275,15 +257,15 @@ EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
 EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
 	ADD	src, src, 4*NBYTES
-	beqz	len, done
+	beqz	len, .Ldone\@
 	 ADD	dst, dst, 4*NBYTES
 	#
 	# Jump here if there are less than 4*NBYTES left. This means
 	# we may need to copy up to 3 NBYTES words.
 	#
-less_than_4units:
+.Lless_than_4units\@:
 	sltu	t0, len, 1*NBYTES
-	bnez	t0, copy_bytes_checklen
+	bnez	t0, .Lcopy_bytes_checklen\@
 	 nop
 	#
 	# 1) Copy NBYTES, then check length again
@@ -293,7 +275,7 @@ EXC(	LOAD	t0, 0(src),		l_exc)
 	sltu	t1, len, 8
 EXC(	STORE	t0, 0(dst),		s_exc_p1u)
 	ADD	src, src, NBYTES
-	bnez	t1, copy_bytes_checklen
+	bnez	t1, .Lcopy_bytes_checklen\@
 	 ADD	dst, dst, NBYTES
 	#
 	# 2) Copy NBYTES, then check length again
@@ -303,7 +285,7 @@ EXC(	LOAD	t0, 0(src),		l_exc)
 	sltu	t1, len, 8
 EXC(	STORE	t0, 0(dst),		s_exc_p1u)
 	ADD	src, src, NBYTES
-	bnez	t1, copy_bytes_checklen
+	bnez	t1, .Lcopy_bytes_checklen\@
 	 ADD	dst, dst, NBYTES
 	#
 	# 3) Copy NBYTES, then check length again
@@ -312,13 +294,13 @@ EXC(	LOAD	t0, 0(src),		l_exc)
 	SUB	len, len, NBYTES
 	ADD	src, src, NBYTES
 	ADD	dst, dst, NBYTES
-	b copy_bytes_checklen
+	b .Lcopy_bytes_checklen\@
 EXC(	 STORE	t0, -8(dst),		s_exc_p1u)
 
-src_unaligned:
+.Lsrc_unaligned\@:
 #define rem t8
 	SRL	t0, len, LOG_NBYTES+2	 # +2 for 4 units/iter
-	beqz	t0, cleanup_src_unaligned
+	beqz	t0, .Lcleanup_src_unaligned\@
 	 and	rem, len, (4*NBYTES-1)	 # rem = len % 4*NBYTES
 1:
 /*
@@ -344,10 +326,10 @@ EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
 	bne	len, rem, 1b
 	 ADD	dst, dst, 4*NBYTES
 
-cleanup_src_unaligned:
-	beqz	len, done
+.Lcleanup_src_unaligned\@:
+	beqz	len, .Ldone\@
 	 and	rem, len, NBYTES-1  # rem = len % NBYTES
-	beq	rem, len, copy_bytes
+	beq	rem, len, .Lcopy_bytes\@
 	 nop
 1:
 EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
@@ -358,15 +340,15 @@ EXC(	STORE	t0, 0(dst),		s_exc_p1u)
 	bne	len, rem, 1b
 	 ADD	dst, dst, NBYTES
 
-copy_bytes_checklen:
-	beqz	len, done
+.Lcopy_bytes_checklen\@:
+	beqz	len, .Ldone\@
 	 nop
-copy_bytes:
+.Lcopy_bytes\@:
 	/* 0 < len < NBYTES  */
 #define COPY_BYTE(N)			\
 EXC(	lb	t0, N(src), l_exc);	\
 	SUB	len, len, 1;		\
-	beqz	len, done;		\
+	beqz	len, .Ldone\@;		\
 EXC(	 sb	t0, N(dst), s_exc_p1)
 
 	COPY_BYTE(0)
@@ -379,10 +361,12 @@ EXC(	lb	t0, NBYTES-2(src), l_exc)
 	SUB	len, len, 1
 	jr	ra
 EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
-done:
+.Ldone\@:
 	jr	ra
 	 nop
-	END(memcpy)
+
+	/* memcpy shouldn't generate exceptions */
+	.if \mode != MEMCPY_MODE
 
 l_exc_copy:
 	/*
@@ -419,7 +403,7 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
-	beqz	len, done
+	beqz	len, .Ldone\@
 	 SUB	src, len, 1
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
@@ -457,3 +441,48 @@ s_exc_p1:
 s_exc:
 	jr	ra
 	 nop
+	.endif	/* \mode != MEMCPY_MODE */
+	.endm
+
+/*
+ * memcpy() - Copy memory
+ * @a0 - destination
+ * @a1 - source
+ * @a2 - length
+ *
+ * Copy @a2 bytes of memory from @a1 to @a0.
+ *
+ * Returns: the destination pointer
+ */
+	.align	5
+LEAF(memcpy)					/* a0=dst a1=src a2=len */
+EXPORT_SYMBOL(memcpy)
+	move	v0, dst				/* return value */
+	__BUILD_COPY_USER MEMCPY_MODE
+	END(memcpy)
+
+/*
+ * __copy_user() - Copy memory
+ * @a0 - destination
+ * @a1 - source
+ * @a2 - length
+ *
+ * Copy @a2 bytes of memory from @a1 to @a0.
+ *
+ * Returns: the number of uncopied bytes in @a2
+ */
+LEAF(__copy_user)
+EXPORT_SYMBOL(__copy_user)
+	li	t7, 0				/* not inatomic */
+__copy_user_common:
+	__BUILD_COPY_USER USER_COPY_MODE
+	END(__copy_user)
+
+/*
+ * t7 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+EXPORT_SYMBOL(__copy_user_inatomic)
+	b	__copy_user_common
+	 li	t7, 1
+	END(__copy_user_inatomic)
diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S
index b8d34d9..bfbe23c 100644
--- a/arch/mips/lib/memcpy.S
+++ b/arch/mips/lib/memcpy.S
@@ -92,6 +92,7 @@
 #define DST_PREFETCH 2
 #define LEGACY_MODE 1
 #define EVA_MODE    2
+#define MEMCPY_MODE 3
 #define USEROP   1
 #define KERNELOP 2
 
@@ -107,7 +108,9 @@
  */
 
 #define EXC(insn, type, reg, addr, handler)			\
-	.if \mode == LEGACY_MODE;				\
+	.if \mode == MEMCPY_MODE;				\
+		insn reg, addr;					\
+	.elseif \mode == LEGACY_MODE;				\
 9:		insn reg, addr;					\
 		.section __ex_table,"a";			\
 		PTR	9b, handler;				\
@@ -199,7 +202,7 @@
 #define STOREB(reg, addr, handler)	EXC(sb, ST_INSN, reg, addr, handler)
 
 #define _PREF(hint, addr, type)						\
-	.if \mode == LEGACY_MODE;					\
+	.if \mode != EVA_MODE;						\
 		PREF(hint, addr);					\
 	.else;								\
 		.if ((\from == USEROP) && (type == SRC_PREFETCH)) ||	\
@@ -255,18 +258,12 @@
 	/*
 	 * Macro to build the __copy_user common code
 	 * Arguments:
-	 * mode : LEGACY_MODE or EVA_MODE
+	 * mode : LEGACY_MODE, EVA_MODE or MEMCPY_MODE
 	 * from : Source operand. USEROP or KERNELOP
 	 * to   : Destination operand. USEROP or KERNELOP
 	 */
 	.macro __BUILD_COPY_USER mode, from, to
 
-	/* initialize __memcpy if this the first time we execute this macro */
-	.ifnotdef __memcpy
-	.set __memcpy, 1
-	.hidden __memcpy /* make sure it does not leak */
-	.endif
-
 	/*
 	 * Note: dst & src may be unaligned, len may be 0
 	 * Temps
@@ -525,11 +522,9 @@
 	b	1b
 	 ADD	dst, dst, 8
 #endif /* CONFIG_CPU_MIPSR6 */
-	.if __memcpy == 1
-	END(memcpy)
-	.set __memcpy, 0
-	.hidden __memcpy
-	.endif
+
+	/* memcpy shouldn't generate exceptions */
+	.if	\mode != MEMCPY_MODE
 
 .Ll_exc_copy\@:
 	/*
@@ -616,34 +611,57 @@ SEXC(1)
 .Ls_exc\@:
 	jr	ra
 	 nop
-	.endm
 
-/*
- * t6 is used as a flag to note inatomic mode.
- */
-LEAF(__copy_user_inatomic)
-EXPORT_SYMBOL(__copy_user_inatomic)
-	b	__copy_user_common
-	li	t6, 1
-	END(__copy_user_inatomic)
+	.endif	/* \mode != MEMCPY_MODE */
+	.endm
 
 /*
- * A combined memcpy/__copy_user
- * __copy_user sets len to 0 for success; else to an upper bound of
- * the number of uncopied bytes.
- * memcpy sets v0 to dst.
+ * memcpy() - Copy memory
+ * @a0 - destination
+ * @a1 - source
+ * @a2 - length
+ *
+ * Copy @a2 bytes of memory from @a1 to @a0.
+ *
+ * Returns: the destination pointer
  */
 	.align	5
 LEAF(memcpy)					/* a0=dst a1=src a2=len */
 EXPORT_SYMBOL(memcpy)
 	move	v0, dst				/* return value */
 .L__memcpy:
-FEXPORT(__copy_user)
+	li	t6, 0	/* not inatomic */
+	/* Memcpy mode, no exception table entries */
+	__BUILD_COPY_USER MEMCPY_MODE USEROP USEROP
+	END(memcpy)
+
+/*
+ * __copy_user() - Copy memory
+ * @a0 - destination
+ * @a1 - source
+ * @a2 - length
+ *
+ * Copy @a2 bytes of memory from @a1 to @a0.
+ *
+ * Returns: the number of uncopied bytes in @a2
+ */
+	.align	5
+LEAF(__copy_user)
 EXPORT_SYMBOL(__copy_user)
 	li	t6, 0	/* not inatomic */
 __copy_user_common:
 	/* Legacy Mode, user <-> user */
 	__BUILD_COPY_USER LEGACY_MODE USEROP USEROP
+	END(__copy_user)
+
+/*
+ * t6 is used as a flag to note inatomic mode.
+ */
+LEAF(__copy_user_inatomic)
+EXPORT_SYMBOL(__copy_user_inatomic)
+	b	__copy_user_common
+	li	t6, 1
+	END(__copy_user_inatomic)
 
 #ifdef CONFIG_EVA
 
-- 
2.10.2
