[PATCH 3/4] MIPS: Fix MSA assembly with big thread offsets

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When lockdep is enabled on a 64-bit kernel, the FPR offset into the
thread structure exceeds the maximum range of the MSA ld.d/st.d
instructions. For example, THREAD_FPR31 = 4644 (instead of 2448), while
the signed immediate field is only 10 bits with an implicit multiply by
8, giving a maximum offset of 511*8 = 4088.

This isn't a problem when the toolchain doesn't support MSA, as the
ld_*/st_* macros perform the addition separately into $1 with [d]addiu,
which has a 16-bit signed immediate field.

Fix the case where the toolchain does support MSA by doing a single
addition of THREAD_FPR0 into $1 with [d]addiu, and doing the ld_*/st_*
accesses relative to that.

Signed-off-by: James Hogan <james.hogan@xxxxxxxxxx>
Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Cc: Paul Burton <paul.burton@xxxxxxxxxx>
Cc: linux-mips@xxxxxxxxxxxxxx
---
 arch/mips/include/asm/asmmacro.h | 147 ++++++++++++++++++++++-----------------
 1 file changed, 82 insertions(+), 65 deletions(-)

diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index e689b894353c..637fccab5604 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -496,41 +496,52 @@
 	.endm
 #endif
 
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+#define FPR_BASE_OFFS	THREAD_FPR0
+#define FPR_BASE	$1
+#else
+#define FPR_BASE_OFFS	0
+#define FPR_BASE	\thread
+#endif
+
 	.macro	msa_save_all	thread
-	st_d	0, THREAD_FPR0, \thread
-	st_d	1, THREAD_FPR1, \thread
-	st_d	2, THREAD_FPR2, \thread
-	st_d	3, THREAD_FPR3, \thread
-	st_d	4, THREAD_FPR4, \thread
-	st_d	5, THREAD_FPR5, \thread
-	st_d	6, THREAD_FPR6, \thread
-	st_d	7, THREAD_FPR7, \thread
-	st_d	8, THREAD_FPR8, \thread
-	st_d	9, THREAD_FPR9, \thread
-	st_d	10, THREAD_FPR10, \thread
-	st_d	11, THREAD_FPR11, \thread
-	st_d	12, THREAD_FPR12, \thread
-	st_d	13, THREAD_FPR13, \thread
-	st_d	14, THREAD_FPR14, \thread
-	st_d	15, THREAD_FPR15, \thread
-	st_d	16, THREAD_FPR16, \thread
-	st_d	17, THREAD_FPR17, \thread
-	st_d	18, THREAD_FPR18, \thread
-	st_d	19, THREAD_FPR19, \thread
-	st_d	20, THREAD_FPR20, \thread
-	st_d	21, THREAD_FPR21, \thread
-	st_d	22, THREAD_FPR22, \thread
-	st_d	23, THREAD_FPR23, \thread
-	st_d	24, THREAD_FPR24, \thread
-	st_d	25, THREAD_FPR25, \thread
-	st_d	26, THREAD_FPR26, \thread
-	st_d	27, THREAD_FPR27, \thread
-	st_d	28, THREAD_FPR28, \thread
-	st_d	29, THREAD_FPR29, \thread
-	st_d	30, THREAD_FPR30, \thread
-	st_d	31, THREAD_FPR31, \thread
 	.set	push
 	.set	noat
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+	PTR_ADDU FPR_BASE, \thread, FPR_BASE_OFFS
+#endif
+	st_d	 0, THREAD_FPR0  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 1, THREAD_FPR1  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 2, THREAD_FPR2  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 3, THREAD_FPR3  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 4, THREAD_FPR4  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 5, THREAD_FPR5  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 6, THREAD_FPR6  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 7, THREAD_FPR7  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 8, THREAD_FPR8  - FPR_BASE_OFFS, FPR_BASE
+	st_d	 9, THREAD_FPR9  - FPR_BASE_OFFS, FPR_BASE
+	st_d	10, THREAD_FPR10 - FPR_BASE_OFFS, FPR_BASE
+	st_d	11, THREAD_FPR11 - FPR_BASE_OFFS, FPR_BASE
+	st_d	12, THREAD_FPR12 - FPR_BASE_OFFS, FPR_BASE
+	st_d	13, THREAD_FPR13 - FPR_BASE_OFFS, FPR_BASE
+	st_d	14, THREAD_FPR14 - FPR_BASE_OFFS, FPR_BASE
+	st_d	15, THREAD_FPR15 - FPR_BASE_OFFS, FPR_BASE
+	st_d	16, THREAD_FPR16 - FPR_BASE_OFFS, FPR_BASE
+	st_d	17, THREAD_FPR17 - FPR_BASE_OFFS, FPR_BASE
+	st_d	18, THREAD_FPR18 - FPR_BASE_OFFS, FPR_BASE
+	st_d	19, THREAD_FPR19 - FPR_BASE_OFFS, FPR_BASE
+	st_d	20, THREAD_FPR20 - FPR_BASE_OFFS, FPR_BASE
+	st_d	21, THREAD_FPR21 - FPR_BASE_OFFS, FPR_BASE
+	st_d	22, THREAD_FPR22 - FPR_BASE_OFFS, FPR_BASE
+	st_d	23, THREAD_FPR23 - FPR_BASE_OFFS, FPR_BASE
+	st_d	24, THREAD_FPR24 - FPR_BASE_OFFS, FPR_BASE
+	st_d	25, THREAD_FPR25 - FPR_BASE_OFFS, FPR_BASE
+	st_d	26, THREAD_FPR26 - FPR_BASE_OFFS, FPR_BASE
+	st_d	27, THREAD_FPR27 - FPR_BASE_OFFS, FPR_BASE
+	st_d	28, THREAD_FPR28 - FPR_BASE_OFFS, FPR_BASE
+	st_d	29, THREAD_FPR29 - FPR_BASE_OFFS, FPR_BASE
+	st_d	30, THREAD_FPR30 - FPR_BASE_OFFS, FPR_BASE
+	st_d	31, THREAD_FPR31 - FPR_BASE_OFFS, FPR_BASE
 	SET_HARDFLOAT
 	_cfcmsa	$1, MSA_CSR
 	sw	$1, THREAD_MSA_CSR(\thread)
@@ -543,41 +554,47 @@
 	SET_HARDFLOAT
 	lw	$1, THREAD_MSA_CSR(\thread)
 	_ctcmsa	MSA_CSR, $1
-	.set	pop
-	ld_d	0, THREAD_FPR0, \thread
-	ld_d	1, THREAD_FPR1, \thread
-	ld_d	2, THREAD_FPR2, \thread
-	ld_d	3, THREAD_FPR3, \thread
-	ld_d	4, THREAD_FPR4, \thread
-	ld_d	5, THREAD_FPR5, \thread
-	ld_d	6, THREAD_FPR6, \thread
-	ld_d	7, THREAD_FPR7, \thread
-	ld_d	8, THREAD_FPR8, \thread
-	ld_d	9, THREAD_FPR9, \thread
-	ld_d	10, THREAD_FPR10, \thread
-	ld_d	11, THREAD_FPR11, \thread
-	ld_d	12, THREAD_FPR12, \thread
-	ld_d	13, THREAD_FPR13, \thread
-	ld_d	14, THREAD_FPR14, \thread
-	ld_d	15, THREAD_FPR15, \thread
-	ld_d	16, THREAD_FPR16, \thread
-	ld_d	17, THREAD_FPR17, \thread
-	ld_d	18, THREAD_FPR18, \thread
-	ld_d	19, THREAD_FPR19, \thread
-	ld_d	20, THREAD_FPR20, \thread
-	ld_d	21, THREAD_FPR21, \thread
-	ld_d	22, THREAD_FPR22, \thread
-	ld_d	23, THREAD_FPR23, \thread
-	ld_d	24, THREAD_FPR24, \thread
-	ld_d	25, THREAD_FPR25, \thread
-	ld_d	26, THREAD_FPR26, \thread
-	ld_d	27, THREAD_FPR27, \thread
-	ld_d	28, THREAD_FPR28, \thread
-	ld_d	29, THREAD_FPR29, \thread
-	ld_d	30, THREAD_FPR30, \thread
-	ld_d	31, THREAD_FPR31, \thread
+#ifdef TOOLCHAIN_SUPPORTS_MSA
+	PTR_ADDU FPR_BASE, \thread, FPR_BASE_OFFS
+#endif
+	ld_d	 0, THREAD_FPR0  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 1, THREAD_FPR1  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 2, THREAD_FPR2  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 3, THREAD_FPR3  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 4, THREAD_FPR4  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 5, THREAD_FPR5  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 6, THREAD_FPR6  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 7, THREAD_FPR7  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 8, THREAD_FPR8  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	 9, THREAD_FPR9  - FPR_BASE_OFFS, FPR_BASE
+	ld_d	10, THREAD_FPR10 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	11, THREAD_FPR11 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	12, THREAD_FPR12 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	13, THREAD_FPR13 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	14, THREAD_FPR14 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	15, THREAD_FPR15 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	16, THREAD_FPR16 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	17, THREAD_FPR17 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	18, THREAD_FPR18 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	19, THREAD_FPR19 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	20, THREAD_FPR20 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	21, THREAD_FPR21 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	22, THREAD_FPR22 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	23, THREAD_FPR23 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	24, THREAD_FPR24 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	25, THREAD_FPR25 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	26, THREAD_FPR26 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	27, THREAD_FPR27 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	28, THREAD_FPR28 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	29, THREAD_FPR29 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	30, THREAD_FPR30 - FPR_BASE_OFFS, FPR_BASE
+	ld_d	31, THREAD_FPR31 - FPR_BASE_OFFS, FPR_BASE
+	.set pop
 	.endm
 
+#undef FPR_BASE_OFFS
+#undef FPR_BASE
+
 	.macro	msa_init_upper wd
 #ifdef CONFIG_64BIT
 	insert_d \wd, 1
-- 
2.4.10





[Index of Archives]     [Linux MIPS Home]     [LKML Archive]     [Linux ARM Kernel]     [Linux ARM]     [Linux]     [Git]     [Yosemite News]     [Linux SCSI]     [Linux Hams]

  Powered by Linux