Re: [PATCH 2/2] MIPS: Loongson: Introduce and use loongson_llsc_mb()

> On Dec 25, 2018, at 08:51, Huacai Chen <chenhc@xxxxxxxxxx> wrote:
> 
> On the Loongson-2G/2H/3A/3B there is a hardware flaw: the ll/sc and
> lld/scd instructions have very weak ordering. We should add sync
> instructions before each ll/lld and after the last sc/scd to work
> around it. Otherwise, this flaw will occasionally cause deadlock
> (e.g. when doing a heavy load test with LTP).
> 
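
[For context, a minimal sketch of the resulting pattern as I read the
patch; the function name and constraints below are illustrative, not
taken from the kernel:

	/* sketch: an atomic add with the Loongson-3 workaround applied */
	static __inline__ void sketch_atomic_add(int i, atomic_t *v)
	{
		int temp;

		loongson_llsc_mb();	/* sync before the ll */
		__asm__ __volatile__(
		"1:	ll	%0, %1		# load-linked		\n"
		"	addu	%0, %2		# add the increment	\n"
		"	sc	%0, %1		# store-conditional	\n"
		"	beqz	%0, 1b		# retry if sc failed	\n"
		: "=&r" (temp), "+m" (v->counter)
		: "Ir" (i));
		loongson_llsc_mb();	/* sync after the last sc */
	}

That is, a full sync on each side of every ll/sc loop.]
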
> This patch is a less invasive but more palatable solution, and in
> theory it is not even a sufficient solution (though it is sufficient
> in practice). The solution that is sufficient in theory (a more than
> minimal change) looks too ugly; for reference, see:
> https://patchwork.linux-mips.org/patch/21134/.
> 
> Why disable fix-loongson3-llsc in the compiler?
> Because the compiler fix will cause problems in the kernel's .fixup
> section.

It is the __ex_table section, actually. (My guess at the mechanism: the
assembler fix inserts a sync in front of each ll, which shifts the
address that a preceding label records, so the __ex_table entry no
longer points at the instruction that may fault.)

> 
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx>
> ---
> arch/mips/include/asm/atomic.h  |  4 ++++
> arch/mips/include/asm/barrier.h |  6 ++++++
> arch/mips/include/asm/bitops.h  | 10 ++++++++++
> arch/mips/include/asm/edac.h    |  2 ++
> arch/mips/include/asm/futex.h   |  4 ++++
> arch/mips/include/asm/pgtable.h |  2 ++
> arch/mips/loongson64/Platform   |  3 +++
> arch/mips/mm/tlbex.c            | 11 +++++++++++
> 8 files changed, 42 insertions(+)
> 
> diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
> index 43fcd35..dbbc6c1 100644
> --- a/arch/mips/include/asm/atomic.h
> +++ b/arch/mips/include/asm/atomic.h
> @@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)			      \
> 	if (kernel_uses_llsc) {						      \
> 		int temp;						      \
> 									      \
> +		loongson_llsc_mb();					      \

I noticed that we already have smp_mb__before_llsc() and smp_llsc_mb().

Some ll/sc sequences are wrapped in smp_mb__before_llsc()/smp_llsc_mb()
pairs and some are not. Why is it like this?

I guess we should add smp_mb__before_llsc()/smp_llsc_mb() pairs around
all ll/sc sequences.
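
For reference, this is how I read the existing definitions in
arch/mips/include/asm/barrier.h (a simplified sketch; the real file has
additional config guards, e.g. for Octeon):

	/* sketch, not verbatim */
	#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
	#define __WEAK_LLSC_MB		"	sync	\n"
	#else
	#define __WEAK_LLSC_MB		"		\n"
	#endif

	#define smp_llsc_mb()		__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
	#define smp_mb__before_llsc()	smp_llsc_mb()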

> 		__asm__ __volatile__(					      \
> 		"	.set	push					\n"   \
> 		"	.set	"MIPS_ISA_LEVEL"			\n"   \
> @@ -68,6 +69,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)			      \
> 		"	.set	pop					\n"   \
> 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
> 		: "Ir" (i));						      \
> +		loongson_llsc_mb();					      \
> 	} else {							      \
> 		unsigned long flags;					      \
> 									      \
> @@ -256,6 +258,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)		      \
> 	if (kernel_uses_llsc) {						      \
> 		long temp;						      \
> 									      \
> +		loongson_llsc_mb();					      \
> 		__asm__ __volatile__(					      \
> 		"	.set	push					\n"   \
> 		"	.set	"MIPS_ISA_LEVEL"			\n"   \
> @@ -266,6 +269,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)		      \
> 		"	.set	pop					\n"   \
> 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	      \
> 		: "Ir" (i));						      \
> +		loongson_llsc_mb();					      \
> 	} else {							      \
> 		unsigned long flags;					      \
> 									      \
> diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
> index a5eb1bb..7ce9a15 100644
> --- a/arch/mips/include/asm/barrier.h
> +++ b/arch/mips/include/asm/barrier.h
> @@ -222,6 +222,12 @@
> #define __smp_mb__before_atomic()	__smp_mb__before_llsc()
> #define __smp_mb__after_atomic()	smp_llsc_mb()
> 
> +#if defined(CONFIG_LOONGSON3) && defined(CONFIG_SMP) /* Loongson-3's LLSC workaround */
> +#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
> +#else
> +#define loongson_llsc_mb()	__asm__ __volatile__("            " : : :"memory")
> +#endif
> +
> #include <asm-generic/barrier.h>
> 
> #endif /* __ASM_BARRIER_H */
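
(Note that even the non-Loongson variant above is not a complete no-op:
an empty asm with a "memory" clobber emits no instruction but still
acts as a compiler barrier.)
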
> diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
> index f2a840f..d70c7e5 100644
> --- a/arch/mips/include/asm/bitops.h
> +++ b/arch/mips/include/asm/bitops.h
> @@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
> 		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
> #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
> 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			"	" __LL "%0, %1		# set_bit	\n"
> @@ -77,8 +78,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (bit), "r" (~0));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
> 	} else if (kernel_uses_llsc) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			"	.set	push				\n"
> @@ -90,6 +93,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (1UL << bit));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> 	} else
> 		__mips_set_bit(nr, addr);
> }
> @@ -123,6 +127,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
> 		: "ir" (~(1UL << bit)));
> #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
> 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			"	" __LL "%0, %1		# clear_bit	\n"
> @@ -131,8 +136,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (bit));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
> 	} else if (kernel_uses_llsc) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			"	.set	push				\n"
> @@ -144,6 +151,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (~(1UL << bit)));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> 	} else
> 		__mips_clear_bit(nr, addr);
> }
> @@ -193,6 +201,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
> 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> 		unsigned long temp;
> 
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			"	.set	push				\n"
> @@ -204,6 +213,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (1UL << bit));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> 	} else
> 		__mips_change_bit(nr, addr);
> }
> diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
> index c5d1477..1abf69c 100644
> --- a/arch/mips/include/asm/edac.h
> +++ b/arch/mips/include/asm/edac.h
> @@ -20,6 +20,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
> 		 * Intel: asm("lock; addl $0, %0"::"m"(*virt_addr));
> 		 */
> 
> +		loongson_llsc_mb();
> 		__asm__ __volatile__ (
> 		"	.set	push					\n"
> 		"	.set	mips2					\n"
> @@ -30,6 +31,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
> 		"	.set	pop					\n"
> 		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
> 		: GCC_OFF_SMALL_ASM() (*virt_addr));
> +		loongson_llsc_mb();
> 
> 		virt_addr++;
> 	}
> diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
> index 8eff134..07b0be8 100644
> --- a/arch/mips/include/asm/futex.h
> +++ b/arch/mips/include/asm/futex.h
> @@ -50,6 +50,7 @@
> 		  "i" (-EFAULT)						\
> 		: "memory");						\
> 	} else if (cpu_has_llsc) {					\
> +		loongson_llsc_mb();					\
> 		__asm__ __volatile__(					\
> 		"	.set	push				\n"	\
> 		"	.set	noat				\n"	\
> @@ -78,6 +79,7 @@
> 		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
> 		  "i" (-EFAULT)						\
> 		: "memory");						\
> +		loongson_llsc_mb();					\
> 	} else								\
> 		ret = -ENOSYS;						\
> }
> @@ -163,6 +165,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> 		  "i" (-EFAULT)
> 		: "memory");
> 	} else if (cpu_has_llsc) {
> +		loongson_llsc_mb();
> 		__asm__ __volatile__(
> 		"# futex_atomic_cmpxchg_inatomic			\n"
> 		"	.set	push					\n"
> @@ -192,6 +195,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
> 		  "i" (-EFAULT)
> 		: "memory");
> +		loongson_llsc_mb();
> 	} else
> 		return -ENOSYS;
> 
> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> index 57933fc..910851c 100644
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
> 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
> 			: [global] "r" (page_global));
> 		} else if (kernel_uses_llsc) {
> +			loongson_llsc_mb();
> 			__asm__ __volatile__ (
> 			"	.set	push				\n"
> 			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
> @@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
> 			"	.set	pop				\n"
> 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
> 			: [global] "r" (page_global));
> +			loongson_llsc_mb();
> 		}
> #else /* !CONFIG_SMP */
> 		if (pte_none(*buddy))
> diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
> index 0fce460..3700dcf 100644
> --- a/arch/mips/loongson64/Platform
> +++ b/arch/mips/loongson64/Platform
> @@ -23,6 +23,9 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
> endif
> 
> cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
> +ifneq ($(call as-option,-Wa$(comma)-mfix-loongson3-llsc,),)
> +  cflags-$(CONFIG_CPU_LOONGSON3) += -Wa$(comma)-mno-fix-loongson3-llsc
> +endif
> #
> # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
> # as MIPS64 R2; older versions as just R1.  This leaves the possibility open
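
(As I understand it, the as-option test above makes the build pass
-Wa,-mno-fix-loongson3-llsc only when the assembler actually recognizes
the option, so older binutils without the Loongson-3 llsc fix still
build unchanged.)
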
> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> index 37b1cb2..e1a4637 100644
> --- a/arch/mips/mm/tlbex.c
> +++ b/arch/mips/mm/tlbex.c
> @@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
> 		 * to mimic that here by taking a load/istream page
> 		 * fault.
> 		 */
> +		if (current_cpu_type() == CPU_LOONGSON3)
> +			uasm_i_sync(p, 0);
> 		UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
> 		uasm_i_jr(p, ptr);

This place does not seem to be about the ll/sc problem, as no ll/sc is
used here at all. Yet with this patch applied, the machine seems much
more stable.

I tried adding sync before each ll and at the branch targets inside the
ll/sc loops; that improved stability, but the machine still hangs.
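
(For readers not familiar with uasm: as far as I know, uasm_i_sync(&p, 0)
simply emits a plain sync instruction with stype 0 into the generated
handler, so these hunks place full barriers on the TLB exception paths
rather than around ll/sc.)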

> 
> @@ -1556,6 +1558,7 @@ static void build_loongson3_tlb_refill_handler(void)
> 
> 	if (check_for_high_segbits) {
> 		uasm_l_large_segbits_fault(&l, p);
> +		uasm_i_sync(&p, 0);
> 		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
> 		uasm_i_jr(&p, K1);
> 		uasm_i_nop(&p);
> @@ -1646,6 +1649,8 @@ static void
> iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
> {
> #ifdef CONFIG_SMP
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(p, 0);
> # ifdef CONFIG_PHYS_ADDR_T_64BIT
> 	if (cpu_has_64bits)
> 		uasm_i_lld(p, pte, 0, ptr);
> @@ -2259,6 +2264,8 @@ static void build_r4000_tlb_load_handler(void)
> #endif
> 
> 	uasm_l_nopage_tlbl(&l, p);
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(&p, 0);
> 	build_restore_work_registers(&p);
> #ifdef CONFIG_CPU_MICROMIPS
> 	if ((unsigned long)tlb_do_page_fault_0 & 1) {
> @@ -2313,6 +2320,8 @@ static void build_r4000_tlb_store_handler(void)
> #endif
> 
> 	uasm_l_nopage_tlbs(&l, p);
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(&p, 0);
> 	build_restore_work_registers(&p);
> #ifdef CONFIG_CPU_MICROMIPS
> 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
> @@ -2368,6 +2377,8 @@ static void build_r4000_tlb_modify_handler(void)
> #endif
> 
> 	uasm_l_nopage_tlbm(&l, p);
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(&p, 0);
> 	build_restore_work_registers(&p);
> #ifdef CONFIG_CPU_MICROMIPS
> 	if ((unsigned long)tlb_do_page_fault_1 & 1) {


