On Loongson-2G/2H/3A/3B there is a hardware flaw: the ll/sc and lld/scd instructions are very weakly ordered. To work around it, we should add sync instructions before each ll/lld and after the last sc/scd. Otherwise, the flaw occasionally causes deadlock (e.g. when running heavy load tests with LTP).

This patch is a less invasive but more comfortable solution; it is not even a complete solution in theory, but it is enough in practice. The theoretically complete solution (which is far more than a minimal change) looks too ugly; for reference, see: https://patchwork.linux-mips.org/patch/21134/.

Why disable fix-loongson3-llsc in the toolchain? Because the assembler's fix causes problems in the kernel's .fixup section.
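For illustration only (this sketch is not taken from the diff below; the atomic increment and the register names are arbitrary), the workaround turns an LL/SC loop into roughly the following, where loongson_llsc_mb() emits a bare sync via __WEAK_LLSC_MB:

	sync			# loongson_llsc_mb() before the LL
1:	ll	t0, 0(a0)	# load-linked
	addiu	t0, t0, 1	# modify (an increment, for example)
	sc	t0, 0(a0)	# store-conditional
	beqz	t0, 1b		# retry if the SC failed
	 nop			# branch delay slot
	sync			# loongson_llsc_mb() after the last SC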

Cc: stable@xxxxxxxxxxxxxxx
Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx>
---
 arch/mips/include/asm/atomic.h  |  4 ++++
 arch/mips/include/asm/barrier.h |  6 ++++++
 arch/mips/include/asm/bitops.h  | 10 ++++++++++
 arch/mips/include/asm/edac.h    |  2 ++
 arch/mips/include/asm/futex.h   |  4 ++++
 arch/mips/include/asm/pgtable.h |  2 ++
 arch/mips/loongson64/Platform   |  3 +++
 arch/mips/mm/tlbex.c            | 11 +++++++++++
 8 files changed, 42 insertions(+)

diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
index 43fcd35..dbbc6c1 100644
--- a/arch/mips/include/asm/atomic.h
+++ b/arch/mips/include/asm/atomic.h
@@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)		\
 	if (kernel_uses_llsc) {						\
 		int temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -68,6 +69,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v)		\
 		"	.set	pop				\n"	\
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	\
 		: "Ir" (i));						\
+		loongson_llsc_mb();					\
 	} else {							\
 		unsigned long flags;					\
 									\
@@ -256,6 +258,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)	\
 	if (kernel_uses_llsc) {						\
 		long temp;						\
 									\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	"MIPS_ISA_LEVEL"		\n"	\
@@ -266,6 +269,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v)	\
 		"	.set	pop				\n"	\
 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter)	\
 		: "Ir" (i));						\
+		loongson_llsc_mb();					\
 	} else {							\
 		unsigned long flags;					\
 									\
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index a5eb1bb..7ce9a15 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -222,6 +222,12 @@
 #define __smp_mb__before_atomic()	__smp_mb__before_llsc()
 #define __smp_mb__after_atomic()	smp_llsc_mb()
 
+#if defined(CONFIG_LOONGSON3) && defined(CONFIG_SMP) /* Loongson-3's LLSC workaround */
+#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
+#else
+#define loongson_llsc_mb()	__asm__ __volatile__("  " : : :"memory")
+#endif
+
 #include <asm-generic/barrier.h>
 
 #endif /* __ASM_BARRIER_H */
diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
index f2a840f..d70c7e5 100644
--- a/arch/mips/include/asm/bitops.h
+++ b/arch/mips/include/asm/bitops.h
@@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# set_bit	\n"
@@ -77,8 +78,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (bit), "r" (~0));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -90,6 +93,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (1UL << bit));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 	} else
 		__mips_set_bit(nr, addr);
 }
@@ -123,6 +127,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 		: "ir" (~(1UL << bit)));
 #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	" __LL "%0, %1		# clear_bit	\n"
@@ -131,8 +136,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (bit));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
 	} else if (kernel_uses_llsc) {
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -144,6 +151,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (~(1UL << bit)));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 	} else
 		__mips_clear_bit(nr, addr);
 }
@@ -193,6 +201,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
 		unsigned long temp;
 
+		loongson_llsc_mb();
 		do {
 			__asm__ __volatile__(
 			"	.set	push				\n"
@@ -204,6 +213,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
 			: "ir" (1UL << bit));
 		} while (unlikely(!temp));
+		loongson_llsc_mb();
 	} else
 		__mips_change_bit(nr, addr);
 }
diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
index c5d1477..1abf69c 100644
--- a/arch/mips/include/asm/edac.h
+++ b/arch/mips/include/asm/edac.h
@@ -20,6 +20,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
 		 * Intel: asm("lock; addl $0, %0"::"m"(*virt_addr));
 		 */
 
+		loongson_llsc_mb();
 		__asm__ __volatile__ (
 		"	.set	push					\n"
 		"	.set	mips2					\n"
@@ -30,6 +31,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
 		"	.set	pop					\n"
 		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
 		: GCC_OFF_SMALL_ASM() (*virt_addr));
+		loongson_llsc_mb();
 
 		virt_addr++;
 	}
diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
index 8eff134..07b0be8 100644
--- a/arch/mips/include/asm/futex.h
+++ b/arch/mips/include/asm/futex.h
@@ -50,6 +50,7 @@
 		  "i" (-EFAULT)						\
 		: "memory");						\
 	} else if (cpu_has_llsc) {					\
+		loongson_llsc_mb();					\
 		__asm__ __volatile__(					\
 		"	.set	push				\n"	\
 		"	.set	noat				\n"	\
@@ -78,6 +79,7 @@
 		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg),	\
 		  "i" (-EFAULT)						\
 		: "memory");						\
+		loongson_llsc_mb();					\
 	} else								\
 		ret = -ENOSYS;						\
 }
@@ -163,6 +165,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		  "i" (-EFAULT)
 		: "memory");
 	} else if (cpu_has_llsc) {
+		loongson_llsc_mb();
 		__asm__ __volatile__(
 		"# futex_atomic_cmpxchg_inatomic			\n"
 		"	.set	push					\n"
@@ -192,6 +195,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
 		  "i" (-EFAULT)
 		: "memory");
+		loongson_llsc_mb();
 	} else
 		return -ENOSYS;
 
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 57933fc..910851c 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
 		} else if (kernel_uses_llsc) {
+			loongson_llsc_mb();
 			__asm__ __volatile__ (
 			"	.set	push				\n"
 			"	.set	"MIPS_ISA_ARCH_LEVEL"		\n"
@@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
 			"	.set	pop				\n"
 			: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
 			: [global] "r" (page_global));
+			loongson_llsc_mb();
 		}
 #else /* !CONFIG_SMP */
 		if (pte_none(*buddy))
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 0fce460..3700dcf 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -23,6 +23,9 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
 endif
 
 cflags-$(CONFIG_CPU_LOONGSON3)	+= -Wa,--trap
+ifneq ($(call as-option,-Wa$(comma)-mfix-loongson3-llsc,),)
+  cflags-$(CONFIG_CPU_LOONGSON3) += -Wa$(comma)-mno-fix-loongson3-llsc
+endif
 #
 # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
 # as MIPS64 R2; older versions as just R1. This leaves the possibility open
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 37b1cb2..e1a4637 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 	 * to mimic that here by taking a load/istream page
 	 * fault.
 	 */
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(p, 0);
 	UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
 	uasm_i_jr(p, ptr);
 
@@ -1556,6 +1558,7 @@ static void build_loongson3_tlb_refill_handler(void)
 
 	if (check_for_high_segbits) {
 		uasm_l_large_segbits_fault(&l, p);
+		uasm_i_sync(&p, 0);
 		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
 		uasm_i_jr(&p, K1);
 		uasm_i_nop(&p);
@@ -1646,6 +1649,8 @@ static void
 iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
 {
 #ifdef CONFIG_SMP
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(p, 0);
 # ifdef CONFIG_PHYS_ADDR_T_64BIT
 	if (cpu_has_64bits)
 		uasm_i_lld(p, pte, 0, ptr);
@@ -2259,6 +2264,8 @@ static void build_r4000_tlb_load_handler(void)
 #endif
 
 	uasm_l_nopage_tlbl(&l, p);
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_0 & 1) {
@@ -2313,6 +2320,8 @@ static void build_r4000_tlb_store_handler(void)
 #endif
 
 	uasm_l_nopage_tlbs(&l, p);
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
@@ -2368,6 +2377,8 @@ static void build_r4000_tlb_modify_handler(void)
 #endif
 
 	uasm_l_nopage_tlbm(&l, p);
+	if (current_cpu_type() == CPU_LOONGSON3)
+		uasm_i_sync(&p, 0);
 	build_restore_work_registers(&p);
 #ifdef CONFIG_CPU_MICROMIPS
 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
-- 
2.7.0