> On Dec 25, 2018, at 8:51 AM, Huacai Chen <chenhc@xxxxxxxxxx> wrote:
>
> On the Loongson-2G/2H/3A/3B there is a hardware flaw: ll/sc and
> lld/scd are very weakly ordered. We should add sync instructions before
> each ll/lld and after the last sc/scd to work around it. Otherwise, this
> flaw will occasionally cause deadlock (e.g. when running heavy load tests
> with LTP).
>
> This patch is a less invasive but more comfortable solution; it is not
> even a sufficient solution in theory (but it is sufficient in practice).
> The solution that is sufficient in theory (which is more than a minimal
> change) looks too ugly; you can refer to:
> https://patchwork.linux-mips.org/patch/21134/.
>
> Why disable fix-loongson3-llsc in the compiler?
> Because the compiler fix will cause problems in the kernel's .fixup section.

It is actually the __ex_table section.

>
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Huacai Chen <chenhc@xxxxxxxxxx>
> ---
>  arch/mips/include/asm/atomic.h  |  4 ++++
>  arch/mips/include/asm/barrier.h |  6 ++++++
>  arch/mips/include/asm/bitops.h  | 10 ++++++++++
>  arch/mips/include/asm/edac.h    |  2 ++
>  arch/mips/include/asm/futex.h   |  4 ++++
>  arch/mips/include/asm/pgtable.h |  2 ++
>  arch/mips/loongson64/Platform   |  3 +++
>  arch/mips/mm/tlbex.c            | 11 +++++++++++
>  8 files changed, 42 insertions(+)
>
> diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h
> index 43fcd35..dbbc6c1 100644
> --- a/arch/mips/include/asm/atomic.h
> +++ b/arch/mips/include/asm/atomic.h
> @@ -58,6 +58,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
> 	if (kernel_uses_llsc) { \
> 		int temp; \
> 		\
> +		loongson_llsc_mb(); \

I noticed that we already have smp_mb__before_llsc() and smp_llsc_mb().
Some ll/sc sequences are wrapped in an smp_mb__before_llsc()/smp_llsc_mb()
pair and some are not -- why is that? I think we should add
smp_mb__before_llsc()/smp_llsc_mb() pairs around all ll/sc sequences.
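To make the question concrete, here is roughly what I mean -- a minimal
sketch only, with simplified stand-in barrier definitions (these are not the
kernel's real macros) and a bare ll/sc add loop that is not any actual
kernel function; it just shows where the two kinds of barriers would sit:

/*
 * Sketch only: stand-in barriers and a bare ll/sc retry loop, assuming
 * a MIPS target where ll/sc and sync are available.
 */
#define smp_mb__before_llsc()	__asm__ __volatile__("sync" : : : "memory")
#define smp_llsc_mb()		__asm__ __volatile__("sync" : : : "memory")
#define loongson_llsc_mb()	__asm__ __volatile__("sync" : : : "memory")

static inline void sketch_atomic_add(int i, int *counter)
{
	int temp;

	smp_mb__before_llsc();	/* ordering barrier before the whole sequence */
	loongson_llsc_mb();	/* this patch: barrier directly before the ll */
	__asm__ __volatile__(
	"1:	ll	%0, %1		\n"
	"	addu	%0, %2		\n"
	"	sc	%0, %1		\n"
	"	beqz	%0, 1b		\n"
	: "=&r" (temp), "+m" (*counter)
	: "r" (i));
	loongson_llsc_mb();	/* this patch: barrier after the last sc      */
	smp_llsc_mb();		/* ordering barrier after the whole sequence  */
}

The smp_mb__before_llsc()/smp_llsc_mb() pair orders the whole sequence
against surrounding accesses, while the loongson_llsc_mb() added by this
patch sits directly against the ll and the last sc; my question is why only
some call sites get the former pair.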
> 		__asm__ __volatile__( \
> 		" .set push \n" \
> 		" .set "MIPS_ISA_LEVEL" \n" \
> @@ -68,6 +69,7 @@ static __inline__ void atomic_##op(int i, atomic_t * v) \
> 		" .set pop \n" \
> 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
> 		: "Ir" (i)); \
> +		loongson_llsc_mb(); \
> 	} else { \
> 		unsigned long flags; \
> 		\
> @@ -256,6 +258,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
> 	if (kernel_uses_llsc) { \
> 		long temp; \
> 		\
> +		loongson_llsc_mb(); \
> 		__asm__ __volatile__( \
> 		" .set push \n" \
> 		" .set "MIPS_ISA_LEVEL" \n" \
> @@ -266,6 +269,7 @@ static __inline__ void atomic64_##op(long i, atomic64_t * v) \
> 		" .set pop \n" \
> 		: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (v->counter) \
> 		: "Ir" (i)); \
> +		loongson_llsc_mb(); \
> 	} else { \
> 		unsigned long flags; \
> 		\
> diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
> index a5eb1bb..7ce9a15 100644
> --- a/arch/mips/include/asm/barrier.h
> +++ b/arch/mips/include/asm/barrier.h
> @@ -222,6 +222,12 @@
> #define __smp_mb__before_atomic()	__smp_mb__before_llsc()
> #define __smp_mb__after_atomic()	smp_llsc_mb()
>
> +#if defined(CONFIG_LOONGSON3) && defined(CONFIG_SMP) /* Loongson-3's LLSC workaround */
> +#define loongson_llsc_mb()	__asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
> +#else
> +#define loongson_llsc_mb()	__asm__ __volatile__(" " : : :"memory")
> +#endif
> +
> #include <asm-generic/barrier.h>
>
> #endif /* __ASM_BARRIER_H */
> diff --git a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h
> index f2a840f..d70c7e5 100644
> --- a/arch/mips/include/asm/bitops.h
> +++ b/arch/mips/include/asm/bitops.h
> @@ -69,6 +69,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
> 		: "ir" (1UL << bit), GCC_OFF_SMALL_ASM() (*m));
> #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
> 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			" " __LL "%0, %1 # set_bit \n"
> @@ -77,8 +78,10 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (bit), "r" (~0));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
> 	} else if (kernel_uses_llsc) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			" .set push \n"
> @@ -90,6 +93,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (1UL << bit));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> 	} else
> 		__mips_set_bit(nr, addr);
> }
> @@ -123,6 +127,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
> 		: "ir" (~(1UL << bit)));
> #if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
> 	} else if (kernel_uses_llsc && __builtin_constant_p(bit)) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			" " __LL "%0, %1 # clear_bit \n"
> @@ -131,8 +136,10 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (bit));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> #endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
> 	} else if (kernel_uses_llsc) {
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			" .set push \n"
> @@ -144,6 +151,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (~(1UL << bit)));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> 	} else
> 		__mips_clear_bit(nr, addr);
> }
> @@ -193,6 +201,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
> 		unsigned long *m = ((unsigned long *) addr) + (nr >> SZLONG_LOG);
> 		unsigned long temp;
>
> +		loongson_llsc_mb();
> 		do {
> 			__asm__ __volatile__(
> 			" .set push \n"
> @@ -204,6 +213,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr)
> 			: "=&r" (temp), "+" GCC_OFF_SMALL_ASM() (*m)
> 			: "ir" (1UL << bit));
> 		} while (unlikely(!temp));
> +		loongson_llsc_mb();
> 	} else
> 		__mips_change_bit(nr, addr);
> }
> diff --git a/arch/mips/include/asm/edac.h b/arch/mips/include/asm/edac.h
> index c5d1477..1abf69c 100644
> --- a/arch/mips/include/asm/edac.h
> +++ b/arch/mips/include/asm/edac.h
> @@ -20,6 +20,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
> 		 * Intel: asm("lock; addl $0, %0"::"m"(*virt_addr));
> 		 */
>
> +		loongson_llsc_mb();
> 		__asm__ __volatile__ (
> 		" .set push \n"
> 		" .set mips2 \n"
> @@ -30,6 +31,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
> 		" .set pop \n"
> 		: "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*virt_addr)
> 		: GCC_OFF_SMALL_ASM() (*virt_addr));
> +		loongson_llsc_mb();
>
> 		virt_addr++;
> 	}
> diff --git a/arch/mips/include/asm/futex.h b/arch/mips/include/asm/futex.h
> index 8eff134..07b0be8 100644
> --- a/arch/mips/include/asm/futex.h
> +++ b/arch/mips/include/asm/futex.h
> @@ -50,6 +50,7 @@
> 		  "i" (-EFAULT) \
> 		: "memory"); \
> 	} else if (cpu_has_llsc) { \
> +		loongson_llsc_mb(); \
> 		__asm__ __volatile__( \
> 		" .set push \n" \
> 		" .set noat \n" \
> @@ -78,6 +79,7 @@
> 		: "0" (0), GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oparg), \
> 		  "i" (-EFAULT) \
> 		: "memory"); \
> +		loongson_llsc_mb(); \
> 	} else \
> 		ret = -ENOSYS; \
> }
> @@ -163,6 +165,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> 		  "i" (-EFAULT)
> 		: "memory");
> 	} else if (cpu_has_llsc) {
> +		loongson_llsc_mb();
> 		__asm__ __volatile__(
> 		"# futex_atomic_cmpxchg_inatomic \n"
> 		" .set push \n"
> @@ -192,6 +195,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
> 		: GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
> 		  "i" (-EFAULT)
> 		: "memory");
> +		loongson_llsc_mb();
> 	} else
> 		return -ENOSYS;
>
> diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
> index 57933fc..910851c 100644
> --- a/arch/mips/include/asm/pgtable.h
> +++ b/arch/mips/include/asm/pgtable.h
> @@ -228,6 +228,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
> 		: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
> 		: [global] "r" (page_global));
> 	} else if (kernel_uses_llsc) {
> +		loongson_llsc_mb();
> 		__asm__ __volatile__ (
> 		" .set push \n"
> 		" .set "MIPS_ISA_ARCH_LEVEL" \n"
> @@ -242,6 +243,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval)
> 		" .set pop \n"
> 		: [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp)
> 		: [global] "r" (page_global));
> +		loongson_llsc_mb();
> 	}
> #else /* !CONFIG_SMP */
> 	if (pte_none(*buddy))
> diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
> index 0fce460..3700dcf 100644
> --- a/arch/mips/loongson64/Platform
> +++ b/arch/mips/loongson64/Platform
> @@ -23,6 +23,9 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
> endif
>
> cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
> +ifneq ($(call as-option,-Wa$(comma)-mfix-loongson3-llsc,),)
> +  cflags-$(CONFIG_CPU_LOONGSON3) += -Wa$(comma)-mno-fix-loongson3-llsc
> +endif
> #
> # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
> # as MIPS64 R2; older versions as just R1. This leaves the possibility open
> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> index 37b1cb2..e1a4637 100644
> --- a/arch/mips/mm/tlbex.c
> +++ b/arch/mips/mm/tlbex.c
> @@ -932,6 +932,8 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
> 	 * to mimic that here by taking a load/istream page
> 	 * fault.
> 	 */
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(p, 0);
> 	UASM_i_LA(p, ptr, (unsigned long)tlb_do_page_fault_0);
> 	uasm_i_jr(p, ptr);

This does not seem to be about the ll/sc problem, since no ll/sc is used
here at all. Yet with this patch, it seems much more stable. I tried adding
a sync before each ll and at the branch target inside the ll/sc loops; that
improved stability, but the machine still hangs.

>
> @@ -1556,6 +1558,7 @@ static void build_loongson3_tlb_refill_handler(void)
>
> 	if (check_for_high_segbits) {
> 		uasm_l_large_segbits_fault(&l, p);
> +		uasm_i_sync(&p, 0);
> 		UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
> 		uasm_i_jr(&p, K1);
> 		uasm_i_nop(&p);
> @@ -1646,6 +1649,8 @@ static void
> iPTE_LW(u32 **p, unsigned int pte, unsigned int ptr)
> {
> #ifdef CONFIG_SMP
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(p, 0);
> # ifdef CONFIG_PHYS_ADDR_T_64BIT
> 	if (cpu_has_64bits)
> 		uasm_i_lld(p, pte, 0, ptr);
> @@ -2259,6 +2264,8 @@ static void build_r4000_tlb_load_handler(void)
> #endif
>
> 	uasm_l_nopage_tlbl(&l, p);
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(&p, 0);
> 	build_restore_work_registers(&p);
> #ifdef CONFIG_CPU_MICROMIPS
> 	if ((unsigned long)tlb_do_page_fault_0 & 1) {
> @@ -2313,6 +2320,8 @@ static void build_r4000_tlb_store_handler(void)
> #endif
>
> 	uasm_l_nopage_tlbs(&l, p);
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(&p, 0);
> 	build_restore_work_registers(&p);
> #ifdef CONFIG_CPU_MICROMIPS
> 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
> @@ -2368,6 +2377,8 @@ static void build_r4000_tlb_modify_handler(void)
> #endif
>
> 	uasm_l_nopage_tlbm(&l, p);
> +	if (current_cpu_type() == CPU_LOONGSON3)
> +		uasm_i_sync(&p, 0);
> 	build_restore_work_registers(&p);
> #ifdef CONFIG_CPU_MICROMIPS
> 	if ((unsigned long)tlb_do_page_fault_1 & 1) {
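To be clearer about the experiment I mentioned above near
build_get_pgd_vmalloc64(): the placement I tried looks roughly like the
sketch below (illustrative only -- the helper name and loop body are made
up, not the actual code I tested). The sync sits at the top of the loop, so
it runs before the first ll and again at the target of the retry branch
taken when the sc fails. As noted, this improved stability but the machine
still hung.

/*
 * Sketch only (hypothetical helper, assuming a MIPS target where ll/sc
 * and sync are available): a sync at the loop label, i.e. before the ll
 * and at the target of the retry branch.
 */
static inline void sketch_add_with_sync(int i, int *counter)
{
	int temp;

	__asm__ __volatile__(
	"1:	sync			\n"	/* re-executed on every retry */
	"	ll	%0, %1		\n"
	"	addu	%0, %2		\n"
	"	sc	%0, %1		\n"
	"	beqz	%0, 1b		\n"
	: "=&r" (temp), "+m" (*counter)
	: "r" (i));
}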