From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>

The generic atomic.h uses cmpxchg to implement the atomic operations,
which results in a dual loop (the cmpxchg retry loop wrapped around the
ldex/stex loop inside cmpxchg itself) and weakens the forward-progress
guarantee. This patch implements csky custom atomic operations with
ldex/stex instructions for better performance.

Important reference comment by Mark Rutland:
8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for full
barrier semantics")

Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@xxxxxxxxxxxxxx/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3
Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx>
Signed-off-by: Guo Ren <guoren@xxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
---
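For reference, the dual loop comes from the asm-generic fallback building
every operation on top of cmpxchg(); on csky, cmpxchg() is itself an
ldex/stex retry loop, so a contended update spins in two nested loops.
A simplified sketch of that fallback pattern (illustrative only, not the
exact asm-generic code):

static inline void generic_atomic_add(int i, atomic_t *v)
{
	int c, old;

	c = v->counter;
	/*
	 * Outer retry loop; the cmpxchg() below expands to an inner
	 * ldex/stex loop on csky, hence the dual loop.
	 */
	while ((old = cmpxchg(&v->counter, c, c + i)) != c)
		c = old;
}

With ldex/stex used directly, each operation needs only a single loop.
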
 arch/csky/include/asm/atomic.h | 154 +++++++++++++++++++++++++++++++++
 1 file changed, 154 insertions(+)
 create mode 100644 arch/csky/include/asm/atomic.h

diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
new file mode 100644
index 000000000000..5ecc657a2a66
--- /dev/null
+++ b/arch/csky/include/asm/atomic.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_ATOMIC_H
+#define __ASM_CSKY_ATOMIC_H
+
+#ifdef CONFIG_SMP
+#include <asm-generic/atomic64.h>
+
+#include <asm/cmpxchg.h>
+#include <asm/barrier.h>
+
+#define __atomic_acquire_fence() __smp_acquire_fence()
+
+#define __atomic_release_fence() __smp_release_fence()
+
+static __always_inline int arch_atomic_read(const atomic_t *v)
+{
+	return READ_ONCE(v->counter);
+}
+static __always_inline void arch_atomic_set(atomic_t *v, int i)
+{
+	WRITE_ONCE(v->counter, i);
+}
+
+#define ATOMIC_OP(op) \
+static __always_inline \
+void arch_atomic_##op(int i, atomic_t *v) \
+{ \
+	unsigned long tmp; \
+	__asm__ __volatile__ ( \
+	"1:	ldex.w		%0, (%2)	\n" \
+	"	" #op "		%0, %1		\n" \
+	"	stex.w		%0, (%2)	\n" \
+	"	bez		%0, 1b		\n" \
+	: "=&r" (tmp) \
+	: "r" (i), "r" (&v->counter) \
+	: "memory"); \
+}
+
+ATOMIC_OP(add)
+ATOMIC_OP(sub)
+ATOMIC_OP(and)
+ATOMIC_OP( or)
+ATOMIC_OP(xor)
+
+#undef ATOMIC_OP
+
+#define ATOMIC_FETCH_OP(op) \
+static __always_inline \
+int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+{ \
+	register int ret, tmp; \
+	__asm__ __volatile__ ( \
+	"1:	ldex.w		%0, (%3)	\n" \
+	"	mov		%1, %0		\n" \
+	"	" #op "		%0, %2		\n" \
+	"	stex.w		%0, (%3)	\n" \
+	"	bez		%0, 1b		\n" \
+	: "=&r" (tmp), "=&r" (ret) \
+	: "r" (i), "r"(&v->counter) \
+	: "memory"); \
+	return ret; \
+}
+
+#define ATOMIC_OP_RETURN(op, c_op) \
+static __always_inline \
+int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \
+{ \
+	return arch_atomic_fetch_##op##_relaxed(i, v) c_op i; \
+} \
+static __always_inline \
+int arch_atomic_##op##_return(int i, atomic_t *v) \
+{ \
+	return arch_atomic_fetch_##op(i, v) c_op i; \
+}
+
+#define ATOMIC_OPS(op, c_op) \
+	ATOMIC_FETCH_OP(op) \
+	ATOMIC_OP_RETURN(op, c_op)
+
+ATOMIC_OPS(add, +)
+ATOMIC_OPS(sub, -)
+
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_sub_return arch_atomic_sub_return
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+
+#define ATOMIC_OPS(op) \
+	ATOMIC_FETCH_OP(op)
+
+ATOMIC_OPS(and)
+ATOMIC_OPS( or)
+ATOMIC_OPS(xor)
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+
+#undef ATOMIC_OPS
+
+#undef ATOMIC_FETCH_OP
+
+#define ATOMIC_OP() \
+static __always_inline \
+int arch_atomic_xchg_relaxed(atomic_t *v, int n) \
+{ \
+	return __xchg_relaxed(n, &(v->counter), 4); \
+} \
+static __always_inline \
+int arch_atomic_xchg(atomic_t *v, int n) \
+{ \
+	return __xchg(n, &(v->counter), 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \
+{ \
+	return __cmpxchg_relaxed(&(v->counter), o, n, 4); \
+} \
+static __always_inline \
+int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \
+{ \
+	return __cmpxchg(&(v->counter), o, n, 4); \
+}
+
+#define ATOMIC_OPS() \
+	ATOMIC_OP()
+
+ATOMIC_OPS()
+
+#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed
+#define arch_atomic_xchg arch_atomic_xchg
+#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed
+#define arch_atomic_cmpxchg arch_atomic_cmpxchg
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP
+
+#else
+#include <asm-generic/atomic.h>
+#endif
+
+#endif /* __ASM_CSKY_ATOMIC_H */
-- 
2.25.1