On Mon, Apr 11, 2022 at 10:52 PM <guoren@xxxxxxxxxx> wrote: > > From: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> > > The generic atomic.h uses cmpxchg to implement the atomic > operations, which causes a dual loop that weakens the forward-progress > guarantee. This patch implements csky custom atomic operations with > ldex/stex instructions for the best performance. > > Important reference comment by Rutland: > 8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for > full barrier semantics") > > Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=LbAdu7jntZRUa=-dJwL0VfmDfBV5MHB=rcZ-w@xxxxxxxxxxxxxx/T/#m27a0f1342995deae49ce1d0e1f2683f8a181d6c3 > Signed-off-by: Guo Ren <guoren@xxxxxxxxxxxxxxxxx> > Signed-off-by: Guo Ren <guoren@xxxxxxxxxx> > Cc: Mark Rutland <mark.rutland@xxxxxxx> > --- > Changes in V2: > - Fixup use of acquire + release for barrier semantics by Rutland. > --- > arch/csky/include/asm/atomic.h | 130 +++++++++++++++++++++++++++++++++ > 1 file changed, 130 insertions(+) > create mode 100644 arch/csky/include/asm/atomic.h > > diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h > new file mode 100644 > index 000000000000..2e1a22f55ea1 > --- /dev/null > +++ b/arch/csky/include/asm/atomic.h > @@ -0,0 +1,130 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef __ASM_CSKY_ATOMIC_H > +#define __ASM_CSKY_ATOMIC_H > + > +#ifdef CONFIG_SMP > +# include <asm-generic/atomic64.h> > + > +#include <asm/cmpxchg.h> > +#include <asm/barrier.h> > + > +#define __atomic_acquire_fence() __smp_acquire_fence() > + > +#define __atomic_release_fence() __smp_release_fence() > + > +static __always_inline int arch_atomic_read(const atomic_t *v) > +{ > + return READ_ONCE(v->counter); > +} > +static __always_inline void arch_atomic_set(atomic_t *v, int i) > +{ > + WRITE_ONCE(v->counter, i); > +} > + > +#define ATOMIC_OP(op, asm_op, I) \ > +static __always_inline \ > +void arch_atomic_##op(int i, atomic_t *v) \ > +{ \ > + unsigned long tmp; \ > + __asm__ 
__volatile__ ( \ > + "1: ldex.w %0, (%2) \n" \ > + " " #op " %0, %1 \n" \ > + " stex.w %0, (%2) \n" \ > + " bez %0, 1b \n" \ > + : "=&r" (tmp) \ > + : "r" (I), "r" (&v->counter) \ > + : "memory"); \ > +} > + > +ATOMIC_OP(add, add, i) > +ATOMIC_OP(sub, add, -i) > +ATOMIC_OP(and, and, i) > +ATOMIC_OP( or, or, i) > +ATOMIC_OP(xor, xor, i) Sorry, it should be fixed up by: #define ATOMIC_OP(op) \ static __always_inline \ void arch_atomic_##op(int i, atomic_t *v) \ { \ unsigned long tmp; \ __asm__ __volatile__ ( \ "1: ldex.w %0, (%2) \n" \ " " #op " %0, %1 \n" \ " stex.w %0, (%2) \n" \ " bez %0, 1b \n" \ : "=&r" (tmp) \ : "r" (i), "r" (&v->counter) \ : "memory"); \ } ATOMIC_OP(add) ATOMIC_OP(sub) ATOMIC_OP(and) ATOMIC_OP( or) ATOMIC_OP(xor) > + > +#undef ATOMIC_OP > + > +#define ATOMIC_FETCH_OP(op, asm_op, I) \ > +static __always_inline \ > +int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ > +{ \ > + register int ret, tmp; \ > + __asm__ __volatile__ ( \ > + "1: ldex.w %0, (%3) \n" \ > + " mov %1, %0 \n" \ > + " " #op " %0, %2 \n" \ > + " stex.w %0, (%3) \n" \ > + " bez %0, 1b \n" \ > + : "=&r" (tmp), "=&r" (ret) \ > + : "r" (I), "r"(&v->counter) \ > + : "memory"); \ > + return ret; \ > +} > + > +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I) \ > +static __always_inline \ > +int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ > +{ \ > + return arch_atomic_fetch_##op##_relaxed(i, v) c_op I; \ > +} > + > +#define ATOMIC_OPS(op, asm_op, c_op, I) \ > + ATOMIC_FETCH_OP( op, asm_op, I) \ > + ATOMIC_OP_RETURN(op, asm_op, c_op, I) > + > +ATOMIC_OPS(add, add, +, i) > +ATOMIC_OPS(sub, add, +, -i) > + > +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed > +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed > + > +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed > +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed > + > +#undef ATOMIC_OPS > +#undef ATOMIC_OP_RETURN > + > +#define 
ATOMIC_OPS(op, asm_op, I) \ > + ATOMIC_FETCH_OP(op, asm_op, I) > + > +ATOMIC_OPS(and, and, i) > +ATOMIC_OPS( or, or, i) > +ATOMIC_OPS(xor, xor, i) > + > +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed > +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed > +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed > + > +#undef ATOMIC_OPS > + > +#undef ATOMIC_FETCH_OP > + > +#define ATOMIC_OP() \ > +static __always_inline \ > +int arch_atomic_xchg_relaxed(atomic_t *v, int n) \ > +{ \ > + return __xchg_relaxed(n, &(v->counter), 4); \ > +} \ > +static __always_inline \ > +int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \ > +{ \ > + return __cmpxchg_relaxed(&(v->counter), o, n, 4); \ > +} > + > +#define ATOMIC_OPS() \ > + ATOMIC_OP() > + > +ATOMIC_OPS() > + > +#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed > +#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed > + > +#undef ATOMIC_OPS > +#undef ATOMIC_OP > + > +#else > +# include <asm-generic/atomic.h> > +#endif > + > +#endif /* __ASM_CSKY_ATOMIC_H */ > -- > 2.25.1 > -- Best Regards Guo Ren ML: https://lore.kernel.org/linux-csky/