Excerpts from Boqun Feng's message of November 14, 2020 1:30 am:
> Hi Nicholas,
>
> On Wed, Nov 11, 2020 at 09:07:23PM +1000, Nicholas Piggin wrote:
>> All the cool kids are doing it.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@xxxxxxxxx>
>> ---
>>  arch/powerpc/include/asm/atomic.h  | 681 ++++++++++-------------
>>  arch/powerpc/include/asm/cmpxchg.h |  62 +--
>>  2 files changed, 248 insertions(+), 495 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
>> index 8a55eb8cc97b..899aa2403ba7 100644
>> --- a/arch/powerpc/include/asm/atomic.h
>> +++ b/arch/powerpc/include/asm/atomic.h
>> @@ -11,185 +11,285 @@
>>  #include <asm/cmpxchg.h>
>>  #include <asm/barrier.h>
>>
>> +#define ARCH_ATOMIC
>> +
>> +#ifndef CONFIG_64BIT
>> +#include <asm-generic/atomic64.h>
>> +#endif
>> +
>>  /*
>>   * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
>>   * a "bne-" instruction at the end, so an isync is enough as a acquire barrier
>>   * on the platform without lwsync.
>>   */
>>  #define __atomic_acquire_fence() \
>> -	__asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
>> +	asm volatile(PPC_ACQUIRE_BARRIER "" : : : "memory")
>>
>>  #define __atomic_release_fence() \
>> -	__asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
>> +	asm volatile(PPC_RELEASE_BARRIER "" : : : "memory")
>>
>> -static __inline__ int atomic_read(const atomic_t *v)
>> -{
>> -	int t;
>> +#define __atomic_pre_full_fence		smp_mb
>>
>> -	__asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
>> +#define __atomic_post_full_fence	smp_mb
>>

Thanks for the review.

> Do you need to define __atomic_{pre,post}_full_fence for PPC? IIRC, they
> are default smp_mb__{before,atomic}_atomic(), so are smp_mb() defautly
> on PPC.

Okay, I didn't realise that defining them is not required.

>> -	return t;
>> +#define arch_atomic_read(v)		__READ_ONCE((v)->counter)
>> +#define arch_atomic_set(v, i)		__WRITE_ONCE(((v)->counter), (i))
>> +#ifdef CONFIG_64BIT
>> +#define ATOMIC64_INIT(i)		{ (i) }
>> +#define arch_atomic64_read(v)		__READ_ONCE((v)->counter)
>> +#define arch_atomic64_set(v, i)		__WRITE_ONCE(((v)->counter), (i))
>> +#endif
>> +
> [...]
>>
>> +#define ATOMIC_FETCH_OP_UNLESS_RELAXED(name, type, dtype, width, asm_op) \
>> +static inline int arch_##name##_relaxed(type *v, dtype a, dtype u)	\
>
> I don't think we have atomic_fetch_*_unless_relaxed() at atomic APIs,
> ditto for:
>
> 	atomic_fetch_add_unless_relaxed()
> 	atomic_inc_not_zero_relaxed()
> 	atomic_dec_if_positive_relaxed()
>
> , and we don't have the _acquire() and _release() variants for them
> either, and if you don't define their fully-ordered version (e.g.
> atomic_inc_not_zero()), atomic-arch-fallback.h will use read and cmpxchg
> to implement them, and I think not what we want.

Okay. How can those be added? The atomic generation is pretty complicated.
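If I understand the point correctly, the fallback Boqun is referring to
builds these operations out of a plain read plus a cmpxchg retry loop.
A rough sketch of what atomic-arch-fallback.h generates when the arch
does not provide its own versions (paraphrased from memory, so the
details may not be exact):

	/*
	 * Generic fallback (sketch): fully ordered, implemented as a
	 * read followed by a try_cmpxchg() retry loop rather than a
	 * single larx/stcx. loop in the arch code.
	 */
	static __always_inline int
	arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
	{
		int c = arch_atomic_read(v);

		do {
			if (unlikely(c == u))
				break;
		} while (!arch_atomic_try_cmpxchg(v, &c, c + a));

		return c;
	}

	static __always_inline bool
	arch_atomic_add_unless(atomic_t *v, int a, int u)
	{
		return arch_atomic_fetch_add_unless(v, a, u) != u;
	}

	static __always_inline bool
	arch_atomic_inc_not_zero(atomic_t *v)
	{
		return arch_atomic_add_unless(v, 1, 0);
	}

So unless the arch supplies at least the fully-ordered versions, this
is what we end up with.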
> [...]
>>
>>  #endif /* __KERNEL__ */
>>  #endif /* _ASM_POWERPC_ATOMIC_H_ */
>> diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
>> index cf091c4c22e5..181f7e8b3281 100644
>> --- a/arch/powerpc/include/asm/cmpxchg.h
>> +++ b/arch/powerpc/include/asm/cmpxchg.h
>> @@ -192,7 +192,7 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
>>  		(unsigned long)_x_, sizeof(*(ptr)));		\
>>  })
>>
>> -#define xchg_relaxed(ptr, x) \
>> +#define arch_xchg_relaxed(ptr, x) \
>>  ({							\
>>  	__typeof__(*(ptr)) _x_ = (x);			\
>>  	(__typeof__(*(ptr))) __xchg_relaxed((ptr),	\
>> @@ -448,35 +448,7 @@ __cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
>>  	return old;
>>  }
>>
>> -static __always_inline unsigned long
>> -__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
>> -		  unsigned int size)
>> -{
>> -	switch (size) {
>> -	case 1:
>> -		return __cmpxchg_u8_acquire(ptr, old, new);
>> -	case 2:
>> -		return __cmpxchg_u16_acquire(ptr, old, new);
>> -	case 4:
>> -		return __cmpxchg_u32_acquire(ptr, old, new);
>> -#ifdef CONFIG_PPC64
>> -	case 8:
>> -		return __cmpxchg_u64_acquire(ptr, old, new);
>> -#endif
>> -	}
>> -	BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
>> -	return old;
>> -}
>> -#define cmpxchg(ptr, o, n) \
>> -({							\
>> -	__typeof__(*(ptr)) _o_ = (o);			\
>> -	__typeof__(*(ptr)) _n_ = (n);			\
>> -	(__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,	\
>> -		(unsigned long)_n_, sizeof(*(ptr)));	\
>> -})
>> -
>> -
>
> If you remove {atomic_}_cmpxchg_{,_acquire}() and use the version
> provided by atomic-arch-fallback.h, then a fail cmpxchg or
> cmpxchg_acquire() will still result into a full barrier or a acquire
> barrier after the RMW operation, the barrier is not necessary and
> probably this is not what we want?

Why is that done? That seems like a very subtle difference. Shouldn't
the fallback version skip the barrier? (I've sketched my reading of the
generic wrapper below the sign-off.)

Thanks,
Nick
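---

For reference, this is how I read the generic wrapper that the fallback
header uses to build the _acquire form from the _relaxed one
(paraphrased from include/linux/atomic.h, so treat it as approximate):

	/*
	 * Sketch: run the relaxed op first, then execute the acquire
	 * fence unconditionally, i.e. on both the success and the
	 * failure path of a cmpxchg.
	 */
	#define __atomic_op_acquire(op, args...)			\
	({								\
		typeof(op##_relaxed(args)) __ret = op##_relaxed(args);	\
		__atomic_acquire_fence();				\
		__ret;							\
	})

	/* ...which the fallback header then uses as, roughly: */
	#define arch_cmpxchg_acquire(...)				\
		__atomic_op_acquire(arch_cmpxchg, __VA_ARGS__)

If that reading is right, the acquire barrier is executed even when the
compare fails, whereas the hand-written powerpc __cmpxchg_*_acquire
branches past PPC_ACQUIRE_BARRIER on failure. As far as I can tell the
fully-ordered cmpxchg is wrapped the same way, with smp_mb() on both
sides.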