On Fri, May 13, 2022 at 11:10 AM Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> On Tue, May 10, 2022 at 05:42:17PM +0200, Uros Bizjak wrote:
>
> For the Changelog I would focus on the 64bit improvement and leave 32bit
> as a side-note.

Thanks, I will rephrase the ChangeLog.

>
> > ---
> >  arch/x86/include/asm/cmpxchg_32.h          | 43 ++++++++++++++++++++++
> >  arch/x86/include/asm/cmpxchg_64.h          |  6 +++
> >  include/linux/atomic/atomic-instrumented.h | 40 +++++++++++++++++++-
> >  scripts/atomic/gen-atomic-instrumented.sh  |  2 +-
> >  4 files changed, 89 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
> > index 0a7fe0321613..e874ff7f7529 100644
> > --- a/arch/x86/include/asm/cmpxchg_32.h
> > +++ b/arch/x86/include/asm/cmpxchg_32.h
> > @@ -42,6 +42,9 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
> >  #define arch_cmpxchg64_local(ptr, o, n)				\
> >  	((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
> >  					       (unsigned long long)(n)))
> > +#define arch_try_cmpxchg64(ptr, po, n)				\
> > +	((__typeof__(*(ptr)))__try_cmpxchg64((ptr), (unsigned long long *)(po), \
> > +					     (unsigned long long)(n)))
> >  #endif
> >
> >  static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
> > @@ -70,6 +73,25 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
> >  	return prev;
> >  }
> >
> > +static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
> > +{
> > +	bool success;
> > +	u64 prev;
> > +	asm volatile(LOCK_PREFIX "cmpxchg8b %2"
> > +		     CC_SET(z)
> > +		     : CC_OUT(z) (success),
> > +		       "=A" (prev),
> > +		       "+m" (*ptr)
> > +		     : "b" ((u32)new),
> > +		       "c" ((u32)(new >> 32)),
> > +		       "1" (*pold)
> > +		     : "memory");
> > +
> > +	if (unlikely(!success))
> > +		*pold = prev;
>
> I would prefer this be more like the existing try_cmpxchg code,
> perhaps:
>
> 	u64 old = *pold;
>
> 	asm volatile (LOCK_PREFIX "cmpxchg8b %[ptr]"
> 		      CC_SET(z)
> 		      : CC_OUT(z) (success),
> 			[ptr] "+m" (*ptr)
> 			"+A" (old)
> 		      : "b" ((u32)new)
> 			"c" ((u32)(new >> 32))
> 		      : "memory");
>
> 	if (unlikely(!success))
> 		*pold = old;
>
> The existing 32bit cmpxchg code is a 'bit' crusty.

I was trying to follow the existing __cmpxchg64 as closely as possible,
with the intention of a follow-up patch that would modernize everything in
cmpxchg_32.h. But I can certainly go the other way and submit the
modernized code right away.

> > +	return success;
> > +}
> > +
> >  #ifndef CONFIG_X86_CMPXCHG64
> >  /*
> >   * Building a kernel capable running on 80386 and 80486. It may be necessary
> > @@ -108,6 +130,27 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
> >  			    : "memory");				\
> >  	__ret; })
> >
> > +#define arch_try_cmpxchg64(ptr, po, n)				\
> > +({									\
> > +	bool success;							\
> > +	__typeof__(*(ptr)) __prev;					\
> > +	__typeof__(ptr) _old = (__typeof__(ptr))(po);			\
> > +	__typeof__(*(ptr)) __old = *_old;				\
> > +	__typeof__(*(ptr)) __new = (n);					\
> > +	alternative_io(LOCK_PREFIX_HERE					\
> > +		       "call cmpxchg8b_emu",				\
> > +		       "lock; cmpxchg8b (%%esi)" ,			\
> > +		       X86_FEATURE_CX8,					\
> > +		       "=A" (__prev),					\
> > +		       "S" ((ptr)), "0" (__old),			\
> > +		       "b" ((unsigned int)__new),			\
> > +		       "c" ((unsigned int)(__new>>32))			\
> > +		       : "memory");					\
> > +	success = (__prev == __old);					\
> > +	if (unlikely(!success))						\
> > +		*_old = __prev;						\
> > +	likely(success);						\
> > +})
>
> Wouldn't this be better written like the normal fallback wrapper?
>
> static __always_inline bool
> arch_try_cmpxchg64(u64 *v, u64 *old, u64 new)
> {
> 	u64 r, o = *old;
> 	r = arch_cmpxchg64(v, o, new);
> 	if (unlikely(r != o))
> 		*old = r;
> 	return likely(r == o);
> }
>
> Less magical, same exact code.

Also, I tried to follow the existing #defines there. I will improve the
code according to your suggestion here.

Thanks,
Uros.
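
P.S.: For completeness, a rough and untested sketch of how the
CONFIG_X86_CMPXCHG64 helper could look when rewritten in the style you
suggest above. The named [ptr] operand and the "+A" constraint are taken
from your snippet; the CC_SET()/CC_OUT() usage and the function signature
are kept from the patch, so this is just one possible shape of the
modernized code:

static __always_inline bool
__try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
{
	bool success;
	u64 old = *pold;

	/* cmpxchg8b compares EDX:EAX with *ptr and, on match, stores ECX:EBX. */
	asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
		     CC_SET(z)
		     : CC_OUT(z) (success),
		       [ptr] "+m" (*ptr),
		       "+A" (old)
		     : "b" ((u32)new),
		       "c" ((u32)(new >> 32))
		     : "memory");

	/* On failure, hand the value we actually observed back to the caller. */
	if (unlikely(!success))
		*pold = old;

	return success;
}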