On Mon, Dec 19, 2022 at 04:35:30PM +0100, Peter Zijlstra wrote:
> For all architectures that currently support cmpxchg_double()
> implement the cmpxchg128() family of functions that is basically the
> same but with a saner interface.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
> ---
>  arch/arm64/include/asm/atomic_ll_sc.h |   38 +++++++++++++++++++++++
>  arch/arm64/include/asm/atomic_lse.h   |   33 +++++++++++++++++++-
>  arch/arm64/include/asm/cmpxchg.h      |   26 ++++++++++++++++
>  arch/s390/include/asm/cmpxchg.h       |   33 ++++++++++++++++++++
>  arch/x86/include/asm/cmpxchg_32.h     |    3 +
>  arch/x86/include/asm/cmpxchg_64.h     |   55 +++++++++++++++++++++++++++++++++-
>  6 files changed, 185 insertions(+), 3 deletions(-)
> 
> --- a/arch/arm64/include/asm/atomic_ll_sc.h
> +++ b/arch/arm64/include/asm/atomic_ll_sc.h
> @@ -326,6 +326,44 @@ __CMPXCHG_DBL(   ,        ,  ,         )
>  __CMPXCHG_DBL(_mb, dmb ish, l, "memory")
> 
>  #undef __CMPXCHG_DBL
> +
> +union __u128_halves {
> +	u128 full;
> +	struct {
> +		u64 low, high;
> +	};
> +};
> +
> +#define __CMPXCHG128(name, mb, rel, cl)					\
> +static __always_inline u128						\
> +__ll_sc__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new)	\
> +{									\
> +	union __u128_halves r, o = { .full = (old) },			\
> +			       n = { .full = (new) };			\
> +									\
> +	asm volatile("// __cmpxchg128" #name "\n"			\
> +	"	prfm	pstl1strm, %2\n"				\
> +	"1:	ldxp	%0, %1, %2\n"					\
> +	"	eor	%3, %0, %3\n"					\
> +	"	eor	%4, %1, %4\n"					\
> +	"	orr	%3, %4, %3\n"					\
> +	"	cbnz	%3, 2f\n"					\
> +	"	st" #rel "xp	%w3, %5, %6, %2\n"			\
> +	"	cbnz	%w3, 1b\n"					\
> +	"	" #mb "\n"						\
> +	"2:"								\
> +	: "=&r" (r.low), "=&r" (r.high), "+Q" (*(unsigned long *)ptr)	\
> +	: "r" (o.low), "r" (o.high), "r" (n.low), "r" (n.high)		\
> +	: cl);								\
> +									\
> +	return r.full;							\
> +}
> +
> +__CMPXCHG128(   ,        ,  ,         )
> +__CMPXCHG128(_mb, dmb ish, l, "memory")
> +
> +#undef __CMPXCHG128
> +
>  #undef K
> 
>  #endif	/* __ASM_ATOMIC_LL_SC_H */
> --- a/arch/arm64/include/asm/atomic_lse.h
> +++ b/arch/arm64/include/asm/atomic_lse.h
> @@ -151,7 +151,7 @@ __lse_atomic64_fetch_##op##name(s64 i, a
>  	"	" #asm_op #mb "	%[i], %[old], %[v]"			\
>  	: [v] "+Q" (v->counter),					\
>  	  [old] "=r" (old)						\
> -	: [i] "r" (i) 							\
> +	: [i] "r" (i)							\
>  	: cl);								\
>  									\
>  	return old;							\
> @@ -324,4 +324,35 @@ __CMPXCHG_DBL(_mb, al, "memory")
> 
>  #undef __CMPXCHG_DBL
> 
> +#define __CMPXCHG128(name, mb, cl...)					\
> +static __always_inline u128						\
> +__lse__cmpxchg128##name(volatile u128 *ptr, u128 old, u128 new)	\
> +{									\
> +	union __u128_halves r, o = { .full = (old) },			\
> +			       n = { .full = (new) };			\
> +	register unsigned long x0 asm ("x0") = o.low;			\
> +	register unsigned long x1 asm ("x1") = o.high;			\
> +	register unsigned long x2 asm ("x2") = n.low;			\
> +	register unsigned long x3 asm ("x3") = n.high;			\
> +	register unsigned long x4 asm ("x4") = (unsigned long)ptr;	\
> +									\
> +	asm volatile(							\
> +	__LSE_PREAMBLE							\
> +	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
> +	: [old1] "+&r" (x0), [old2] "+&r" (x1),				\
> +	  [v] "+Q" (*(unsigned long *)ptr)				\
> +	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4),		\

Issue #1: the line below can be removed, otherwise..

> +	  [oldval1] "r" (r.low), [oldval2] "r" (r.high)			\

warning: ./arch/arm64/include/asm/atomic_lse.h: In function '__lse__cmpxchg128_mb':
./arch/arm64/include/asm/atomic_lse.h:309:27: warning: 'r.<U97b8>.low' is used uninitialized [-Wuninitialized]
  309 |           [oldval1] "r" (r.low), [oldval2] "r" (r.high)         \
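
i.e. with those two inputs dropped (and the now-dangling comma after the
[ptr] operand removed as well), I'd expect the asm to end up looking
roughly like the below -- untested, just to illustrate what I mean:

	asm volatile(							\
	__LSE_PREAMBLE							\
	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
	: [old1] "+&r" (x0), [old2] "+&r" (x1),				\
	  [v] "+Q" (*(unsigned long *)ptr)				\
	: [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4)		\
	: cl);								\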

> +	: cl);								\
> +									\
> +	r.low = x0; r.high = x1;					\
> +									\
> +	return r.full;							\
> +}
> +
> +__CMPXCHG128(   ,   )
> +__CMPXCHG128(_mb, al, "memory")
> +
> +#undef __CMPXCHG128
> +
>  #endif	/* __ASM_ATOMIC_LSE_H */
> --- a/arch/arm64/include/asm/cmpxchg.h
> +++ b/arch/arm64/include/asm/cmpxchg.h
> @@ -147,6 +147,19 @@ __CMPXCHG_DBL(_mb)
> 
>  #undef __CMPXCHG_DBL
> 
> +#define __CMPXCHG128(name)						\
> +static inline long __cmpxchg128##name(volatile u128 *ptr,		\

Issue #2: this should be

	static inline u128 __cmpxchg128##name(..)

because cmpxchg* needs to return the old value.
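
Fully spelled out, I think the wrapper wants to be something like this
(untested sketch; identical to the quoted code except for the return
type):

	#define __CMPXCHG128(name)						\
	static inline u128 __cmpxchg128##name(volatile u128 *ptr,		\
					      u128 old, u128 new)		\
	{									\
		return __lse_ll_sc_body(_cmpxchg128##name,			\
					ptr, old, new);				\
	}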

Regards,
Boqun

> +					      u128 old, u128 new)	\
> +{									\
> +	return __lse_ll_sc_body(_cmpxchg128##name,			\
> +				ptr, old, new);				\
> +}
> +
> +__CMPXCHG128(   )
> +__CMPXCHG128(_mb)
> +
> +#undef __CMPXCHG128
> +
>  #define __CMPXCHG_GEN(sfx)						\
>  static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
>  						     unsigned long old,	\
> @@ -229,6 +242,19 @@ __CMPXCHG_GEN(_mb)
>  	__ret;								\
>  })
> 
> +/* cmpxchg128 */
> +#define system_has_cmpxchg128()		1
> +
> +#define arch_cmpxchg128(ptr, o, n)					\
> +({									\
> +	__cmpxchg128_mb((ptr), (o), (n));				\
> +})
> +
> +#define arch_cmpxchg128_local(ptr, o, n)				\
> +({									\
> +	__cmpxchg128((ptr), (o), (n));					\
> +})
> +
>  #define __CMPWAIT_CASE(w, sfx, sz)					\
>  static inline void __cmpwait_case_##sz(volatile void *ptr,		\
>  				       unsigned long val)		\
> --- a/arch/s390/include/asm/cmpxchg.h
> +++ b/arch/s390/include/asm/cmpxchg.h
> @@ -201,4 +201,37 @@ static __always_inline int __cmpxchg_dou
>  		    (unsigned long)(n1), (unsigned long)(n2));		\
>  })
> 
> +#define system_has_cmpxchg128()		1
> +
> +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
> +{
> +	asm volatile(
> +		"	cdsg	%[old],%[new],%[ptr]\n"
> +		: [old] "+&d" (old)
> +		: [new] "d" (new),
> +		  [ptr] "QS" (*(unsigned long *)ptr)
> +		: "memory", "cc");
> +	return old;
> +}
> +
> +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *oldp, u128 new)
> +{
> +	u128 old = *oldp;
> +	int cc;
> +
> +	asm volatile(
> +		"	cdsg	%[old],%[new],%[ptr]\n"
> +		"	ipm	%[cc]\n"
> +		"	srl	%[cc],28\n"
> +		: [cc] "=&d" (cc), [old] "+&d" (old)
> +		: [new] "d" (new),
> +		  [ptr] "QS" (*(unsigned long *)ptr)
> +		: "memory", "cc");
> +
> +	if (unlikely(!cc))
> +		*oldp = old;
> +
> +	return likely(cc);
> +}
> +
>  #endif /* __ASM_CMPXCHG_H */
> --- a/arch/x86/include/asm/cmpxchg_32.h
> +++ b/arch/x86/include/asm/cmpxchg_32.h
> @@ -103,6 +103,7 @@ static inline bool __try_cmpxchg64(volat
> 
>  #endif
> 
> -#define system_has_cmpxchg_double()	boot_cpu_has(X86_FEATURE_CX8)
> +#define system_has_cmpxchg_double()	boot_cpu_has(X86_FEATURE_CX8)
> +#define system_has_cmpxchg64()		boot_cpu_has(X86_FEATURE_CX8)
> 
>  #endif	/* _ASM_X86_CMPXCHG_32_H */
> --- a/arch/x86/include/asm/cmpxchg_64.h
> +++ b/arch/x86/include/asm/cmpxchg_64.h
> @@ -20,6 +20,59 @@
>  	arch_try_cmpxchg((ptr), (po), (n));				\
>  })
> 
> -#define system_has_cmpxchg_double()	boot_cpu_has(X86_FEATURE_CX16)
> +union __u128_halves {
> +	u128 full;
> +	struct {
> +		u64 low, high;
> +	};
> +};
> +
> +static __always_inline u128 arch_cmpxchg128(volatile u128 *ptr, u128 old, u128 new)
> +{
> +	union __u128_halves o = { .full = old, }, n = { .full = new, };
> +
> +	asm volatile(LOCK_PREFIX "cmpxchg16b %[ptr]"
> +		     : [ptr] "+m" (*ptr),
> +		       "+a" (o.low), "+d" (o.high)
> +		     : "b" (n.low), "c" (n.high)
> +		     : "memory");
> +
> +	return o.full;
> +}
> +
> +static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old, u128 new)
> +{
> +	union __u128_halves o = { .full = old, }, n = { .full = new, };
> +
> +	asm volatile("cmpxchg16b %[ptr]"
> +		     : [ptr] "+m" (*ptr),
> +		       "+a" (o.low), "+d" (o.high)
> +		     : "b" (n.low), "c" (n.high)
> +		     : "memory");
> +
> +	return o.full;
> +}
> +
> +static __always_inline bool arch_try_cmpxchg128(volatile u128 *ptr, u128 *old, u128 new)
> +{
> +	union __u128_halves o = { .full = *old, }, n = { .full = new, };
> +	bool ret;
> +
> +	asm volatile(LOCK_PREFIX "cmpxchg16b %[ptr]"
> +		     CC_SET(e)
> +		     : CC_OUT(e) (ret),
> +		       [ptr] "+m" (*ptr),
> +		       "+a" (o.low), "+d" (o.high)
> +		     : "b" (n.low), "c" (n.high)
> +		     : "memory");
> +
> +	if (unlikely(!ret))
> +		*old = o.full;
> +
> +	return likely(ret);
> +}
> +
> +#define system_has_cmpxchg_double()	boot_cpu_has(X86_FEATURE_CX16)
> +#define system_has_cmpxchg128()		boot_cpu_has(X86_FEATURE_CX16)
> 
>  #endif	/* _ASM_X86_CMPXCHG_64_H */
> 
> 
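
One more thing, not an issue, just to double-check I'm reading the
intended calling convention right: with the above, a user would end up
writing roughly the below, yes? (Illustrative sketch only; inc_pair() is
a made-up example, not something in this patch, and it relies on
arch_try_cmpxchg128() refreshing *old from memory on failure, as the
x86/s390 versions above do.)

	static void inc_pair(u128 *p)
	{
		u128 old = *p;	/* plain load; the CAS below validates it */

		while (!arch_try_cmpxchg128(p, &old, old + 1))
			;	/* @old now holds the current value, retry */
	}

Which is also why the helpers really need to hand the old value back to
the caller, per Issue #2 above.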