On Tue, May 30, 2023 at 04:22:32PM +0200, Peter Zijlstra wrote:

> Yet another alternative is using a struct type and an equality function,
> just for this.

The best I could come up with in that regard is the below. It builds on
HPPA64 and x86_64, but I've not run it yet.

(also, the introduction of this_cpu_try_cmpxchg() should probably be
split out into its own patch)

--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -99,6 +99,19 @@ do {									\
 	__ret;								\
 })
 
+/*
+ * Evaluates to true when raw_cpu_cmpxchg() returned the expected *@ovalp;
+ * otherwise writes the returned value back to *@ovalp and evaluates to false.
+ */
+#define raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)			\
+({									\
+	typeof(pcp) __ret, __old = *(ovalp);				\
+	__ret = raw_cpu_cmpxchg(pcp, __old, nval);			\
+	if (!likely(__ret == __old))					\
+		*(ovalp) = __ret;					\
+	likely(__ret == __old);						\
+})
+
 #define __this_cpu_generic_read_nopreempt(pcp)				\
 ({									\
 	typeof(pcp) ___ret;						\
@@ -167,6 +180,15 @@ do {									\
 	__ret;								\
 })
 
+#define this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)			\
+({									\
+	typeof(pcp) __ret, __old = *(ovalp);				\
+	__ret = this_cpu_cmpxchg(pcp, __old, nval);			\
+	if (!likely(__ret == __old))					\
+		*(ovalp) = __ret;					\
+	likely(__ret == __old);						\
+})
+
 #ifndef raw_cpu_read_1
 #define raw_cpu_read_1(pcp)		raw_cpu_generic_read(pcp)
 #endif
@@ -258,6 +280,41 @@ do {									\
 #define raw_cpu_xchg_8(pcp, nval)	raw_cpu_generic_xchg(pcp, nval)
 #endif
 
+#ifndef __SIZEOF_INT128__
+/*
+ * Without __int128, u128 may be a struct and cannot be compared with ==, so
+ * the expected and current values are compared with __builtin_memcmp().
+ */
+#define raw_cpu_generic_try_cmpxchg_memcmp(pcp, ovalp, nval)		\
+({									\
+	typeof(pcp) *__p = raw_cpu_ptr(&(pcp));				\
+	typeof(pcp) __ret, __old = *(ovalp);				\
+	bool __s;							\
+	__ret = *__p;							\
+	if (!__builtin_memcmp(&__ret, &__old, sizeof(pcp))) {		\
+		*__p = nval;						\
+		__s = true;						\
+	} else {							\
+		*(ovalp) = __ret;					\
+		__s = false;						\
+	}								\
+	__s;								\
+})
+
+#define raw_cpu_generic_cmpxchg_memcmp(pcp, oval, nval)			\
+({									\
+	/* ___old: must not be shadowed by the helper's own __old */	\
+	typeof(pcp) ___old = (oval);					\
+	raw_cpu_generic_try_cmpxchg_memcmp(pcp, &___old, nval);		\
+	___old;								\
+})
+
+#define raw_cpu_cmpxchg128(pcp, oval, nval) \
+	raw_cpu_generic_cmpxchg_memcmp(pcp, oval, nval)
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg_memcmp(pcp, ovalp, nval)
+#endif
+
 #ifndef raw_cpu_cmpxchg_1
 #define raw_cpu_cmpxchg_1(pcp, oval, nval) \
 	raw_cpu_generic_cmpxchg(pcp, oval, nval)
@@ -283,6 +340,31 @@ do {									\
 	raw_cpu_generic_cmpxchg(pcp, oval, nval)
 #endif
 
+#ifndef raw_cpu_try_cmpxchg_1
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef raw_cpu_try_cmpxchg_2
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef raw_cpu_try_cmpxchg_4
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef raw_cpu_try_cmpxchg_8
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef raw_cpu_try_cmpxchg64
+#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef raw_cpu_try_cmpxchg128
+#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+	raw_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+
 #ifndef this_cpu_read_1
 #define this_cpu_read_1(pcp)		this_cpu_generic_read(pcp)
 #endif
@@ -374,6 +456,34 @@ do {									\
 #define this_cpu_xchg_8(pcp, nval)	this_cpu_generic_xchg(pcp, nval)
 #endif
 
+#ifndef __SIZEOF_INT128__
+/* Same as the raw_cpu_*_memcmp() helpers, with local interrupts disabled. */
+#define this_cpu_generic_try_cmpxchg_memcmp(pcp, ovalp, nval)		\
+({									\
+	bool __ret;							\
+	unsigned long __flags;						\
+	raw_local_irq_save(__flags);					\
+	__ret = raw_cpu_generic_try_cmpxchg_memcmp(pcp, ovalp, nval);	\
+	raw_local_irq_restore(__flags);					\
+	__ret;								\
+})
+
+#define this_cpu_generic_cmpxchg_memcmp(pcp, oval, nval)		\
+({									\
+	typeof(pcp) __ret;						\
+	unsigned long __flags;						\
+	raw_local_irq_save(__flags);					\
+	__ret = raw_cpu_generic_cmpxchg_memcmp(pcp, oval, nval);	\
+	raw_local_irq_restore(__flags);					\
+	__ret;								\
+})
+
+#define this_cpu_cmpxchg128(pcp, oval, nval) \
+	this_cpu_generic_cmpxchg_memcmp(pcp, oval, nval)
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg_memcmp(pcp, ovalp, nval)
+#endif
+
 #ifndef this_cpu_cmpxchg_1
 #define this_cpu_cmpxchg_1(pcp, oval, nval) \
 	this_cpu_generic_cmpxchg(pcp, oval, nval)
@@ -399,4 +509,29 @@ do {									\
 	this_cpu_generic_cmpxchg(pcp, oval, nval)
 #endif
 
+#ifndef this_cpu_try_cmpxchg_1
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef this_cpu_try_cmpxchg_2
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef this_cpu_try_cmpxchg_4
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef this_cpu_try_cmpxchg_8
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef this_cpu_try_cmpxchg64
+#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+#ifndef this_cpu_try_cmpxchg128
+#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) \
+	this_cpu_generic_try_cmpxchg(pcp, ovalp, nval)
+#endif
+
 #endif /* _ASM_GENERIC_PERCPU_H_ */
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -13,6 +13,13 @@
 #ifdef __SIZEOF_INT128__
 typedef __s128 s128;
 typedef __u128 u128;
+#else
+#ifdef CONFIG_64BIT
+/* hack for this_cpu_cmpxchg128 */
+typedef struct {
+	u64 a, b;
+} u128 __attribute__((aligned(16)));
+#endif
 #endif
 
 typedef u32 __kernel_dev_t;
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,14 +11,14 @@ void __init kmem_cache_init(void);
 # define system_has_freelist_aba()	system_has_cmpxchg128()
 # define try_cmpxchg_freelist		try_cmpxchg128
 # endif
-#define this_cpu_cmpxchg_freelist	this_cpu_cmpxchg128
+#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg128
 typedef u128 freelist_full_t;
 #else /* CONFIG_64BIT */
 # ifdef system_has_cmpxchg64
 # define system_has_freelist_aba()	system_has_cmpxchg64()
 # define try_cmpxchg_freelist		try_cmpxchg64
 # endif
-#define this_cpu_cmpxchg_freelist	this_cpu_cmpxchg64
+#define this_cpu_try_cmpxchg_freelist	this_cpu_try_cmpxchg64
 typedef u64 freelist_full_t;
 #endif /* CONFIG_64BIT */
 
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3037,8 +3037,8 @@ __update_cpu_freelist_fast(struct kmem_c
 	freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
 	freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
 
-	return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
-					 old.full, new.full) == old.full;
+	return this_cpu_try_cmpxchg_freelist(s->cpu_slab->freelist_tid.full,
+					     &old.full, new.full);
 }
 
 /*