On Wed, May 24, 2023 at 11:32:47AM +0200, Peter Zijlstra wrote: > On Mon, May 15, 2023 at 09:57:07AM +0200, Peter Zijlstra wrote: > > > @@ -3008,6 +3029,22 @@ static inline bool pfmemalloc_match(stru > > } > > > > #ifndef CONFIG_SLUB_TINY > > +static inline bool > > +__update_cpu_freelist_fast(struct kmem_cache *s, > > + void *freelist_old, void *freelist_new, > > + unsigned long tid) > > +{ > > +#ifdef system_has_freelist_aba > > + freelist_aba_t old = { .freelist = freelist_old, .counter = tid }; > > + freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) }; > > + > > + return this_cpu_cmpxchg_freelist(s->cpu_slab->freelist_tid.full, > > + old.full, new.full) == old.full; > > +#else > > + return false; > > +#endif > > +} > > + > > /* > > * Check the slab->freelist and either transfer the freelist to the > > * per cpu freelist or deactivate the slab. > > @@ -3359,11 +3396,7 @@ static __always_inline void *__slab_allo > > * against code executing on this cpu *not* from access by > > * other cpus. > > */ > > - if (unlikely(!this_cpu_cmpxchg_double( > > - s->cpu_slab->freelist, s->cpu_slab->tid, > > - object, tid, > > - next_object, next_tid(tid)))) { > > - > > + if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) { > > note_cmpxchg_failure("slab_alloc", s, tid); > > goto redo; > > } > > @@ -3736,11 +3769,7 @@ static __always_inline void do_slab_free > > > > set_freepointer(s, tail_obj, freelist); > > > > - if (unlikely(!this_cpu_cmpxchg_double( > > - s->cpu_slab->freelist, s->cpu_slab->tid, > > - freelist, tid, > > - head, next_tid(tid)))) { > > - > > + if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) { > > note_cmpxchg_failure("slab_free", s, tid); > > goto redo; > > } > > This isn't right; the this_cpu_cmpxchg_double() was unconditional and > relied on the local_irq_save() fallback when no native cmpxchg128 is > present. This means this_cpu_cmpxchg128 is expected to be present on all 64bit archs, except Mark just found out that HPPA doens't support __int128 until gcc-11. (I've been building using gcc-12.2) And because the cmpxchg128 fallback relies on '==' we can't trivally fudge that with a struct type either :/ Now, afaict it all magically works if I use: #ifdef __SIZEOF_INT128__ typedef __s128 s128 typedef __u128 u128 #else #if defined(CONFIG_PARISC) && defined(CONFIG_64BIT) typedef long double u128; #endif #endif but that is *super* gross. The alternative is raising the minimum GCC for PARISC to gcc-11.. Yet another alternative is using a struct type and an equality function, just for this. Anybody?