> > so that when you read that function on its own, it's clear that the > > lock is always held over that critical section - and the issue is that > > perhaps the lock was already taken by the caller. > > Actually that "already taken" becomes "caller does not need it/can't > even take the local lock as it's not local" (it's a cpu hot remove > handler on behalf of another, dead cpu). > > So would it work with something like the following cleanup on top later > after proper testing? (now just compile tested). Scroll downward... > ---8<--- > diff --git a/mm/slub.c b/mm/slub.c > index df1ac8aff86f..0d9e63e918f1 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -2566,38 +2566,33 @@ static inline void unfreeze_partials_cpu(struct kmem_cache *s, > > #endif /* CONFIG_SLUB_CPU_PARTIAL */ > > -static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, > - bool lock) > +static inline struct page * > +__detach_cpu_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, > + void **freelist) > { > - unsigned long flags; > - void *freelist; > struct page *page; > > - if (lock) > - local_lock_irqsave(&s->cpu_slab->lock, flags); > - > - freelist = c->freelist; > page = c->page; > + *freelist = c->freelist; > > c->page = NULL; > c->freelist = NULL; > c->tid = next_tid(c->tid); > > - if (lock) > - local_unlock_irqrestore(&s->cpu_slab->lock, flags); > - > - if (page) > - deactivate_slab(s, page, freelist); > - > - stat(s, CPUSLAB_FLUSH); > + return page; > } > > static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) > { > struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); > + struct page *page; > + void *freelist; > > - if (c->page) > - flush_slab(s, c, false); > + if (c->page) { > + page = __detach_cpu_slab(s, c, &freelist); > + deactivate_slab(s, page, freelist); > + stat(s, CPUSLAB_FLUSH); > + } > > unfreeze_partials_cpu(s, c); > } > @@ -2618,14 +2613,24 @@ static void flush_cpu_slab(struct work_struct *w) > struct kmem_cache *s;
> struct kmem_cache_cpu *c; > struct slub_flush_work *sfw; > + struct page *page; > + void *freelist; > + unsigned long flags; > > sfw = container_of(w, struct slub_flush_work, work); > > s = sfw->s; > c = this_cpu_ptr(s->cpu_slab); > > - if (c->page) > - flush_slab(s, c, true); > + if (c->page) { > + local_lock_irqsave(&s->cpu_slab->lock, flags); > + page = __detach_cpu_slab(s, c, &freelist); > + local_unlock_irqrestore(&s->cpu_slab->lock, flags); > + > + if (page) > + deactivate_slab(s, page, freelist); > + stat(s, CPUSLAB_FLUSH); > + } > > unfreeze_partials(s); > } To my eyeballs, below duplication of a couple lines of initialization needed by the lockless function is less icky than the double return. --- mm/slub.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) --- a/mm/slub.c +++ b/mm/slub.c @@ -2566,15 +2566,13 @@ static inline void unfreeze_partials_cpu #endif /* CONFIG_SLUB_CPU_PARTIAL */ -static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c, - bool lock) +static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) { unsigned long flags; void *freelist; struct page *page; - if (lock) - local_lock_irqsave(&s->cpu_slab->lock, flags); + local_lock_irqsave(&s->cpu_slab->lock, flags); freelist = c->freelist; page = c->page; @@ -2583,8 +2581,7 @@ static inline void flush_slab(struct kme c->freelist = NULL; c->tid = next_tid(c->tid); - if (lock) - local_unlock_irqrestore(&s->cpu_slab->lock, flags); + local_unlock_irqrestore(&s->cpu_slab->lock, flags); if (page) deactivate_slab(s, page, freelist); @@ -2595,11 +2592,19 @@ static inline void flush_slab(struct kme static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) { struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); + struct page *page = c->page; + void *freelist = c->freelist; - if (c->page) - flush_slab(s, c, false); + c->page = NULL; + c->freelist = NULL; + c->tid = next_tid(c->tid); + + if (page) + deactivate_slab(s, page, freelist);
unfreeze_partials_cpu(s, c); + + stat(s, CPUSLAB_FLUSH); } struct slub_flush_work { @@ -2625,7 +2630,7 @@ static void flush_cpu_slab(struct work_s c = this_cpu_ptr(s->cpu_slab); if (c->page) - flush_slab(s, c, true); + flush_slab(s, c); unfreeze_partials(s); }