Re: [PATCH 1/3] mm/slub: fix the race between validate_slab and slab_free

On Sun, 29 May 2022, Hyeonggon Yoo wrote:

> > diff --git a/mm/slub.c b/mm/slub.c
> > index ed5c2c03a47a..310e56d99116 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -1374,15 +1374,12 @@ static noinline int free_debug_processing(
> >  	void *head, void *tail, int bulk_cnt,
> >  	unsigned long addr)
> >  {
> > -	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
> >  	void *object = head;
> >  	int cnt = 0;
> > -	unsigned long flags, flags2;
> > +	unsigned long flags;
> >  	int ret = 0;
> >  
> > -	spin_lock_irqsave(&n->list_lock, flags);
> > -	slab_lock(slab, &flags2);
> > -
> > +	slab_lock(slab, &flags);
> >  	if (s->flags & SLAB_CONSISTENCY_CHECKS) {
> >  		if (!check_slab(s, slab))
> >  			goto out;
> > @@ -1414,8 +1411,7 @@ static noinline int free_debug_processing(
> >  		slab_err(s, slab, "Bulk freelist count(%d) invalid(%d)\n",
> >  			 bulk_cnt, cnt);
> >  
> > -	slab_unlock(slab, &flags2);
> > -	spin_unlock_irqrestore(&n->list_lock, flags);
> > +	slab_unlock(slab, &flags);
> >  	if (!ret)
> >  		slab_fix(s, "Object at 0x%p not freed", object);
> >  	return ret;
> > @@ -3304,7 +3300,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
> >  
> >  {
> >  	void *prior;
> > -	int was_frozen;
> > +	int was_frozen, to_take_off = 0;
> >  	struct slab new;
> >  	unsigned long counters;
> >  	struct kmem_cache_node *n = NULL;
> > @@ -3315,15 +3311,19 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
> >  	if (kfence_free(head))
> >  		return;
> >  
> > +	n = get_node(s, slab_nid(slab));
> > +	spin_lock_irqsave(&n->list_lock, flags);
> > +
> 
> Oh please don't do this.
> 
> The SLUB free slowpath can be hit a lot depending on the workload.
> 
> __slab_free() tries its best not to take n->list_lock; currently it
> takes n->list_lock only when the slab needs to be removed from a list.
> 
> Unconditionally taking n->list_lock will degrade performance.
> 

This is a good point; it would be useful to gather some benchmarks for
workloads that are known to thrash some caches and would hit this path,
such as netperf TCP_RR.
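
For reference, the conditional locking Hyeonggon describes is the
current shape of the mainline __slab_free() cmpxchg loop. A simplified
sketch (paraphrasing mm/slub.c, with the cpu-partial freezing branch
omitted, so not the exact upstream code):

	do {
		if (unlikely(n)) {
			/* Retrying: drop the lock taken on the last pass. */
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = slab->freelist;
		counters = slab->counters;
		set_freepointer(s, tail, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse -= cnt;
		if ((!new.inuse || !prior) && !was_frozen) {
			/*
			 * The slab may become empty or may need to be
			 * put on the partial list, so only now take
			 * the per-node lock.
			 */
			n = get_node(s, slab_nid(slab));
			spin_lock_irqsave(&n->list_lock, flags);
		}
	} while (!cmpxchg_double_slab(s, slab, prior, counters,
				      head, new.counters, "__slab_free"));

Hoisting spin_lock_irqsave() above this loop, as the patch does, turns
that conditional acquisition into an unconditional one on every
slowpath free, which is what a benchmark run (e.g. netperf -H <server>
-t TCP_RR -l 60) should quantify.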
