On 2/28/25 12:38, Jingxiang Zeng wrote:
> From: Zeng Jingxiang <linuszeng@xxxxxxxxxxx>
>
> It is observed that each time the memcg_slab_post_alloc_hook function
> and the zswap_store function are executed, xa_load will be executed
> once through the following path, which adds unnecessary overhead.
> This patch optimizes this part of the code. When a new mlru is
> inserted into list_lru, xa_load is only executed once, and other slab
> requests of the same type will not be executed repeatedly.
>
> __memcg_slab_post_alloc_hook
> ->memcg_list_lru_alloc
> ->->memcg_list_lru_allocated
> ->->->xa_load
>
> zswap_store
> ->memcg_list_lru_alloc
> ->->memcg_list_lru_allocated
> ->->->xa_load

How do you know it's xa_load itself that's the issue?

I think you might be able to eliminate some call overhead easily:

- move list_lru_memcg_aware() and memcg_list_lru_allocated() to
  list_lru.h
- make memcg_list_lru_alloc() also a static inline in list_lru.h, so it
  does the list_lru_memcg_aware() and memcg_list_lru_allocated() checks
  inline (they can even be likely()) and then calls
  __memcg_list_lru_alloc(), which is the current memcg_list_lru_alloc()
  renamed, with the checks moved away

The result is that callers of memcg_list_lru_alloc() will (in the likely
case) only perform a direct call to xa_load() in xarray.c and not an
additional call through memcg_list_lru_alloc() in list_lru.c. A rough,
untested sketch of what I mean is at the bottom, below the quoted patch.

> We created 1,000,000 negative dentries on test machines with 10, 1,000,
> and 10,000 cgroups for performance testing, and then used the bcc
> funclatency tool to capture the time consumption of the
> kmem_cache_alloc_lru_noprof function. The performance improvement
> ranged from 3.3% to 6.2%:
>
> 10 cgroups, 3.3% performance improvement.
> without the patch:
> avg = 1375 nsecs, total: 1375684993 nsecs, count: 1000000
> with the patch:
> avg = 1331 nsecs, total: 1331625726 nsecs, count: 1000000
>
> 1000 cgroups, 3.7% performance improvement.
> without the patch:
> avg = 1364 nsecs, total: 1364564848 nsecs, count: 1000000
> with the patch:
> avg = 1315 nsecs, total: 1315150414 nsecs, count: 1000000
>
> 10000 cgroups, 6.2% performance improvement.
> without the patch:
> avg = 1385 nsecs, total: 1385361153 nsecs, count: 1000002
> with the patch:
> avg = 1304 nsecs, total: 1304531155 nsecs, count: 1000000
>
> Signed-off-by: Zeng Jingxiang <linuszeng@xxxxxxxxxxx>
> Suggested-by: Kairui Song <kasong@xxxxxxxxxxx>
> ---
>  include/linux/list_lru.h |  2 --
>  mm/list_lru.c            | 22 +++++++++++++++-------
>  mm/memcontrol.c          | 16 ++--------------
>  mm/slab.h                |  4 ++--
>  mm/slub.c                | 20 +++++++++-----------
>  mm/zswap.c               |  9 ---------
>  6 files changed, 28 insertions(+), 45 deletions(-)
>
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index fe739d35a864..04d4b051f618 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -79,8 +79,6 @@ static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker
>  	return list_lru_init_memcg(lru, shrinker);
>  }
>
> -int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
> -			 gfp_t gfp);
>  void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
>
>  /**
> diff --git a/mm/list_lru.c b/mm/list_lru.c
> index 490473af3122..c5a5d61ac946 100644
> --- a/mm/list_lru.c
> +++ b/mm/list_lru.c
> @@ -49,6 +49,8 @@ static int lru_shrinker_id(struct list_lru *lru)
>  	return lru->shrinker_id;
>  }
>
> +static int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru);
> +
>  static inline struct list_lru_one *
>  list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
>  {
> @@ -84,6 +86,9 @@ lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
>  			spin_unlock_irq(&l->lock);
>  		else
>  			spin_unlock(&l->lock);
> +	} else {
> +		if (!memcg_list_lru_alloc(memcg, lru))
> +			goto again;
>  	}
>  	/*
>  	 * Caller may simply bail out if raced with reparenting or
> @@ -93,7 +98,6 @@ lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
>  		rcu_read_unlock();
>  		return NULL;
>  	}
> -	VM_WARN_ON(!css_is_dying(&memcg->css));
>  	memcg = parent_mem_cgroup(memcg);
>  	goto again;
>  }
> @@ -506,18 +510,16 @@ static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
>  	return idx < 0 || xa_load(&lru->xa, idx);
>  }
>
> -int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
> -			 gfp_t gfp)
> +static int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru)
>  {
>  	unsigned long flags;
>  	struct list_lru_memcg *mlru = NULL;
> -	struct mem_cgroup *pos, *parent;
> +	struct mem_cgroup *pos, *parent, *cur;
>  	XA_STATE(xas, &lru->xa, 0);
>
>  	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
>  		return 0;
>
> -	gfp &= GFP_RECLAIM_MASK;
>  	/*
>  	 * Because the list_lru can be reparented to the parent cgroup's
>  	 * list_lru, we should make sure that this cgroup and all its
> @@ -536,11 +538,13 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
>  		}
>
>  		if (!mlru) {
> -			mlru = memcg_init_list_lru_one(lru, gfp);
> +			mlru = memcg_init_list_lru_one(lru, GFP_KERNEL);
>  			if (!mlru)
>  				return -ENOMEM;
>  		}
>  		xas_set(&xas, pos->kmemcg_id);
> +		/* We could be scanning items in another memcg */
> +		cur = set_active_memcg(pos);
>  		do {
>  			xas_lock_irqsave(&xas, flags);
>  			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
> @@ -549,12 +553,16 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
>  					mlru = NULL;
>  			}
>  			xas_unlock_irqrestore(&xas, flags);
> -		} while (xas_nomem(&xas, gfp));
> +		} while (xas_nomem(&xas, GFP_KERNEL));
> +		set_active_memcg(cur);
>  	} while (pos != memcg && !css_is_dying(&pos->css));
>
>  	if (unlikely(mlru))
>  		kfree(mlru);
>
> +	if (css_is_dying(&pos->css))
> +		return -EBUSY;
> +
>  	return xas_error(&xas);
>  }
>  #else
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 16f3bdbd37d8..583e2587c17b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2966,8 +2966,8 @@ static inline size_t obj_full_size(struct kmem_cache *s)
>  	return s->size + sizeof(struct obj_cgroup *);
>  }
>
> -bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> -				  gfp_t flags, size_t size, void **p)
> +bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
> +				  size_t size, void **p)
>  {
>  	struct obj_cgroup *objcg;
>  	struct slab *slab;
> @@ -2994,18 +2994,6 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>
>  	flags &= gfp_allowed_mask;
>
> -	if (lru) {
> -		int ret;
> -		struct mem_cgroup *memcg;
> -
> -		memcg = get_mem_cgroup_from_objcg(objcg);
> -		ret = memcg_list_lru_alloc(memcg, lru, flags);
> -		css_put(&memcg->css);
> -
> -		if (ret)
> -			return false;
> -	}
> -
>  	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
>  		return false;
>
> diff --git a/mm/slab.h b/mm/slab.h
> index e9fd9bf0bfa6..3b20298d2ea1 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -598,8 +598,8 @@ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
>  }
>
>  #ifdef CONFIG_MEMCG
> -bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> -				  gfp_t flags, size_t size, void **p);
> +bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
> +				  size_t size, void **p);
>  void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
>  			    void **p, int objects, struct slabobj_ext *obj_exts);
>  #endif
> diff --git a/mm/slub.c b/mm/slub.c
> index 184fd2b14758..545c4b5f2bf2 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2153,8 +2153,8 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
>  static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
>
>  static __fastpath_inline
> -bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> -				gfp_t flags, size_t size, void **p)
> +bool memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
> +				size_t size, void **p)
>  {
>  	if (likely(!memcg_kmem_online()))
>  		return true;
> @@ -2162,7 +2162,7 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>  	if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
>  		return true;
>
> -	if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
> +	if (likely(__memcg_slab_post_alloc_hook(s, flags, size, p)))
>  		return true;
>
>  	if (likely(size == 1)) {
> @@ -2241,12 +2241,11 @@ bool memcg_slab_post_charge(void *p, gfp_t flags)
>  		return true;
>  	}
>
> -	return __memcg_slab_post_alloc_hook(s, NULL, flags, 1, &p);
> +	return __memcg_slab_post_alloc_hook(s, flags, 1, &p);
>  }
>
>  #else /* CONFIG_MEMCG */
>  static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
> -					      struct list_lru *lru,
>  					      gfp_t flags, size_t size,
>  					      void **p)
>  {
> @@ -4085,9 +4084,8 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
>  }
>
>  static __fastpath_inline
> -bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> -			  gfp_t flags, size_t size, void **p, bool init,
> -			  unsigned int orig_size)
> +bool slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, size_t size,
> +			  void **p, bool init, unsigned int orig_size)
>  {
>  	unsigned int zero_size = s->object_size;
>  	bool kasan_init = init;
> @@ -4135,7 +4133,7 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>  		alloc_tagging_slab_alloc_hook(s, p[i], flags);
>  	}
>
> -	return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
> +	return memcg_slab_post_alloc_hook(s, flags, size, p);
>  }
>
>  /*
> @@ -4174,7 +4172,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
>  	 * In case this fails due to memcg_slab_post_alloc_hook(),
>  	 * object is set to NULL
>  	 */
> -	slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
> +	slab_post_alloc_hook(s, gfpflags, 1, &object, init, orig_size);
>
>  	return object;
>  }
> @@ -5135,7 +5133,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
>  	 * memcg and kmem_cache debug support and memory initialization.
>  	 * Done outside of the IRQ disabled fastpath loop.
>  	 */
> -	if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
> +	if (unlikely(!slab_post_alloc_hook(s, flags, size, p,
>  					   slab_want_init_on_alloc(flags, s), s->object_size))) {
>  		return 0;
>  	}
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 10f2a16e7586..178728a936ed 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -1562,15 +1562,6 @@ bool zswap_store(struct folio *folio)
>  	if (!pool)
>  		goto put_objcg;
>
> -	if (objcg) {
> -		memcg = get_mem_cgroup_from_objcg(objcg);
> -		if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
> -			mem_cgroup_put(memcg);
> -			goto put_pool;
> -		}
> -		mem_cgroup_put(memcg);
> -	}
> -
>  	for (index = 0; index < nr_pages; ++index) {
>  		struct page *page = folio_page(folio, index);
>
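For reference, here is the rough, untested sketch I mentioned above, as it
could look in include/linux/list_lru.h. The helper bodies are copied from
what mm/list_lru.c does today (so treat them as approximate), and
__memcg_list_lru_alloc() is just the existing memcg_list_lru_alloc()
renamed, with the two checks dropped from it:

/* Untested sketch: helpers moved from mm/list_lru.c so they can be inlined */
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
	return lru->memcg_aware;
}

static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
					    struct list_lru *lru)
{
	int idx = memcg->kmemcg_id;

	return idx < 0 || xa_load(&lru->xa, idx);
}

/*
 * Out-of-line slow path in mm/list_lru.c, i.e. the current
 * memcg_list_lru_alloc() renamed, without the two checks above.
 */
int __memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
			   gfp_t gfp);

static inline int memcg_list_lru_alloc(struct mem_cgroup *memcg,
				       struct list_lru *lru, gfp_t gfp)
{
	/*
	 * Fast path stays inline, so in the common case the caller only
	 * does a direct call to xa_load() and never enters list_lru.c.
	 */
	if (likely(!list_lru_memcg_aware(lru) ||
		   memcg_list_lru_allocated(memcg, lru)))
		return 0;

	return __memcg_list_lru_alloc(memcg, lru, gfp);
}

With something like that in place you could rerun the funclatency
measurement and see how much of the improvement is really from caching the
lookup result, and how much is just from avoiding the extra call.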