From: Zeng Jingxiang <linuszeng@xxxxxxxxxxx>

It is observed that every call to memcg_slab_post_alloc_hook() and
zswap_store() executes xa_load() once through the following paths,
which adds unnecessary overhead:

__memcg_slab_post_alloc_hook
->memcg_list_lru_alloc
->->memcg_list_lru_allocated
->->->xa_load

zswap_store
->memcg_list_lru_alloc
->->memcg_list_lru_allocated
->->->xa_load

Optimize this code path so that xa_load() is only executed when a new
mlru is inserted into the list_lru; subsequent slab requests of the
same type no longer repeat the lookup.

We created 1,000,000 negative dentries on test machines with 10, 1,000,
and 10,000 cgroups, and used the bcc funclatency tool to measure the
latency of kmem_cache_alloc_lru_noprof(). The performance improvement
ranged from 3.3% to 6.2%:

10 cgroups, 3.3% performance improvement.
without the patch:
avg = 1375 nsecs, total: 1375684993 nsecs, count: 1000000
with the patch:
avg = 1331 nsecs, total: 1331625726 nsecs, count: 1000000

1000 cgroups, 3.7% performance improvement.
without the patch:
avg = 1364 nsecs, total: 1364564848 nsecs, count: 1000000
with the patch:
avg = 1315 nsecs, total: 1315150414 nsecs, count: 1000000

10000 cgroups, 6.2% performance improvement.
without the patch:
avg = 1385 nsecs, total: 1385361153 nsecs, count: 1000002
with the patch:
avg = 1304 nsecs, total: 1304531155 nsecs, count: 1000000

Signed-off-by: Zeng Jingxiang <linuszeng@xxxxxxxxxxx>
Suggested-by: Kairui Song <kasong@xxxxxxxxxxx>
---
 include/linux/list_lru.h |  2 --
 mm/list_lru.c            | 22 +++++++++++++++-------
 mm/memcontrol.c          | 16 ++--------------
 mm/slab.h                |  4 ++--
 mm/slub.c                | 20 +++++++++-----------
 mm/zswap.c               |  9 ---------
 6 files changed, 28 insertions(+), 45 deletions(-)
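
Note (not part of the patch): the improvement percentages quoted above
appear to be derived from the funclatency averages as
(avg_without - avg_with) / avg_with. A minimal standalone C check,
assuming that formula:

/* Reproduces the 3.3%/3.7%/6.2% figures from the averages above. */
#include <stdio.h>

int main(void)
{
	const struct {
		const char *name;
		double avg_without;	/* avg nsecs, without the patch */
		double avg_with;	/* avg nsecs, with the patch */
	} runs[] = {
		{ "10 cgroups",    1375.0, 1331.0 },
		{ "1000 cgroups",  1364.0, 1315.0 },
		{ "10000 cgroups", 1385.0, 1304.0 },
	};

	for (unsigned int i = 0; i < sizeof(runs) / sizeof(runs[0]); i++)
		printf("%s: %.1f%% improvement\n", runs[i].name,
		       100.0 * (runs[i].avg_without - runs[i].avg_with) /
		       runs[i].avg_with);

	return 0;
}

Compiled with a plain cc invocation, this prints 3.3%, 3.7% and 6.2%
for the three configurations.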
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index fe739d35a864..04d4b051f618 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -79,8 +79,6 @@ static inline int list_lru_init_memcg_key(struct list_lru *lru, struct shrinker
 	return list_lru_init_memcg(lru, shrinker);
 }
 
-int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
-			 gfp_t gfp);
 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
 
 /**
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 490473af3122..c5a5d61ac946 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -49,6 +49,8 @@ static int lru_shrinker_id(struct list_lru *lru)
 	return lru->shrinker_id;
 }
 
+static int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru);
+
 static inline struct list_lru_one *
 list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
 {
@@ -84,6 +86,9 @@ lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 			spin_unlock_irq(&l->lock);
 		else
 			spin_unlock(&l->lock);
+	} else {
+		if (!memcg_list_lru_alloc(memcg, lru))
+			goto again;
 	}
 	/*
 	 * Caller may simply bail out if raced with reparenting or
@@ -93,7 +98,6 @@ lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 		rcu_read_unlock();
 		return NULL;
 	}
-	VM_WARN_ON(!css_is_dying(&memcg->css));
 	memcg = parent_mem_cgroup(memcg);
 	goto again;
 }
@@ -506,18 +510,16 @@ static inline bool memcg_list_lru_allocated(struct mem_cgroup *memcg,
 	return idx < 0 || xa_load(&lru->xa, idx);
 }
 
-int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
-			 gfp_t gfp)
+static int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru)
 {
 	unsigned long flags;
 	struct list_lru_memcg *mlru = NULL;
-	struct mem_cgroup *pos, *parent;
+	struct mem_cgroup *pos, *parent, *cur;
 	XA_STATE(xas, &lru->xa, 0);
 
 	if (!list_lru_memcg_aware(lru) || memcg_list_lru_allocated(memcg, lru))
 		return 0;
 
-	gfp &= GFP_RECLAIM_MASK;
 	/*
 	 * Because the list_lru can be reparented to the parent cgroup's
 	 * list_lru, we should make sure that this cgroup and all its
@@ -536,11 +538,13 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
 		}
 
 		if (!mlru) {
-			mlru = memcg_init_list_lru_one(lru, gfp);
+			mlru = memcg_init_list_lru_one(lru, GFP_KERNEL);
 			if (!mlru)
 				return -ENOMEM;
 		}
 		xas_set(&xas, pos->kmemcg_id);
+		/* We could be scanning items in another memcg */
+		cur = set_active_memcg(pos);
 		do {
 			xas_lock_irqsave(&xas, flags);
 			if (!xas_load(&xas) && !css_is_dying(&pos->css)) {
@@ -549,12 +553,16 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
 				mlru = NULL;
 			}
 			xas_unlock_irqrestore(&xas, flags);
-		} while (xas_nomem(&xas, gfp));
+		} while (xas_nomem(&xas, GFP_KERNEL));
+		set_active_memcg(cur);
 	} while (pos != memcg && !css_is_dying(&pos->css));
 
 	if (unlikely(mlru))
 		kfree(mlru);
 
+	if (css_is_dying(&pos->css))
+		return -EBUSY;
+
 	return xas_error(&xas);
 }
 #else
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 16f3bdbd37d8..583e2587c17b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2966,8 +2966,8 @@ static inline size_t obj_full_size(struct kmem_cache *s)
 	return s->size + sizeof(struct obj_cgroup *);
 }
 
-bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
-				  gfp_t flags, size_t size, void **p)
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+				  size_t size, void **p)
 {
 	struct obj_cgroup *objcg;
 	struct slab *slab;
@@ -2994,18 +2994,6 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 
 	flags &= gfp_allowed_mask;
 
-	if (lru) {
-		int ret;
-		struct mem_cgroup *memcg;
-
-		memcg = get_mem_cgroup_from_objcg(objcg);
-		ret = memcg_list_lru_alloc(memcg, lru, flags);
-		css_put(&memcg->css);
-
-		if (ret)
-			return false;
-	}
-
 	if (obj_cgroup_charge(objcg, flags, size * obj_full_size(s)))
 		return false;
 
diff --git a/mm/slab.h b/mm/slab.h
index e9fd9bf0bfa6..3b20298d2ea1 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -598,8 +598,8 @@ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s)
 }
 
 #ifdef CONFIG_MEMCG
-bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
-				  gfp_t flags, size_t size, void **p);
+bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+				  size_t size, void **p);
 void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 			    int objects, struct slabobj_ext *obj_exts);
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index 184fd2b14758..545c4b5f2bf2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2153,8 +2153,8 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 static void memcg_alloc_abort_single(struct kmem_cache *s, void *object);
 
 static __fastpath_inline
-bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
-				gfp_t flags, size_t size, void **p)
+bool memcg_slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
+				size_t size, void **p)
 {
 	if (likely(!memcg_kmem_online()))
 		return true;
@@ -2162,7 +2162,7 @@ bool memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 	if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
 		return true;
 
-	if (likely(__memcg_slab_post_alloc_hook(s, lru, flags, size, p)))
+	if (likely(__memcg_slab_post_alloc_hook(s, flags, size, p)))
 		return true;
 
 	if (likely(size == 1)) {
@@ -2241,12 +2241,11 @@ bool memcg_slab_post_charge(void *p, gfp_t flags)
 		return true;
 	}
 
-	return __memcg_slab_post_alloc_hook(s, NULL, flags, 1, &p);
+	return __memcg_slab_post_alloc_hook(s, flags, 1, &p);
 }
 
 #else /* CONFIG_MEMCG */
 static inline bool memcg_slab_post_alloc_hook(struct kmem_cache *s,
-					      struct list_lru *lru,
 					      gfp_t flags, size_t size,
 					      void **p)
 {
@@ -4085,9 +4084,8 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
 }
 
 static __fastpath_inline
-bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
-			  gfp_t flags, size_t size, void **p, bool init,
-			  unsigned int orig_size)
+bool slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, size_t size,
+			  void **p, bool init, unsigned int orig_size)
 {
 	unsigned int zero_size = s->object_size;
 	bool kasan_init = init;
@@ -4135,7 +4133,7 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 		alloc_tagging_slab_alloc_hook(s, p[i], flags);
 	}
 
-	return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
+	return memcg_slab_post_alloc_hook(s, flags, size, p);
 }
 
 /*
@@ -4174,7 +4172,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	 * In case this fails due to memcg_slab_post_alloc_hook(),
 	 * object is set to NULL
 	 */
-	slab_post_alloc_hook(s, lru, gfpflags, 1, &object, init, orig_size);
+	slab_post_alloc_hook(s, gfpflags, 1, &object, init, orig_size);
 
 	return object;
 }
@@ -5135,7 +5133,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * memcg and kmem_cache debug support and memory initialization.
 	 * Done outside of the IRQ disabled fastpath loop.
 	 */
-	if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
+	if (unlikely(!slab_post_alloc_hook(s, flags, size, p,
 			slab_want_init_on_alloc(flags, s), s->object_size))) {
 		return 0;
 	}
diff --git a/mm/zswap.c b/mm/zswap.c
index 10f2a16e7586..178728a936ed 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -1562,15 +1562,6 @@ bool zswap_store(struct folio *folio)
 	if (!pool)
 		goto put_objcg;
 
-	if (objcg) {
-		memcg = get_mem_cgroup_from_objcg(objcg);
-		if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
-			mem_cgroup_put(memcg);
-			goto put_pool;
-		}
-		mem_cgroup_put(memcg);
-	}
-
 	for (index = 0; index < nr_pages; ++index) {
 		struct page *page = folio_page(folio, index);
 
-- 
2.43.5