+ mm-zsmalloc-share-slab-caches-for-all-zsmalloc-zpools.patch added to mm-unstable branch

Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> · Tue, 04 Jun 2024 15:20:20 -0700

The patch titled
     Subject: mm: zsmalloc: share slab caches for all zsmalloc zpools
has been added to the -mm mm-unstable branch.  Its filename is
     mm-zsmalloc-share-slab-caches-for-all-zsmalloc-zpools.patch

This patch will shortly appear at
     https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-zsmalloc-share-slab-caches-for-all-zsmalloc-zpools.patch

This patch will later appear in the mm-unstable branch at
    git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days

------------------------------------------------------
From: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
Subject: mm: zsmalloc: share slab caches for all zsmalloc zpools
Date: Tue, 4 Jun 2024 17:53:40 +0000

Zswap creates multiple zpools to improve concurrency.  Each zsmalloc zpool
creates its own 'zs_handle' and 'zspage' slab caches.  Currently we end up
with 32 slab caches of each type.

Since each slab cache holds some free objects, we end up with a lot of
free objects distributed among the separate zpool caches.  Slab caches are
designed to handle concurrent allocations by using percpu structures, so
having a single instance of each cache should be enough, and avoids
wasting more memory than needed due to fragmentation.

Additionally, having more slab caches than needed unnecessarily slows down
code paths that iterate slab_caches.

In the results reported by Eric in [1], the amount of unused slab memory
in these caches goes down from 242808 bytes to 29216 bytes (-88%).  This
is calculated by (num_objs - active_objs) * objsize for each 'zs_handle'
and 'zspage' cache.  Although this patch did not help with the allocation
failure reported by Eric with zswap + zsmalloc, I think it is still worth
merging on its own.

[1] https://lore.kernel.org/lkml/20240604134458.3ae4396a@yea/

Link: https://lkml.kernel.org/r/20240604175340.218175-1-yosryahmed@xxxxxxxxxx
Signed-off-by: Yosry Ahmed <yosryahmed@xxxxxxxxxx>
Acked-by: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Erhard Furtner <erhard_f@xxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Sergey Senozhatsky <senozhatsky@xxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/zsmalloc.c |   87 ++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 43 deletions(-)

--- a/mm/zsmalloc.c~mm-zsmalloc-share-slab-caches-for-all-zsmalloc-zpools
+++ a/mm/zsmalloc.c
@@ -221,8 +221,6 @@ struct zs_pool {
 	const char *name;
 
 	struct size_class *size_class[ZS_SIZE_CLASSES];
-	struct kmem_cache *handle_cachep;
-	struct kmem_cache *zspage_cachep;
 
 	atomic_long_t pages_allocated;
 
@@ -290,50 +288,29 @@ static void init_deferred_free(struct zs
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
 #endif
 
-static int create_cache(struct zs_pool *pool)
-{
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
-					0, 0, NULL);
-	if (!pool->handle_cachep)
-		return 1;
-
-	pool->zspage_cachep = kmem_cache_create("zspage", sizeof(struct zspage),
-					0, 0, NULL);
-	if (!pool->zspage_cachep) {
-		kmem_cache_destroy(pool->handle_cachep);
-		pool->handle_cachep = NULL;
-		return 1;
-	}
-
-	return 0;
-}
+static struct kmem_cache *zs_handle_cache;
+static struct kmem_cache *zspage_cache;
 
-static void destroy_cache(struct zs_pool *pool)
+static unsigned long cache_alloc_handle(gfp_t gfp)
 {
-	kmem_cache_destroy(pool->handle_cachep);
-	kmem_cache_destroy(pool->zspage_cachep);
-}
-
-static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
-{
-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+	return (unsigned long)kmem_cache_alloc(zs_handle_cache,
 			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
 }
 
-static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+static void cache_free_handle(unsigned long handle)
 {
-	kmem_cache_free(pool->handle_cachep, (void *)handle);
+	kmem_cache_free(zs_handle_cache, (void *)handle);
 }
 
-static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
+static struct zspage *cache_alloc_zspage(gfp_t flags)
 {
-	return kmem_cache_zalloc(pool->zspage_cachep,
+	return kmem_cache_zalloc(zspage_cache,
 			flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
 }
 
-static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
+static void cache_free_zspage(struct zspage *zspage)
 {
-	kmem_cache_free(pool->zspage_cachep, zspage);
+	kmem_cache_free(zspage_cache, zspage);
 }
 
 /* pool->lock(which owns the handle) synchronizes races */
@@ -853,7 +830,7 @@ static void __free_zspage(struct zs_pool
 		page = next;
 	} while (page != NULL);
 
-	cache_free_zspage(pool, zspage);
+	cache_free_zspage(zspage);
 
 	class_stat_dec(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
 	atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated);
@@ -966,7 +943,7 @@ static struct zspage *alloc_zspage(struc
 {
 	int i;
 	struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE];
-	struct zspage *zspage = cache_alloc_zspage(pool, gfp);
+	struct zspage *zspage = cache_alloc_zspage(gfp);
 
 	if (!zspage)
 		return NULL;
@@ -984,7 +961,7 @@ static struct zspage *alloc_zspage(struc
 				__ClearPageZsmalloc(pages[i]);
 				__free_page(pages[i]);
 			}
-			cache_free_zspage(pool, zspage);
+			cache_free_zspage(zspage);
 			return NULL;
 		}
 		__SetPageZsmalloc(page);
@@ -1356,7 +1333,7 @@ unsigned long zs_malloc(struct zs_pool *
 	if (unlikely(size > ZS_MAX_ALLOC_SIZE))
 		return (unsigned long)ERR_PTR(-ENOSPC);
 
-	handle = cache_alloc_handle(pool, gfp);
+	handle = cache_alloc_handle(gfp);
 	if (!handle)
 		return (unsigned long)ERR_PTR(-ENOMEM);
 
@@ -1381,7 +1358,7 @@ unsigned long zs_malloc(struct zs_pool *
 
 	zspage = alloc_zspage(pool, class, gfp);
 	if (!zspage) {
-		cache_free_handle(pool, handle);
+		cache_free_handle(handle);
 		return (unsigned long)ERR_PTR(-ENOMEM);
 	}
 
@@ -1459,7 +1436,7 @@ void zs_free(struct zs_pool *pool, unsig
 		free_zspage(pool, class, zspage);
 
 	spin_unlock(&pool->lock);
-	cache_free_handle(pool, handle);
+	cache_free_handle(handle);
 }
 EXPORT_SYMBOL_GPL(zs_free);
 
@@ -2124,9 +2101,6 @@ struct zs_pool *zs_create_pool(const cha
 	if (!pool->name)
 		goto err;
 
-	if (create_cache(pool))
-		goto err;
-
 	/*
 	 * Iterate reversely, because, size of size_class that we want to use
 	 * for merging should be larger or equal to current size.
@@ -2247,16 +2221,41 @@ void zs_destroy_pool(struct zs_pool *poo
 		kfree(class);
 	}
 
-	destroy_cache(pool);
 	kfree(pool->name);
 	kfree(pool);
 }
 EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
+static void zs_destroy_caches(void)
+{
+	kmem_cache_destroy(zs_handle_cache);
+	zs_handle_cache = NULL;	/* zs_init()'s out: path may call us again */
+	kmem_cache_destroy(zspage_cache);
+	zspage_cache = NULL;
+}
+
+static int zs_create_caches(void)
+{
+	zs_handle_cache = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+					    0, 0, NULL);
+	zspage_cache = kmem_cache_create("zspage", sizeof(struct zspage),
+					 0, 0, NULL);
+	/* On partial failure, drop whichever cache did get created. */
+	if (!zs_handle_cache || !zspage_cache) {
+		zs_destroy_caches();
+		return -ENOMEM;	/* zs_init() hands this errno to its caller */
+	}
+	return 0;
+}
+
 static int __init zs_init(void)
 {
 	int ret;
 
+	ret = zs_create_caches();
+	if (ret)
+		goto out;
+
 	ret = cpuhp_setup_state(CPUHP_MM_ZS_PREPARE, "mm/zsmalloc:prepare",
 				zs_cpu_prepare, zs_cpu_dead);
 	if (ret)
@@ -2271,6 +2270,7 @@ static int __init zs_init(void)
 	return 0;
 
 out:
+	zs_destroy_caches();
 	return ret;
 }
 
@@ -2282,6 +2282,7 @@ static void __exit zs_exit(void)
 	cpuhp_remove_state(CPUHP_MM_ZS_PREPARE);
 
 	zs_stat_exit();
+	zs_destroy_caches();
 }
 
 module_init(zs_init);
_

Patches currently in -mm which might be from yosryahmed@xxxxxxxxxx are

mm-zswap-use-sg_set_folio-in-zswap_compress-decompress.patch
mm-zswap-use-kmap_local_folio-in-zswap_load.patch
mm-zswap-make-same_filled-functions-folio-friendly.patch
mm-rmap-abstract-updating-per-node-and-per-memcg-stats.patch
mm-zsmalloc-share-slab-caches-for-all-zsmalloc-zpools.patch