On Mon, Jun 17, 2024 at 2:16 PM Yu Zhao <yuzhao@xxxxxxxxxx> wrote:
>
> On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@xxxxxxxxx> wrote:
> >
> > Zswap uses 32 pools to work around the locking scalability problem in
> > zsmalloc,
>
> Note that zpool can have other backends (zbud, z3fold), and the
> original patch was developed (even before zswap could use zsmalloc) to
> make sure it works for all the backends.
>
> This patch only makes sense now because zsmalloc has become a lot
> more popular than the other backends (even though some distros still
> default to zbud).

And this might also have answered Yosry's question about the
"historical context" here [1].

[1] https://lore.kernel.org/CAJD7tkbO+ZLdhs-9BpthztZX32i8C4=QEnoiXGS7bM399nqwzg@xxxxxxxxxxxxxx/

> > which brings its own problems like memory waste and more
> > memory fragmentation.
> >
> > Testing results show that we can get nearly the same performance with
> > only one pool in zswap after changing zsmalloc to use a per-size_class
> > lock instead of the pool spinlock.
> >
> > Tested with a kernel build (make bzImage -j32) on tmpfs with
> > memory.max=1GB, the zswap shrinker enabled, and a 10GB swapfile on ext4.
> >
> >                              real    user     sys
> > 6.10.0-rc3                   138.18  1241.38  1452.73
> > 6.10.0-rc3-onepool           149.45  1240.45  1844.69
> > 6.10.0-rc3-onepool-perclass  138.23  1242.37  1469.71
> >
> > Signed-off-by: Chengming Zhou <chengming.zhou@xxxxxxxxx>
> > ---
> >  mm/zswap.c | 60 +++++++++++++++++++-----------------------------------------
> >  1 file changed, 19 insertions(+), 41 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index e25a6808c2ed..5063c5372e51 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -122,9 +122,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
> >  module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
> >  		   uint, 0644);
> >
> > -/* Number of zpools in zswap_pool (empirically determined for scalability) */
> > -#define ZSWAP_NR_ZPOOLS 32
> > -
> >  /* Enable/disable memory pressure-based shrinker. */
> >  static bool zswap_shrinker_enabled = IS_ENABLED(
> >  		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
> > @@ -160,7 +157,7 @@ struct crypto_acomp_ctx {
> >   * needs to be verified that it's still valid in the tree.
> >   */
> >  struct zswap_pool {
> > -	struct zpool *zpools[ZSWAP_NR_ZPOOLS];
> > +	struct zpool *zpool;
> >  	struct crypto_acomp_ctx __percpu *acomp_ctx;
> >  	struct percpu_ref ref;
> >  	struct list_head list;
> > @@ -237,7 +234,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
> >
> >  #define zswap_pool_debug(msg, p)				\
> >  	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
> > -		 zpool_get_type((p)->zpools[0]))
> > +		 zpool_get_type((p)->zpool))
> >
> >  /*********************************
> >  * pool functions
> > @@ -246,7 +243,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref);
> >
> >  static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >  {
> > -	int i;
> >  	struct zswap_pool *pool;
> >  	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
> >  	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> > @@ -267,18 +263,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >  	if (!pool)
> >  		return NULL;
> >
> > -	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
> > -		/* unique name for each pool specifically required by zsmalloc */
> > -		snprintf(name, 38, "zswap%x",
> > -			 atomic_inc_return(&zswap_pools_count));
> > -
> > -		pool->zpools[i] = zpool_create_pool(type, name, gfp);
> > -		if (!pool->zpools[i]) {
> > -			pr_err("%s zpool not available\n", type);
> > -			goto error;
> > -		}
> > +	/* unique name for each pool specifically required by zsmalloc */
> > +	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
> > +	pool->zpool = zpool_create_pool(type, name, gfp);
> > +	if (!pool->zpool) {
> > +		pr_err("%s zpool not available\n", type);
> > +		goto error;
> >  	}
> > -	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
> > +	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> >
> >  	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> >
> > @@ -311,8 +303,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >  error:
> >  	if (pool->acomp_ctx)
> >  		free_percpu(pool->acomp_ctx);
> > -	while (i--)
> > -		zpool_destroy_pool(pool->zpools[i]);
> > +	zpool_destroy_pool(pool->zpool);
> >  	kfree(pool);
> >  	return NULL;
> >  }
> > @@ -361,15 +352,12 @@ static struct zswap_pool *__zswap_pool_create_fallback(void)
> >
> >  static void zswap_pool_destroy(struct zswap_pool *pool)
> >  {
> > -	int i;
> > -
> >  	zswap_pool_debug("destroying", pool);
> >
> >  	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
> >  	free_percpu(pool->acomp_ctx);
> >
> > -	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> > -		zpool_destroy_pool(pool->zpools[i]);
> > +	zpool_destroy_pool(pool->zpool);
> >  	kfree(pool);
> >  }
> >
> > @@ -464,8 +452,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> >  	list_for_each_entry_rcu(pool, &zswap_pools, list) {
> >  		if (strcmp(pool->tfm_name, compressor))
> >  			continue;
> > -		/* all zpools share the same type */
> > -		if (strcmp(zpool_get_type(pool->zpools[0]), type))
> > +		if (strcmp(zpool_get_type(pool->zpool), type))
> >  			continue;
> >  		/* if we can't get it, it's about to be destroyed */
> >  		if (!zswap_pool_get(pool))
> > @@ -492,12 +479,8 @@ unsigned long zswap_total_pages(void)
> >  	unsigned long total = 0;
> >
> >  	rcu_read_lock();
> > -	list_for_each_entry_rcu(pool, &zswap_pools, list) {
> > -		int i;
> > -
> > -		for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> > -			total += zpool_get_total_pages(pool->zpools[i]);
> > -	}
> > +	list_for_each_entry_rcu(pool, &zswap_pools, list)
> > +		total += zpool_get_total_pages(pool->zpool);
> >  	rcu_read_unlock();
> >
> >  	return total;
> > @@ -802,11 +785,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
> >  	kmem_cache_free(zswap_entry_cache, entry);
> >  }
> >
> > -static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> > -{
> > -	return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];
> > -}
> > -
> >  /*
> >   * Carries out the common pattern of freeing and entry's zpool allocation,
> >   * freeing the entry itself, and decrementing the number of stored pages.
> > @@ -814,7 +792,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> >  static void zswap_entry_free(struct zswap_entry *entry)
> >  {
> >  	zswap_lru_del(&zswap_list_lru, entry);
> > -	zpool_free(zswap_find_zpool(entry), entry->handle);
> > +	zpool_free(entry->pool->zpool, entry->handle);
> >  	zswap_pool_put(entry->pool);
> >  	if (entry->objcg) {
> >  		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
> > @@ -939,7 +917,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> >  	if (comp_ret)
> >  		goto unlock;
> >
> > -	zpool = zswap_find_zpool(entry);
> > +	zpool = entry->pool->zpool;
> >  	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> >  	if (zpool_malloc_support_movable(zpool))
> >  		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> > @@ -968,7 +946,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> >
> >  static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> >  {
> > -	struct zpool *zpool = zswap_find_zpool(entry);
> > +	struct zpool *zpool = entry->pool->zpool;
> >  	struct scatterlist input, output;
> >  	struct crypto_acomp_ctx *acomp_ctx;
> >  	u8 *src;
> > @@ -1467,7 +1445,7 @@ bool zswap_store(struct folio *folio)
> >  	return true;
> >
> >  store_failed:
> > -	zpool_free(zswap_find_zpool(entry), entry->handle);
> > +	zpool_free(entry->pool->zpool, entry->handle);
> >  put_pool:
> >  	zswap_pool_put(entry->pool);
> >  freepage:
> > @@ -1683,7 +1661,7 @@ static int zswap_setup(void)
> >  	pool = __zswap_pool_create_fallback();
> >  	if (pool) {
> >  		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> > -			zpool_get_type(pool->zpools[0]));
> > +			zpool_get_type(pool->zpool));
> >  		list_add(&pool->list, &zswap_pools);
> >  		zswap_has_pool = true;
> >  		static_branch_enable(&zswap_ever_enabled);
> >
> > --
> > 2.45.2
> >
> >
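
For anyone who hasn't looked at the zsmalloc side of this series: the
per-size_class locking that makes a single zswap pool viable looks
roughly like the sketch below. This is a simplified illustration of the
idea only, not the actual mm/zsmalloc.c code; NR_SIZE_CLASSES and
class_alloc() are made-up stand-ins.

/*
 * Illustration only -- NOT the actual mm/zsmalloc.c code. The struct
 * layouts are trimmed down and class_alloc() is a hypothetical helper.
 */
#include <linux/spinlock.h>

#define NR_SIZE_CLASSES	255	/* stand-in for zsmalloc's class count */

struct size_class {
	spinlock_t lock;	/* new scheme: one lock per size class */
	/* object size, free lists, fullness groups, ... */
};

struct zs_pool {
	/* old scheme: a single spinlock_t here serialized all classes */
	struct size_class *size_class[NR_SIZE_CLASSES];
};

static unsigned long class_alloc(struct size_class *class)
{
	unsigned long handle;

	/* contention is now confined to objects of one size class */
	spin_lock(&class->lock);
	handle = 0;	/* ... carve an object out of this class ... */
	spin_unlock(&class->lock);

	return handle;
}

With allocations in different size classes no longer contending on one
pool-wide lock, the 32-zpool workaround stops paying for itself, which
is what the numbers in the commit message show.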