On 2024/3/12 00:12, Johannes Weiner wrote: > Profiling the munmap() of a zswapped memory region shows 50%(!) of the > total cycles currently going into updating the zswap_pool_total_size. > > There are three consumers of this counter: > - store, to enforce the globally configured pool limit > - meminfo & debugfs, to report the size to the user > - shrink, to determine the batch size for each cycle > > Instead of aggregating everytime an entry enters or exits the zswap > pool, aggregate the value from the zpools on-demand: > > - Stores aggregate the counter anyway upon success. Aggregating to > check the limit instead is the same amount of work. > > - Meminfo & debugfs might benefit somewhat from a pre-aggregated > counter, but aren't exactly hotpaths. > > - Shrinking can aggregate once for every cycle instead of doing it for > every freed entry. As the shrinker might work on tens or hundreds of > objects per scan cycle, this is a large reduction in aggregations. > > The paths that benefit dramatically are swapin, swapoff, and > unmaps. There could be millions of pages being processed until > somebody asks for the pool size again. This eliminates the pool size > updates from those paths entirely. > > Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> Great! This is a clever simplification and optimization. With the incremental diff, feel free to add: Reviewed-by: Chengming Zhou <chengming.zhou@xxxxxxxxx> Thanks. > --- > fs/proc/meminfo.c | 3 +- > include/linux/zswap.h | 2 +- > mm/zswap.c | 98 +++++++++++++++++++++---------------------- > 3 files changed, 49 insertions(+), 54 deletions(-) > > diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c > index 45af9a989d40..245171d9164b 100644 > --- a/fs/proc/meminfo.c > +++ b/fs/proc/meminfo.c > @@ -89,8 +89,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) > show_val_kb(m, "SwapTotal: ", i.totalswap); > show_val_kb(m, "SwapFree: ", i.freeswap); > #ifdef CONFIG_ZSWAP > - seq_printf(m, "Zswap: %8lu kB\n", > - (unsigned long)(zswap_pool_total_size >> 10)); > + show_val_kb(m, "Zswap: ", zswap_total_pages()); > seq_printf(m, "Zswapped: %8lu kB\n", > (unsigned long)atomic_read(&zswap_stored_pages) << > (PAGE_SHIFT - 10)); > diff --git a/include/linux/zswap.h b/include/linux/zswap.h > index 341aea490070..2a85b941db97 100644 > --- a/include/linux/zswap.h > +++ b/include/linux/zswap.h > @@ -7,7 +7,6 @@ > > struct lruvec; > > -extern u64 zswap_pool_total_size; > extern atomic_t zswap_stored_pages; > > #ifdef CONFIG_ZSWAP > @@ -27,6 +26,7 @@ struct zswap_lruvec_state { > atomic_long_t nr_zswap_protected; > }; > > +unsigned long zswap_total_pages(void); > bool zswap_store(struct folio *folio); > bool zswap_load(struct folio *folio); > void zswap_invalidate(swp_entry_t swp); > diff --git a/mm/zswap.c b/mm/zswap.c > index 9a3237752082..7c39327a7cc2 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -43,8 +43,6 @@ > /********************************* > * statistics > **********************************/ > -/* Total bytes used by the compressed storage */ > -u64 zswap_pool_total_size; > /* The number of compressed pages currently stored in zswap */ > atomic_t zswap_stored_pages = ATOMIC_INIT(0); > /* The number of same-value filled pages currently stored in zswap */ > @@ -264,45 +262,6 @@ static inline struct zswap_tree *swap_zswap_tree(swp_entry_t swp) > pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ > zpool_get_type((p)->zpools[0])) > > -static bool zswap_is_full(void) > -{ > - return totalram_pages() * zswap_max_pool_percent / 100 < > - DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); > -} > - > -static bool zswap_can_accept(void) > -{ > - return totalram_pages() * zswap_accept_thr_percent / 100 * > - zswap_max_pool_percent / 100 > > - DIV_ROUND_UP(zswap_pool_total_size, PAGE_SIZE); > -} > - > -static u64 get_zswap_pool_size(struct zswap_pool *pool) > -{ > - u64 pool_size = 0; > - int i; > - > - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) > - pool_size += zpool_get_total_size(pool->zpools[i]); > - > - return pool_size; > -} > - > -static void zswap_update_total_size(void) > -{ > - struct zswap_pool *pool; > - u64 total = 0; > - > - rcu_read_lock(); > - > - list_for_each_entry_rcu(pool, &zswap_pools, list) > - total += get_zswap_pool_size(pool); > - > - rcu_read_unlock(); > - > - zswap_pool_total_size = total; > -} > - > /********************************* > * pool functions > **********************************/ > @@ -540,6 +499,28 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) > return NULL; > } > > +static unsigned long zswap_max_pages(void) > +{ > + return totalram_pages() * zswap_max_pool_percent / 100; > +} > + > +unsigned long zswap_total_pages(void) > +{ > + struct zswap_pool *pool; > + u64 total = 0; > + > + rcu_read_lock(); > + list_for_each_entry_rcu(pool, &zswap_pools, list) { > + int i; > + > + for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) > + total += zpool_get_total_size(pool->zpools[i]); > + } > + rcu_read_unlock(); > + > + return total >> PAGE_SHIFT; > +} > + > /********************************* > * param callbacks > **********************************/ > @@ -912,7 +893,6 @@ static void zswap_entry_free(struct zswap_entry *entry) > } > zswap_entry_cache_free(entry); > atomic_dec(&zswap_stored_pages); > - zswap_update_total_size(); > } > > /* > @@ -1317,7 +1297,7 @@ static unsigned long zswap_shrinker_count(struct shrinker *shrinker, > nr_stored = memcg_page_state(memcg, MEMCG_ZSWAPPED); > #else > /* use pool stats instead of memcg stats */ > - nr_backing = zswap_pool_total_size >> PAGE_SHIFT; > + nr_backing = zswap_total_pages(); > nr_stored = atomic_read(&zswap_nr_stored); > #endif > > @@ -1385,6 +1365,10 @@ static void shrink_worker(struct work_struct *w) > { > struct mem_cgroup *memcg; > int ret, failures = 0; > + unsigned long thr; > + > + /* Reclaim down to the accept threshold */ > + thr = zswap_max_pages() * zswap_accept_thr_percent / 100; > > /* global reclaim will select cgroup in a round-robin fashion. */ > do { > @@ -1432,10 +1416,9 @@ static void shrink_worker(struct work_struct *w) > break; > if (ret && ++failures == MAX_RECLAIM_RETRIES) > break; > - > resched: > cond_resched(); > - } while (!zswap_can_accept()); > + } while (zswap_total_pages() > thr); > } > > static int zswap_is_page_same_filled(void *ptr, unsigned long *value) > @@ -1476,6 +1459,7 @@ bool zswap_store(struct folio *folio) > struct zswap_entry *entry, *dupentry; > struct obj_cgroup *objcg = NULL; > struct mem_cgroup *memcg = NULL; > + unsigned long max_pages, cur_pages; > > VM_WARN_ON_ONCE(!folio_test_locked(folio)); > VM_WARN_ON_ONCE(!folio_test_swapcache(folio)); > @@ -1487,6 +1471,7 @@ bool zswap_store(struct folio *folio) > if (!zswap_enabled) > goto check_old; > > + /* Check cgroup limits */ > objcg = get_obj_cgroup_from_folio(folio); > if (objcg && !obj_cgroup_may_zswap(objcg)) { > memcg = get_mem_cgroup_from_objcg(objcg); > @@ -1497,15 +1482,20 @@ bool zswap_store(struct folio *folio) > mem_cgroup_put(memcg); > } > > - /* reclaim space if needed */ > - if (zswap_is_full()) { > + /* Check global limits */ > + cur_pages = zswap_total_pages(); > + max_pages = zswap_max_pages(); > + > + if (cur_pages >= max_pages) { > zswap_pool_limit_hit++; > zswap_pool_reached_full = true; > goto shrink; > } > > if (zswap_pool_reached_full) { > - if (!zswap_can_accept()) > + unsigned long thr = max_pages * zswap_accept_thr_percent / 100; > + > + if (cur_pages > thr) > goto shrink; > else > zswap_pool_reached_full = false; > @@ -1581,7 +1571,6 @@ bool zswap_store(struct folio *folio) > > /* update stats */ > atomic_inc(&zswap_stored_pages); > - zswap_update_total_size(); > count_vm_event(ZSWPOUT); > > return true; > @@ -1711,6 +1700,13 @@ void zswap_swapoff(int type) > > static struct dentry *zswap_debugfs_root; > > +static int debugfs_get_total_size(void *data, u64 *val) > +{ > + *val = zswap_total_pages() * PAGE_SIZE; > + return 0; > +} > +DEFINE_DEBUGFS_ATTRIBUTE(total_size_fops, debugfs_get_total_size, NULL, "%llu"); > + > static int zswap_debugfs_init(void) > { > if (!debugfs_initialized()) > @@ -1732,8 +1728,8 @@ static int zswap_debugfs_init(void) > zswap_debugfs_root, &zswap_reject_compress_poor); > debugfs_create_u64("written_back_pages", 0444, > zswap_debugfs_root, &zswap_written_back_pages); > - debugfs_create_u64("pool_total_size", 0444, > - zswap_debugfs_root, &zswap_pool_total_size); > + debugfs_create_file("pool_total_size", 0444, > + zswap_debugfs_root, NULL, &total_size_fops); > debugfs_create_atomic_t("stored_pages", 0444, > zswap_debugfs_root, &zswap_stored_pages); > debugfs_create_atomic_t("same_filled_pages", 0444,