From: Barry Song <v-songbaohua@xxxxxxxx> When the proportion of folios from the zero map is small, missing their accounting may not significantly impact profiling. However, it’s easy to construct a scenario where this becomes an issue—for example, allocating 1 GB of memory, writing zeros from userspace, followed by MADV_PAGEOUT, and then swapping it back in. In this case, the swap-out and swap-in counts seem to vanish into a black hole, potentially causing semantic ambiguity. We have two ways to address this: 1. Add a separate counter specifically for the zero map. 2. Continue using the current accounting, treating the zero map like a normal backend. (This aligns with the current behavior of zRAM when supporting same-page fills at the device level.) This patch adopts option 1 as pswpin/pswpout counters are that they only apply to IO done directly to the backend device (as noted by Nhat Pham). We can find these counters from /proc/vmstat (counters for the whole system) and memcg's memory.stat (counters for the interested memcg). For example: $ grep -E 'swpin_zero|swpout_zero' /proc/vmstat swpin_zero 1648 swpout_zero 33536 $ grep -E 'swpin_zero|swpout_zero' /sys/fs/cgroup/system.slice/memory.stat swpin_zero 3905 swpout_zero 3985 Fixes: 0ca0c24e3211 ("mm: store zero pages to be swapped out in a bitmap") Cc: Usama Arif <usamaarif642@xxxxxxxxx> Cc: Chengming Zhou <chengming.zhou@xxxxxxxxx> Cc: Yosry Ahmed <yosryahmed@xxxxxxxxxx> Cc: Nhat Pham <nphamcs@xxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx> Cc: Shakeel Butt <shakeel.butt@xxxxxxxxx> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> Cc: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx> Cc: Chris Li <chrisl@xxxxxxxxxx> Cc: "Huang, Ying" <ying.huang@xxxxxxxxx> Cc: Kairui Song <kasong@xxxxxxxxxxx> Cc: Ryan Roberts <ryan.roberts@xxxxxxx> Signed-off-by: Barry Song <v-songbaohua@xxxxxxxx> --- -v2: * add separate counters rather than using pswpin/out; thanks for the comments from Usama, David, Yosry and Nhat; * Usama also suggested a new counter like swapped_zero, I prefer that one be separated as an enhancement patch not a hotfix. will probably handle it later on. Documentation/admin-guide/cgroup-v2.rst | 10 ++++++++++ include/linux/vm_event_item.h | 2 ++ mm/memcontrol.c | 4 ++++ mm/page_io.c | 16 ++++++++++++++++ mm/vmstat.c | 2 ++ 5 files changed, 34 insertions(+) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index db3799f1483e..984eb3c9d05b 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1599,6 +1599,16 @@ The following nested keys are defined. pglazyfreed (npn) Amount of reclaimed lazyfree pages + swpin_zero + Number of pages moved into memory with zero content, meaning no + copy exists in the backend swapfile, allowing swap-in to avoid + I/O read overhead. + + swpout_zero + Number of pages moved out of memory with zero content, meaning no + copy is needed in the backend swapfile, allowing swap-out to avoid + I/O write overhead. + zswpin Number of pages moved in to memory from zswap. diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index aed952d04132..f70d0958095c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -134,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, #ifdef CONFIG_SWAP SWAP_RA, SWAP_RA_HIT, + SWPIN_ZERO, + SWPOUT_ZERO, #ifdef CONFIG_KSM KSM_SWPIN_COPY, #endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 5e44d6e7591e..7b3503d12aaf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -441,6 +441,10 @@ static const unsigned int memcg_vm_event_stat[] = { PGDEACTIVATE, PGLAZYFREE, PGLAZYFREED, +#ifdef CONFIG_SWAP + SWPIN_ZERO, + SWPOUT_ZERO, +#endif #ifdef CONFIG_ZSWAP ZSWPIN, ZSWPOUT, diff --git a/mm/page_io.c b/mm/page_io.c index 5d9b6e6cf96c..4b4ea8e49cf6 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -204,7 +204,9 @@ static bool is_folio_zero_filled(struct folio *folio) static void swap_zeromap_folio_set(struct folio *folio) { + struct obj_cgroup *objcg = get_obj_cgroup_from_folio(folio); struct swap_info_struct *sis = swp_swap_info(folio->swap); + int nr_pages = folio_nr_pages(folio); swp_entry_t entry; unsigned int i; @@ -212,6 +214,12 @@ static void swap_zeromap_folio_set(struct folio *folio) entry = page_swap_entry(folio_page(folio, i)); set_bit(swp_offset(entry), sis->zeromap); } + + count_vm_events(SWPOUT_ZERO, nr_pages); + if (objcg) { + count_objcg_events(objcg, SWPOUT_ZERO, nr_pages); + obj_cgroup_put(objcg); + } } static void swap_zeromap_folio_clear(struct folio *folio) @@ -507,6 +515,7 @@ static void sio_read_complete(struct kiocb *iocb, long ret) static bool swap_read_folio_zeromap(struct folio *folio) { int nr_pages = folio_nr_pages(folio); + struct obj_cgroup *objcg; bool is_zeromap; /* @@ -521,6 +530,13 @@ static bool swap_read_folio_zeromap(struct folio *folio) if (!is_zeromap) return false; + objcg = get_obj_cgroup_from_folio(folio); + count_vm_events(SWPIN_ZERO, nr_pages); + if (objcg) { + count_objcg_events(objcg, SWPIN_ZERO, nr_pages); + obj_cgroup_put(objcg); + } + folio_zero_range(folio, 0, folio_size(folio)); folio_mark_uptodate(folio); return true; diff --git a/mm/vmstat.c b/mm/vmstat.c index 22a294556b58..c8ef7352f9ed 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1418,6 +1418,8 @@ const char * const vmstat_text[] = { #ifdef CONFIG_SWAP "swap_ra", "swap_ra_hit", + "swpin_zero", + "swpout_zero", #ifdef CONFIG_KSM "ksm_swpin_copy", #endif -- 2.39.3 (Apple Git-146)