On Fri 24-01-25 00:41:32, Johannes Weiner wrote:
> The interweaving of two entirely different swap accounting strategies
> has been one of the more confusing parts of the memcg code. Split out
> the v1 code to clarify the implementation and a handful of callsites,
> and to avoid building the v1 bits when !CONFIG_MEMCG_V1.
> 
>    text    data     bss     dec     hex filename
>   39253    6446    4160   49859    c2c3 mm/memcontrol.o.old
>   38877    6382    4160   49419    c10b mm/memcontrol.o
> 
> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>

Acked-by: Michal Hocko <mhocko@xxxxxxxx>

Thanks!

> ---
>  include/linux/memcontrol.h |  17 +++--
>  include/linux/swap.h       |   5 --
>  mm/huge_memory.c           |   2 +-
>  mm/memcontrol-v1.c         |  89 ++++++++++++++++++++++++-
>  mm/memcontrol-v1.h         |   6 +-
>  mm/memcontrol.c            | 129 ++++++------------------------------
>  mm/memory.c                |   2 +-
>  mm/shmem.c                 |   2 +-
>  mm/swap_state.c            |   2 +-
>  mm/vmscan.c                |   2 +-
>  10 files changed, 126 insertions(+), 130 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 6e74b8254d9b..57664e2a8fb7 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -649,8 +649,6 @@ int mem_cgroup_charge_hugetlb(struct folio* folio, gfp_t gfp);
>  int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
>  				    gfp_t gfp, swp_entry_t entry);
>  
> -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
> -
>  void __mem_cgroup_uncharge(struct folio *folio);
>  
>  /**
> @@ -1165,10 +1163,6 @@ static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
>  	return 0;
>  }
>  
> -static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr)
> -{
> -}
> -
>  static inline void mem_cgroup_uncharge(struct folio *folio)
>  {
>  }
> @@ -1848,6 +1842,9 @@ static inline void mem_cgroup_exit_user_fault(void)
>  	current->in_user_fault = 0;
>  }
>  
> +void memcg1_swapout(struct folio *folio, swp_entry_t entry);
> +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages);
> +
>  #else /* CONFIG_MEMCG_V1 */
>  static inline
>  unsigned long memcg1_soft_limit_reclaim(pg_data_t *pgdat, int order,
> @@ -1875,6 +1872,14 @@ static inline void mem_cgroup_exit_user_fault(void)
>  {
>  }
>  
> +static inline void memcg1_swapout(struct folio *folio, swp_entry_t entry)
> +{
> +}
> +
> +static inline void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages)
> +{
> +}
> +
>  #endif /* CONFIG_MEMCG_V1 */
>  
>  #endif /* _LINUX_MEMCONTROL_H */
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index b13b72645db3..91b30701274e 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -659,7 +659,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
>  #endif
>  
>  #if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
> -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
>  int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
>  static inline int mem_cgroup_try_charge_swap(struct folio *folio,
>  		swp_entry_t entry)
> @@ -680,10 +679,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
>  extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
>  extern bool mem_cgroup_swap_full(struct folio *folio);
>  #else
> -static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
> -{
> -}
> -
>  static inline int mem_cgroup_try_charge_swap(struct folio *folio,
>  					     swp_entry_t entry)
>  {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 3d3ebdc002d5..c40b42a1015a 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3740,7 +3740,7 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
>  
>  	/*
>  	 * Exclude swapcache: originally to avoid a corrupt deferred split
> -	 * queue. Nowadays that is fully prevented by mem_cgroup_swapout();
> +	 * queue. Nowadays that is fully prevented by memcg1_swapout();
>  	 * but if page reclaim is already handling the same folio, it is
>  	 * unnecessary to handle it again in the shrinker, so excluding
>  	 * swapcache here may still be a useful optimization.
> diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
> index 6d184fae0ad1..1d16a99fb964 100644
> --- a/mm/memcontrol-v1.c
> +++ b/mm/memcontrol-v1.c
> @@ -581,8 +581,59 @@ void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg)
>  	local_irq_restore(flags);
>  }
>  
> -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg)
> +/**
> + * memcg1_swapout - transfer a memsw charge to swap
> + * @folio: folio whose memsw charge to transfer
> + * @entry: swap entry to move the charge to
> + *
> + * Transfer the memsw charge of @folio to @entry.
> + */
> +void memcg1_swapout(struct folio *folio, swp_entry_t entry)
>  {
> +	struct mem_cgroup *memcg, *swap_memcg;
> +	unsigned int nr_entries;
> +
> +	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> +	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
> +
> +	if (mem_cgroup_disabled())
> +		return;
> +
> +	if (!do_memsw_account())
> +		return;
> +
> +	memcg = folio_memcg(folio);
> +
> +	VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
> +	if (!memcg)
> +		return;
> +
> +	/*
> +	 * In case the memcg owning these pages has been offlined and doesn't
> +	 * have an ID allocated to it anymore, charge the closest online
> +	 * ancestor for the swap instead and transfer the memory+swap charge.
> +	 */
> +	swap_memcg = mem_cgroup_id_get_online(memcg);
> +	nr_entries = folio_nr_pages(folio);
> +	/* Get references for the tail pages, too */
> +	if (nr_entries > 1)
> +		mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
> +	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
> +
> +	swap_cgroup_record(folio, entry);
> +
> +	folio_unqueue_deferred_split(folio);
> +	folio->memcg_data = 0;
> +
> +	if (!mem_cgroup_is_root(memcg))
> +		page_counter_uncharge(&memcg->memory, nr_entries);
> +
> +	if (memcg != swap_memcg) {
> +		if (!mem_cgroup_is_root(swap_memcg))
> +			page_counter_charge(&swap_memcg->memsw, nr_entries);
> +		page_counter_uncharge(&memcg->memsw, nr_entries);
> +	}
> +
>  	/*
>  	 * Interrupts should be disabled here because the caller holds the
>  	 * i_pages lock which is taken with interrupts-off. It is
> @@ -594,6 +645,42 @@ void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg)
>  	memcg1_charge_statistics(memcg, -folio_nr_pages(folio));
>  	preempt_enable_nested();
>  	memcg1_check_events(memcg, folio_nid(folio));
> +
> +	css_put(&memcg->css);
> +}
> +
> +/*
> + * memcg1_swapin - uncharge swap slot
> + * @entry: the first swap entry for which the pages are charged
> + * @nr_pages: number of pages which will be uncharged
> + *
> + * Call this function after successfully adding the charged page to swapcache.
> + *
> + * Note: This function assumes the page for which the swap slot is being
> + * uncharged is an order-0 page.
> + */
> +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages)
> +{
> +	/*
> +	 * Cgroup1's unified memory+swap counter has been charged with the
> +	 * new swapcache page, finish the transfer by uncharging the swap
> +	 * slot. The swap slot would also get uncharged when it dies, but
> +	 * it can stick around indefinitely and we'd count the page twice
> +	 * the entire time.
> +	 *
> +	 * Cgroup2 has separate resource counters for memory and swap,
> +	 * so this is a non-issue here. Memory and swap charge lifetimes
> +	 * correspond 1:1 to page and swap slot lifetimes: we charge the
> +	 * page to memory here, and uncharge swap when the slot is freed.
> +	 */
> +	if (do_memsw_account()) {
> +		/*
> +		 * The swap entry might not get freed for a long time,
> +		 * let's not wait for it. The page already received a
> +		 * memory+swap charge, drop the swap entry duplicate.
> +		 */
> +		mem_cgroup_uncharge_swap(entry, nr_pages);
> +	}
>  }
>  
>  void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
> diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
> index 4c8f36430fe9..1dc759e65471 100644
> --- a/mm/memcontrol-v1.h
> +++ b/mm/memcontrol-v1.h
> @@ -39,6 +39,9 @@ unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item);
>  unsigned long memcg_page_state_local_output(struct mem_cgroup *memcg, int item);
>  int memory_stat_show(struct seq_file *m, void *v);
>  
> +void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
> +struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
> +
>  /* Cgroup v1-specific declarations */
>  #ifdef CONFIG_MEMCG_V1
>  
> @@ -69,7 +72,6 @@ void memcg1_oom_finish(struct mem_cgroup *memcg, bool locked);
>  void memcg1_oom_recover(struct mem_cgroup *memcg);
>  
>  void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg);
> -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg);
>  void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
>  			   unsigned long nr_memory, int nid);
>  
> @@ -107,8 +109,6 @@ static inline void memcg1_oom_recover(struct mem_cgroup *memcg) {}
>  static inline void memcg1_commit_charge(struct folio *folio,
>  					struct mem_cgroup *memcg) {}
>  
> -static inline void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) {}
> -
>  static inline void memcg1_uncharge_batch(struct mem_cgroup *memcg,
>  					 unsigned long pgpgout,
>  					 unsigned long nr_memory, int nid) {}
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 818143b81760..a95cb3fbb2c8 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3377,7 +3377,7 @@ static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
>  	}
>  }
>  
> -static void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg,
> +void __maybe_unused mem_cgroup_id_get_many(struct mem_cgroup *memcg,
>  						  unsigned int n)
>  {
>  	refcount_add(n, &memcg->id.ref);
> @@ -3398,6 +3398,24 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
>  	mem_cgroup_id_put_many(memcg, 1);
>  }
>  
> +struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
> +{
> +	while (!refcount_inc_not_zero(&memcg->id.ref)) {
> +		/*
> +		 * The root cgroup cannot be destroyed, so its refcount must
> +		 * always be >= 1.
> +		 */
> +		if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) {
> +			VM_BUG_ON(1);
> +			break;
> +		}
> +		memcg = parent_mem_cgroup(memcg);
> +		if (!memcg)
> +			memcg = root_mem_cgroup;
> +	}
> +	return memcg;
> +}
> +
>  /**
>   * mem_cgroup_from_id - look up a memcg from a memcg id
>   * @id: the memcg id to look up
> @@ -4585,40 +4603,6 @@ int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
>  	return ret;
>  }
>  
> -/*
> - * mem_cgroup_swapin_uncharge_swap - uncharge swap slot
> - * @entry: the first swap entry for which the pages are charged
> - * @nr_pages: number of pages which will be uncharged
> - *
> - * Call this function after successfully adding the charged page to swapcache.
> - *
> - * Note: This function assumes the page for which swap slot is being uncharged
> - * is order 0 page.
> - */
> -void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
> -{
> -	/*
> -	 * Cgroup1's unified memory+swap counter has been charged with the
> -	 * new swapcache page, finish the transfer by uncharging the swap
> -	 * slot. The swap slot would also get uncharged when it dies, but
> -	 * it can stick around indefinitely and we'd count the page twice
> -	 * the entire time.
> -	 *
> -	 * Cgroup2 has separate resource counters for memory and swap,
> -	 * so this is a non-issue here. Memory and swap charge lifetimes
> -	 * correspond 1:1 to page and swap slot lifetimes: we charge the
> -	 * page to memory here, and uncharge swap when the slot is freed.
> -	 */
> -	if (do_memsw_account()) {
> -		/*
> -		 * The swap entry might not get freed for a long time,
> -		 * let's not wait for it. The page already received a
> -		 * memory+swap charge, drop the swap entry duplicate.
> -		 */
> -		mem_cgroup_uncharge_swap(entry, nr_pages);
> -	}
> -}
> -
>  struct uncharge_gather {
>  	struct mem_cgroup *memcg;
>  	unsigned long nr_memory;
> @@ -4944,81 +4928,6 @@ static int __init mem_cgroup_init(void)
>  subsys_initcall(mem_cgroup_init);
>  
>  #ifdef CONFIG_SWAP
> -static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
> -{
> -	while (!refcount_inc_not_zero(&memcg->id.ref)) {
> -		/*
> -		 * The root cgroup cannot be destroyed, so it's refcount must
> -		 * always be >= 1.
> -		 */
> -		if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) {
> -			VM_BUG_ON(1);
> -			break;
> -		}
> -		memcg = parent_mem_cgroup(memcg);
> -		if (!memcg)
> -			memcg = root_mem_cgroup;
> -	}
> -	return memcg;
> -}
> -
> -/**
> - * mem_cgroup_swapout - transfer a memsw charge to swap
> - * @folio: folio whose memsw charge to transfer
> - * @entry: swap entry to move the charge to
> - *
> - * Transfer the memsw charge of @folio to @entry.
> - */
> -void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
> -{
> -	struct mem_cgroup *memcg, *swap_memcg;
> -	unsigned int nr_entries;
> -
> -	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
> -	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
> -
> -	if (mem_cgroup_disabled())
> -		return;
> -
> -	if (!do_memsw_account())
> -		return;
> -
> -	memcg = folio_memcg(folio);
> -
> -	VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
> -	if (!memcg)
> -		return;
> -
> -	/*
> -	 * In case the memcg owning these pages has been offlined and doesn't
> -	 * have an ID allocated to it anymore, charge the closest online
> -	 * ancestor for the swap instead and transfer the memory+swap charge.
> -	 */
> -	swap_memcg = mem_cgroup_id_get_online(memcg);
> -	nr_entries = folio_nr_pages(folio);
> -	/* Get references for the tail pages, too */
> -	if (nr_entries > 1)
> -		mem_cgroup_id_get_many(swap_memcg, nr_entries - 1);
> -	mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries);
> -
> -	swap_cgroup_record(folio, entry);
> -
> -	folio_unqueue_deferred_split(folio);
> -	folio->memcg_data = 0;
> -
> -	if (!mem_cgroup_is_root(memcg))
> -		page_counter_uncharge(&memcg->memory, nr_entries);
> -
> -	if (memcg != swap_memcg) {
> -		if (!mem_cgroup_is_root(swap_memcg))
> -			page_counter_charge(&swap_memcg->memsw, nr_entries);
> -		page_counter_uncharge(&memcg->memsw, nr_entries);
> -	}
> -
> -	memcg1_swapout(folio, memcg);
> -	css_put(&memcg->css);
> -}
> -
>  /**
>   * __mem_cgroup_try_charge_swap - try charging swap space for a folio
>   * @folio: folio being added to swap
> diff --git a/mm/memory.c b/mm/memory.c
> index 2a20e3810534..708ae27673b1 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4393,7 +4393,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>  			}
>  			need_clear_cache = true;
>  
> -			mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
> +			memcg1_swapin(entry, nr_pages);
>  
>  			shadow = get_shadow_from_swap_cache(entry);
>  			if (shadow)
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 44379bee5b96..d885ecb6fe1e 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2017,7 +2017,7 @@ static struct folio *shmem_swap_alloc_folio(struct inode *inode,
>  	__folio_set_swapbacked(new);
>  	new->swap = entry;
>  
> -	mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
> +	memcg1_swapin(entry, nr_pages);
>  	shadow = get_shadow_from_swap_cache(entry);
>  	if (shadow)
>  		workingset_refault(new, shadow);
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index ca42b2be64d9..2e1acb210e57 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -521,7 +521,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
>  	if (add_to_swap_cache(new_folio, entry, gfp_mask & GFP_RECLAIM_MASK, &shadow))
>  		goto fail_unlock;
>  
> -	mem_cgroup_swapin_uncharge_swap(entry, 1);
> +	memcg1_swapin(entry, 1);
>  
>  	if (shadow)
>  		workingset_refault(new_folio, shadow);
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 3bbe917b6a34..b2b2f27b10a0 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -769,7 +769,7 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio,
>  		if (reclaimed && !mapping_exiting(mapping))
>  			shadow = workingset_eviction(folio, target_memcg);
>  		__delete_from_swap_cache(folio, swap, shadow);
> -		mem_cgroup_swapout(folio, swap);
> +		memcg1_swapout(folio, swap);
>  		xa_unlock_irq(&mapping->i_pages);
>  		put_swap_folio(folio, swap);
>  	} else {
> -- 
> 2.48.1

-- 
Michal Hocko
SUSE Labs
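
P.S. For readers new to the v1 model, here is a minimal, self-contained
user-space sketch of the accounting rule that memcg1_swapout() and
memcg1_swapin() implement. It is illustrative only: the names (toy_memcg,
toy_swapout, toy_swapin_uncharge_swap) are made up, and real charging goes
through page_counter and the per-memcg statistics. The point it shows is
that v1 has a single unified memory+swap ("memsw") counter, so swapout
drops only the memory charge and leaves the memsw charge with the swap
slot, while swapin must drop the slot's now-duplicate memsw charge once
the new swapcache page has been charged.

/*
 * Toy model (NOT kernel code) of cgroup v1 memsw accounting.
 * Build with: cc -o memsw-toy memsw-toy.c
 */
#include <assert.h>
#include <stdio.h>

struct toy_memcg {
	long memory;	/* pages charged against the memory limit */
	long memsw;	/* pages charged against the unified memory+swap limit */
};

/*
 * Swapout: the page leaves RAM; the unified memsw charge stays behind,
 * now held by the swap slot (cf. memcg1_swapout() above).
 */
static void toy_swapout(struct toy_memcg *memcg, long nr_pages)
{
	memcg->memory -= nr_pages;
}

/*
 * Swapin uncharge: the new swapcache page was charged to memory and
 * memsw; drop the duplicate memsw charge still held by the swap slot
 * (cf. memcg1_swapin() above).
 */
static void toy_swapin_uncharge_swap(struct toy_memcg *memcg, long nr_pages)
{
	memcg->memsw -= nr_pages;
}

int main(void)
{
	struct toy_memcg m = { 0, 0 };

	/* Charging a page under v1 bumps memory and memsw together. */
	m.memory += 1;
	m.memsw += 1;

	toy_swapout(&m, 1);
	assert(m.memory == 0 && m.memsw == 1);	/* charge lives in swap now */

	/* Swapin charges the new page to memory+memsw again... */
	m.memory += 1;
	m.memsw += 1;
	/* ...so the slot's duplicate must go, or the page is counted twice. */
	toy_swapin_uncharge_swap(&m, 1);
	assert(m.memory == 1 && m.memsw == 1);

	printf("memory=%ld memsw=%ld\n", m.memory, m.memsw);
	return 0;
}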