On Fri, 19 Mar 2021, Johannes Weiner wrote: > The swapaccounting= commandline option already does very little > today. To close a trivial containment failure case, the swap ownership > tracking part of the swap controller has recently become mandatory > (see commit 2d1c498072de ("mm: memcontrol: make swap tracking an > integral part of memory control") for details), which makes up the > majority of the work during swapout, swapin, and the swap slot map. > > The only thing left under this flag is the page_counter operations and > the visibility of the swap control files in the first place, which are > rather meager savings. There also aren't many scenarios, if any, where > controlling the memory of a cgroup while allowing it unlimited access > to a global swap space is a workable resource isolation stragegy. > > On the other hand, there have been several bugs and confusion around > the many possible swap controller states (cgroup1 vs cgroup2 behavior, > memory accounting without swap accounting, memcg runtime disabled). > > This puts the maintenance overhead of retaining the toggle above its > practical benefits. Deprecate it. > > Suggested-by: Shakeel Butt <shakeelb@xxxxxxxxxx> > Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> This crashes, and needs a fix: see below (plus some nits). But it's a very welcome cleanup: just getting rid of all those !cgroup_memory_noswap double negatives is a relief in itself. It does suggest eliminating CONFIG_MEMCG_SWAP altogether (just using #ifdef CONFIG_SWAP instead, in those parts of CONFIG_MEMCG code); but you're right that's a separate cleanup, and not nearly so worthwhile as this one (I notice CONFIG_MEMCG_SWAP in some of the arch defconfigs, and don't know whether whoever removes CONFIG_MEMCG_SWAP would be obligated to remove those too). > --- > .../admin-guide/kernel-parameters.txt | 5 -- > include/linux/memcontrol.h | 4 -- > mm/memcontrol.c | 48 ++++++------------- > 3 files changed, 15 insertions(+), 42 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 942bbef8f128..986d45dd8c37 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -5322,11 +5322,6 @@ > This parameter controls use of the Protected > Execution Facility on pSeries. > > - swapaccount=[0|1] > - [KNL] Enable accounting of swap in memory resource > - controller if no parameter or 1 is given or disable > - it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst) > - > swiotlb= [ARM,IA-64,PPC,MIPS,X86] > Format: { <int> | force | noforce } > <int> -- Number of I/O TLB slabs > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h > index 4064c9dda534..ef9613538d36 100644 > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -874,10 +874,6 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim, > struct mem_cgroup *oom_domain); > void mem_cgroup_print_oom_group(struct mem_cgroup *memcg); > > -#ifdef CONFIG_MEMCG_SWAP > -extern bool cgroup_memory_noswap; > -#endif > - > void lock_page_memcg(struct page *page); > void unlock_page_memcg(struct page *page); > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 49bdcf603af1..b036c4fb0fa7 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -85,13 +85,6 @@ static bool cgroup_memory_nosocket; > /* Kernel memory accounting disabled? */ > static bool cgroup_memory_nokmem; > > -/* Whether the swap controller is active */ > -#ifdef CONFIG_MEMCG_SWAP > -bool cgroup_memory_noswap __read_mostly; > -#else > -#define cgroup_memory_noswap 1 > -#endif > - > #ifdef CONFIG_CGROUP_WRITEBACK > static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); > #endif > @@ -99,7 +92,11 @@ static DECLARE_WAIT_QUEUE_HEAD(memcg_cgwb_frn_waitq); > /* Whether legacy memory+swap accounting is active */ > static bool do_memsw_account(void) > { > - return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap; > + /* cgroup2 doesn't do mem+swap accounting */ > + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) > + return false; > + > + return true; Nit: I'm not fond of the "if (boolean()) return true; else return false;" codestyle, and would prefer the straightforward return !cgroup_subsys_on_dfl(memory_cgrp_subsys); but you've chosen otherwise, so, okay. > } > > #define THRESHOLDS_EVENTS_TARGET 128 > @@ -7019,7 +7016,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) > if (!mem_cgroup_is_root(memcg)) > page_counter_uncharge(&memcg->memory, nr_entries); > > - if (!cgroup_memory_noswap && memcg != swap_memcg) { > + if (memcg != swap_memcg) { > if (!mem_cgroup_is_root(swap_memcg)) > page_counter_charge(&swap_memcg->memsw, nr_entries); > page_counter_uncharge(&memcg->memsw, nr_entries); > @@ -7073,7 +7070,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) > > memcg = mem_cgroup_id_get_online(memcg); > > - if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg) && > + if (!mem_cgroup_is_root(memcg) && > !page_counter_try_charge(&memcg->swap, nr_pages, &counter)) { > memcg_memory_event(memcg, MEMCG_SWAP_MAX); > memcg_memory_event(memcg, MEMCG_SWAP_FAIL); > @@ -7108,7 +7105,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages) > rcu_read_lock(); > memcg = mem_cgroup_from_id(id); > if (memcg) { > - if (!cgroup_memory_noswap && !mem_cgroup_is_root(memcg)) { > + if (!mem_cgroup_is_root(memcg)) { > if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) > page_counter_uncharge(&memcg->swap, nr_pages); > else > @@ -7124,7 +7121,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg) > { > long nr_swap_pages = get_nr_swap_pages(); > > - if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) > + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) That needs to check mem_cgroup_disabled() where it used to check cgroup_memory_noswap. The convolutions are confusing (which is precisely why this is such a welcome cleanup), but without a mem_cgroup_disabled() (or NULL memcg) check there, the cgroup_disable=memory case oopses on NULLish pointer dereference when mem_cgroup_get_nr_swap_pages() is called from get_scan_count(). (This little function has been repeatedly troublesome that way.) > return nr_swap_pages; > for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) > nr_swap_pages = min_t(long, nr_swap_pages, > @@ -7141,7 +7138,7 @@ bool mem_cgroup_swap_full(struct page *page) > > if (vm_swap_full()) > return true; > - if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) > + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) Would it now be better to say "if (do_memsw_account())" there? Or would it be better to eliminate do_memsw_account() altogether, and just use !cgroup_subsys_on_dfl(memory_cgrp_subsys) throughout? (Though I don't find "cgroup_subsys_on_dfl" very informative.) > return false; > > memcg = page_memcg(page); > @@ -7161,11 +7158,10 @@ bool mem_cgroup_swap_full(struct page *page) > > static int __init setup_swap_account(char *s) > { > - if (!strcmp(s, "1")) > - cgroup_memory_noswap = false; > - else if (!strcmp(s, "0")) > - cgroup_memory_noswap = true; > - return 1; > + pr_warn_once("The swapaccount= commandline option is deprecated. " > + "Please report your usecase to linux-mm@xxxxxxxxx if you " > + "depend on this functionality.\n"); Okay: the long line would annoy me, but that might be its intent, and follows precedent elsewhere. > + return 0; > } > __setup("swapaccount=", setup_swap_account); > > @@ -7291,27 +7287,13 @@ static struct cftype memsw_files[] = { > { }, /* terminate */ > }; > > -/* > - * If mem_cgroup_swap_init() is implemented as a subsys_initcall() > - * instead of a core_initcall(), this could mean cgroup_memory_noswap still > - * remains set to false even when memcg is disabled via "cgroup_disable=memory" > - * boot parameter. This may result in premature OOPS inside > - * mem_cgroup_get_nr_swap_pages() function in corner cases. > - */ > static int __init mem_cgroup_swap_init(void) > { > - /* No memory control -> no swap control */ > - if (mem_cgroup_disabled()) > - cgroup_memory_noswap = true; > - > - if (cgroup_memory_noswap) > - return 0; > - Shakeel suggested "if (mem_cgroup_disabled()) return 0;" here, and that was the first thing I tried when I got the crash. It did not help, as you predicted. Some moments I think it would be good to put in, some moments I think not: whatever. > WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys, swap_files)); > WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, memsw_files)); > > return 0; > } > -core_initcall(mem_cgroup_swap_init); > +subsys_initcall(mem_cgroup_swap_init); > > #endif /* CONFIG_MEMCG_SWAP */ > -- > 2.30.1