On Mon 21-10-19 19:56:54, Hillf Danton wrote: > > Currently soft limit reclaim is frozen, see > Documentation/admin-guide/cgroup-v2.rst for reasons. > > Copying the page lru idea, memcg lru is added for selecting victim > memcg to reclaim pages from under memory pressure. It now works in > parallel to slr not only because the latter needs some time to reap > but the coexistence facilitates it a lot to add the lru in a straight > forward manner. This doesn't explain what problem/feature you would like to fix/achieve. It also doesn't explain the overall design. > A lru list paired with a spin lock is added, thanks to the current > memcg high_work that provides other things it needs, and a couple of > helpers to add memcg to and pick victim from lru. > > V1 is based on 5.4-rc3. > > Changes since v0 > - add MEMCG_LRU in init/Kconfig > - drop changes in mm/vmscan.c > - make memcg lru work in parallel to slr > > Cc: Chris Down <chris@xxxxxxxxxxxxxx> > Cc: Tejun Heo <tj@xxxxxxxxxx> > Cc: Roman Gushchin <guro@xxxxxx> > Cc: Michal Hocko <mhocko@xxxxxxxxxx> > Cc: Johannes Weiner <hannes@xxxxxxxxxxx> > Cc: Shakeel Butt <shakeelb@xxxxxxxxxx> > Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx> > Cc: Minchan Kim <minchan@xxxxxxxxxx> > Cc: Mel Gorman <mgorman@xxxxxxx> > Signed-off-by: Hillf Danton <hdanton@xxxxxxxx> > --- > > --- a/init/Kconfig > +++ b/init/Kconfig > @@ -843,6 +843,14 @@ config MEMCG > help > Provides control over the memory footprint of tasks in a cgroup. > > +config MEMCG_LRU > + bool > + depends on MEMCG > + help > + Select victim memcg on lru for page reclaiming. > + > + Say N if unsure. 
> + > config MEMCG_SWAP > bool "Swap controller" > depends on MEMCG && SWAP > --- a/include/linux/memcontrol.h > +++ b/include/linux/memcontrol.h > @@ -223,6 +223,10 @@ struct mem_cgroup { > /* Upper bound of normal memory consumption range */ > unsigned long high; > > +#ifdef CONFIG_MEMCG_LRU > + struct list_head lru_node; > +#endif > + > /* Range enforcement for interrupt charges */ > struct work_struct high_work; > > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -2338,14 +2338,54 @@ static int memcg_hotplug_cpu_dead(unsign > return 0; > } > > +#ifdef CONFIG_MEMCG_LRU > +static DEFINE_SPINLOCK(memcg_lru_lock); > +static LIST_HEAD(memcg_lru); /* a copy of page lru */ > + > +static void memcg_add_lru(struct mem_cgroup *memcg) > +{ > + spin_lock_irq(&memcg_lru_lock); > + if (list_empty(&memcg->lru_node)) > + list_add_tail(&memcg->lru_node, &memcg_lru); > + spin_unlock_irq(&memcg_lru_lock); > +} > + > +static struct mem_cgroup *memcg_pick_lru(void) > +{ > + struct mem_cgroup *memcg, *next; > + > + spin_lock_irq(&memcg_lru_lock); > + > + list_for_each_entry_safe(memcg, next, &memcg_lru, lru_node) { > + list_del_init(&memcg->lru_node); > + > + if (page_counter_read(&memcg->memory) > memcg->high) { > + spin_unlock_irq(&memcg_lru_lock); > + return memcg; > + } > + } > + spin_unlock_irq(&memcg_lru_lock); > + > + return NULL; > +} > +#endif > + > static void reclaim_high(struct mem_cgroup *memcg, > unsigned int nr_pages, > gfp_t gfp_mask) > { > +#ifdef CONFIG_MEMCG_LRU > + struct mem_cgroup *start = memcg; > +#endif > do { > if (page_counter_read(&memcg->memory) <= memcg->high) > continue; > memcg_memory_event(memcg, MEMCG_HIGH); > + if (IS_ENABLED(CONFIG_MEMCG_LRU)) > + if (start != memcg) { > + memcg_add_lru(memcg); > + return; > + } > try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); > } while ((memcg = parent_mem_cgroup(memcg))); > } > @@ -3158,6 +3198,13 @@ unsigned long mem_cgroup_soft_limit_recl > unsigned long excess; > unsigned long nr_scanned; > 
> + if (IS_ENABLED(CONFIG_MEMCG_LRU)) { > + struct mem_cgroup *memcg = memcg_pick_lru(); > + if (memcg) > + schedule_work(&memcg->high_work); > + return 0; > + } > + > if (order > 0) > return 0; > > @@ -5068,6 +5115,8 @@ static struct mem_cgroup *mem_cgroup_all > if (memcg_wb_domain_init(memcg, GFP_KERNEL)) > goto fail; > > + if (IS_ENABLED(CONFIG_MEMCG_LRU)) > + INIT_LIST_HEAD(&memcg->lru_node); > INIT_WORK(&memcg->high_work, high_work_func); > memcg->last_scanned_node = MAX_NUMNODES; > INIT_LIST_HEAD(&memcg->oom_notify); > -- > -- Michal Hocko SUSE Labs