[RFC v1] memcg: add memcg lru for page reclaiming

Hillf Danton <hdanton@xxxxxxxx> · Mon, 21 Oct 2019 19:56:54 +0800

Currently soft limit reclaim is frozen, see
Documentation/admin-guide/cgroup-v2.rst for reasons.

Borrowing the page lru idea, a memcg lru is added for selecting the
victim memcg to reclaim pages from under memory pressure. For now it
works in parallel to soft limit reclaim (slr), not only because slr
needs some time to be reaped, but also because the coexistence makes
it much easier to add the lru in a straightforward manner.

An lru list paired with a spin lock is added, together with a couple
of helpers to add a memcg to the lru and to pick a victim from it.
The existing memcg high_work provides the other things it needs.

V1 is based on 5.4-rc3.

Changes since v0
- add MEMCG_LRU in init/Kconfig
- drop changes in mm/vmscan.c
- make memcg lru work in parallel to slr

Cc: Chris Down <chris@xxxxxxxxxxxxxx>
Cc: Tejun Heo <tj@xxxxxxxxxx>
Cc: Roman Gushchin <guro@xxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Signed-off-by: Hillf Danton <hdanton@xxxxxxxx>
---

--- a/init/Kconfig
+++ b/init/Kconfig
@@ -843,6 +843,14 @@ config MEMCG
 	help
 	  Provides control over the memory footprint of tasks in a cgroup.
 
+config MEMCG_LRU
+	bool "Memory cgroup LRU for reclaim victim selection"
+	depends on MEMCG
+	help
+	  Select victim memcg on lru for page reclaiming.
+
+	  Say N if unsure.
+
 config MEMCG_SWAP
 	bool "Swap controller"
 	depends on MEMCG && SWAP
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -223,6 +223,10 @@ struct mem_cgroup {
 	/* Upper bound of normal memory consumption range */
 	unsigned long high;
 
+#ifdef CONFIG_MEMCG_LRU
+	struct list_head lru_node;
+#endif
+
 	/* Range enforcement for interrupt charges */
 	struct work_struct high_work;
 
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2338,14 +2338,54 @@ static int memcg_hotplug_cpu_dead(unsign
 	return 0;
 }
 
+#ifdef CONFIG_MEMCG_LRU
+static DEFINE_SPINLOCK(memcg_lru_lock);
+static LIST_HEAD(memcg_lru);	/* a copy of page lru */
+/* Queue @memcg at the tail of memcg_lru unless it is already linked. */
+static void memcg_add_lru(struct mem_cgroup *memcg)
+{
+	spin_lock_irq(&memcg_lru_lock);
+	if (list_empty(&memcg->lru_node))	/* self-linked node: not queued */
+		list_add_tail(&memcg->lru_node, &memcg_lru);
+	spin_unlock_irq(&memcg_lru_lock);
+}
+
+static struct mem_cgroup *memcg_pick_lru(void)
+{
+	struct mem_cgroup *memcg, *next;
+
+	spin_lock_irq(&memcg_lru_lock);
+	/* Pop memcgs off the lru head until one above its high limit turns up. */
+	list_for_each_entry_safe(memcg, next, &memcg_lru, lru_node) {
+		list_del_init(&memcg->lru_node);
+		/* Unlinked either way; only an over-high memcg is returned. */
+		if (page_counter_read(&memcg->memory) > memcg->high) {
+			spin_unlock_irq(&memcg_lru_lock);
+			return memcg;	/* NOTE(review): no reference taken -- confirm lifetime */
+		}
+	}
+	spin_unlock_irq(&memcg_lru_lock);
+
+	return NULL;
+}
+#endif
+
 static void reclaim_high(struct mem_cgroup *memcg,
 			 unsigned int nr_pages,
 			 gfp_t gfp_mask)
 {
+	struct mem_cgroup *start __maybe_unused = memcg;
 	do {
 		if (page_counter_read(&memcg->memory) <= memcg->high)
 			continue;
 		memcg_memory_event(memcg, MEMCG_HIGH);
+#ifdef CONFIG_MEMCG_LRU
+		/* Only @start is reclaimed here; ancestors go on the lru. */
+		if (start != memcg) {
+			memcg_add_lru(memcg);
+			return;
+		}
+#endif
 		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
 	} while ((memcg = parent_mem_cgroup(memcg)));
 }
@@ -3158,6 +3198,13 @@ unsigned long mem_cgroup_soft_limit_recl
 	unsigned long excess;
 	unsigned long nr_scanned;
 
+	if (IS_ENABLED(CONFIG_MEMCG_LRU)) {
+		struct mem_cgroup *memcg = memcg_pick_lru();
+		if (memcg)
+			schedule_work(&memcg->high_work);
+		return 0;
+	}
+
 	if (order > 0)
 		return 0;
 
@@ -5068,6 +5115,8 @@ static struct mem_cgroup *mem_cgroup_all
 	if (memcg_wb_domain_init(memcg, GFP_KERNEL))
 		goto fail;
 
+	if (IS_ENABLED(CONFIG_MEMCG_LRU))
+		INIT_LIST_HEAD(&memcg->lru_node);
 	INIT_WORK(&memcg->high_work, high_work_func);
 	memcg->last_scanned_node = MAX_NUMNODES;
 	INIT_LIST_HEAD(&memcg->oom_notify);
--