By default, per-memcg background reclaim is disabled when limit_in_bytes is
set to the maximum. kswapd_run() is called when the memcg is resized, and
kswapd_stop() is called when the memcg is deleted. The per-memcg kswapd is
woken up based on the usage and the low_wmark, which is checked once every
1024 events per cpu; the memcg's kswapd is woken up if the usage is larger
than the low_wmark.

changelog v7..v6:
1. merge the thread pool and add memcg_kswapd_stop() and memcg_kswapd_init()
   based on the thread pool.

changelog v4..v3:
1. move kswapd_stop to mem_cgroup_destroy based on comments from KAMEZAWA
2. move kswapd_run to setup_mem_cgroup_wmark, since the actual watermarks
   determine whether per-memcg background reclaim is enabled.

changelog v3..v2:
1. some clean-ups

changelog v2..v1:
1. start/stop the per-cgroup kswapd when the cgroup is created/deleted.
2. remove the wmark check from per-page charging; the wmark is now checked
   periodically based on the event counter.

Signed-off-by: Ying Han <yinghan@xxxxxxxxxx>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx>
---
 mm/memcontrol.c |   61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 61 insertions(+), 0 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9e535b2..a98471b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -107,10 +107,12 @@ enum mem_cgroup_events_index {
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
 	MEM_CGROUP_TARGET_SOFTLIMIT,
+	MEM_CGROUP_WMARK_EVENTS_THRESH,
 	MEM_CGROUP_NTARGETS,
 };
 #define THRESHOLDS_EVENTS_TARGET (128)
 #define SOFTLIMIT_EVENTS_TARGET (1024)
+#define WMARK_EVENTS_TARGET (1024)
 
 struct mem_cgroup_stat_cpu {
 	long count[MEM_CGROUP_STAT_NSTATS];
@@ -379,6 +381,9 @@ static void mem_cgroup_put(struct mem_cgroup *mem);
 static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *mem);
 static void drain_all_stock_async(void);
 
+static void wake_memcg_kswapd(struct mem_cgroup *mem);
+static void memcg_kswapd_stop(struct mem_cgroup *mem);
+
 static struct mem_cgroup_per_zone *
 mem_cgroup_zoneinfo(struct mem_cgroup *mem, int nid, int zid)
 {
@@ -557,6 +562,12 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 	return mz;
 }
 
+static void mem_cgroup_check_wmark(struct mem_cgroup *mem)
+{
+	if (!mem_cgroup_watermark_ok(mem, CHARGE_WMARK_LOW))
+		wake_memcg_kswapd(mem);
+}
+
 /*
  * Implementation Note: reading percpu statistics for memcg.
  *
@@ -687,6 +698,9 @@ static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target)
 	case MEM_CGROUP_TARGET_SOFTLIMIT:
 		next = val + SOFTLIMIT_EVENTS_TARGET;
 		break;
+	case MEM_CGROUP_WMARK_EVENTS_THRESH:
+		next = val + WMARK_EVENTS_TARGET;
+		break;
 	default:
 		return;
 	}
@@ -710,6 +724,10 @@ static void memcg_check_events(struct mem_cgroup *mem, struct page *page)
 			__mem_cgroup_target_update(mem,
 				MEM_CGROUP_TARGET_SOFTLIMIT);
 		}
+		if (unlikely(__memcg_event_check(mem,
+			MEM_CGROUP_WMARK_EVENTS_THRESH))){
+			mem_cgroup_check_wmark(mem);
+		}
 	}
 }
 
@@ -3651,6 +3669,7 @@ move_account:
 	ret = -EBUSY;
 	if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
 		goto out;
+	memcg_kswapd_stop(mem);
 	ret = -EINTR;
 	if (signal_pending(current))
 		goto out;
@@ -4572,6 +4591,21 @@ struct memcg_kswapd_work {
 
 struct memcg_kswapd_work memcg_kswapd_control;
 
+static void wake_memcg_kswapd(struct mem_cgroup *mem)
+{
+	/* already running */
+	if (atomic_read(&mem->kswapd_running))
+		return;
+
+	spin_lock(&memcg_kswapd_control.lock);
+	if (list_empty(&mem->memcg_kswapd_wait_list))
+		list_add_tail(&mem->memcg_kswapd_wait_list,
+				&memcg_kswapd_control.list);
+	spin_unlock(&memcg_kswapd_control.lock);
+	wake_up(&memcg_kswapd_control.waitq);
+	return;
+}
+
 static void memcg_kswapd_wait_end(struct mem_cgroup *mem)
 {
 	DEFINE_WAIT(wait);
@@ -4582,6 +4616,17 @@ static void memcg_kswapd_wait_end(struct mem_cgroup *mem)
 	finish_wait(&mem->memcg_kswapd_end, &wait);
 }
 
+/* called at pre_destroy */
+static void memcg_kswapd_stop(struct mem_cgroup *mem)
+{
+	spin_lock(&memcg_kswapd_control.lock);
+	if (!list_empty(&mem->memcg_kswapd_wait_list))
+		list_del(&mem->memcg_kswapd_wait_list);
+	spin_unlock(&memcg_kswapd_control.lock);
+
+	memcg_kswapd_wait_end(mem);
+}
+
 struct mem_cgroup *mem_cgroup_get_shrink_target(void)
 {
 	struct mem_cgroup *mem;
@@ -4631,6 +4676,22 @@ wait_queue_head_t *mem_cgroup_kswapd_waitq(void)
 	return &memcg_kswapd_control.waitq;
 }
 
+static int __init memcg_kswapd_init(void)
+{
+	int i, nr_threads;
+
+	spin_lock_init(&memcg_kswapd_control.lock);
+	INIT_LIST_HEAD(&memcg_kswapd_control.list);
+	init_waitqueue_head(&memcg_kswapd_control.waitq);
+
+	nr_threads = int_sqrt(num_possible_cpus()) + 1;
+	for (i = 0; i < nr_threads; i++)
+		if (kswapd_run(0, i + 1) == -1)
+			break;
+	return 0;
+}
+module_init(memcg_kswapd_init);
+
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
-- 
1.7.3.1
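
For readers who want to see the event-counter idea in isolation: the watermark
is deliberately not compared on every page charge; a per-counter target is
advanced by WMARK_EVENTS_TARGET and the (comparatively expensive) comparison
only runs once the counter crosses that target. Below is a minimal,
self-contained userspace sketch of that pattern, not kernel code; the struct,
function names and numbers (toy_memcg, toy_charge, the wmark value) are
hypothetical and chosen only for illustration.

/*
 * Toy model of the "check the watermark once every N events" pattern.
 * All names here (toy_memcg, toy_charge, ...) are made up for this sketch.
 */
#include <stdio.h>

#define WMARK_EVENTS_TARGET	1024	/* same spacing the patch uses */

struct toy_memcg {
	unsigned long usage;		/* currently charged units */
	unsigned long low_wmark;	/* wake background reclaim above this */
	unsigned long events;		/* charges seen so far */
	unsigned long next_check;	/* events value at which to re-check */
};

static void toy_wake_kswapd(struct toy_memcg *memcg)
{
	/* stand-in for waking the background reclaim thread */
	printf("usage %lu above low_wmark %lu: wake reclaim\n",
	       memcg->usage, memcg->low_wmark);
}

/* Called on every charge: cheap counter work always, wmark check rarely. */
static void toy_charge(struct toy_memcg *memcg, unsigned long units)
{
	memcg->usage += units;
	memcg->events++;

	if (memcg->events >= memcg->next_check) {
		memcg->next_check = memcg->events + WMARK_EVENTS_TARGET;
		if (memcg->usage > memcg->low_wmark)
			toy_wake_kswapd(memcg);
	}
}

int main(void)
{
	struct toy_memcg memcg = {
		.low_wmark = 2048,
		.next_check = WMARK_EVENTS_TARGET,
	};
	int i;

	/* wakeups fire at most once per 1024 charges once usage is high */
	for (i = 0; i < 5000; i++)
		toy_charge(&memcg, 1);
	return 0;
}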