[RFC 3/6] mm, thp: introduce zero subpages reclaim threshold

Add memory.thp_reclaim_ctrl for each memory cgroup to control THP
reclaim.

The first knob, "threshold", sets the reclaim threshold. The default
value is 16, which means that if a huge page is estimated to contain
at least 16 zero subpages, it can be split and its zero subpages
reclaimed when zero subpage reclaim is enabled. With 4K base pages,
16 out of HPAGE_PMD_NR (512) subpages corresponds to the 1/32 ratio
that was hardcoded before this patch (HPAGE_PMD_NR >> 5).
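
To make "estimated" concrete, here is a rough user-space sketch of
the sampling heuristic (an illustration only, not the kernel code:
SUBPAGES, SUBPAGE_SIZE and the exact probe stride are assumptions,
and hpage stands for a mapped, aligned PMD-sized region):

  #include <stdbool.h>

  #define SUBPAGES     512   /* HPAGE_PMD_NR with 4K base pages */
  #define SUBPAGE_SIZE 4096

  /*
   * Probe one unsigned long per subpage, rotating the in-page offset;
   * report the huge page as reclaimable once "threshold" probed words
   * read zero.
   */
  static bool estimate_zero(const char *hpage, int threshold)
  {
          unsigned int offset = 0;
          int maybe_zero = 0;

          for (int i = 0; i < SUBPAGES; i++) {
                  if ((offset + 1) * sizeof(long) > SUBPAGE_SIZE)
                          offset = 0;
                  if (*(const unsigned long *)(hpage + i * SUBPAGE_SIZE +
                                               offset * sizeof(long)) == 0 &&
                      ++maybe_zero == threshold)
                          return true;
                  offset++;
          }
          return false;
  }

The threshold trades reclaim opportunity against split cost: lower
values split more huge pages, each yielding fewer reclaimable
subpages.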

You can change this value with:

  echo "threshold $v" > /sys/fs/cgroup/memory/{memcg}/thp_reclaim_ctrl
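
For example (a sketch assuming cgroup v1 mounted at /sys/fs/cgroup,
HPAGE_PMD_NR == 512 so valid values are 1..512, and "test" as a
hypothetical memcg; the output format follows the seq_show handler
below):

  # cat /sys/fs/cgroup/memory/test/thp_reclaim_ctrl
  threshold	16
  # echo "threshold 64" > /sys/fs/cgroup/memory/test/thp_reclaim_ctrl
  # cat /sys/fs/cgroup/memory/test/thp_reclaim_ctrl
  threshold	64
  # echo "threshold 0" > /sys/fs/cgroup/memory/test/thp_reclaim_ctrl
  -bash: echo: write error: Invalid argument

A memcg created after the write inherits its parent's current value
(see the last mm/memcontrol.c hunk).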

Signed-off-by: Ning Zhang <ningzhang@xxxxxxxxxxxxxxxxx>
---
 include/linux/huge_mm.h    |  3 ++-
 include/linux/memcontrol.h |  3 +++
 mm/huge_memory.c           |  9 ++++---
 mm/memcontrol.c            | 62 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/vmscan.c                |  4 ++-
 5 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 04607b1..304e3df 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -187,7 +187,8 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 
 #ifdef CONFIG_MEMCG
 extern int global_thp_reclaim;
-int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page);
+int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page,
+		  int threshold);
 unsigned long zsr_reclaim_hpage(struct lruvec *lruvec, struct page *page);
 static inline struct list_head *hpage_reclaim_list(struct page *page)
 {
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f99f13f..4815c56 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -237,6 +237,8 @@ enum thp_reclaim_state {
 	THP_RECLAIM_ENABLE,
 	THP_RECLAIM_MEMCG, /* For global configuration */
 };
+
+#define THP_RECLAIM_THRESHOLD_DEFAULT  16
 #endif
 /*
  * The memory controller data structure. The memory controller controls both
@@ -356,6 +358,7 @@ struct mem_cgroup {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	struct deferred_split deferred_split_queue;
 	int thp_reclaim;
+	int thp_reclaim_threshold;
 #endif
 
 	struct mem_cgroup_per_node *nodeinfo[];
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 84fd738..40a9879 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3270,7 +3270,7 @@ static inline bool is_zero_page(struct page *page)
- * We'll split the huge page iff it contains at least 1/32 zeros,
- * estimate it by checking some discrete unsigned long values.
+ * We'll split the huge page iff it contains at least @threshold zero
+ * subpages, estimated by checking some discrete unsigned long values.
  */
-static bool hpage_estimate_zero(struct page *page)
+static bool hpage_estimate_zero(struct page *page, int threshold)
 {
 	unsigned int i, maybe_zero_pages = 0, offset = 0;
 	void *addr;
@@ -3281,7 +3281,7 @@ static bool hpage_estimate_zero(struct page *page)
 		if (unlikely((offset + 1) * BYTES_PER_LONG > PAGE_SIZE))
 			offset = 0;
 		if (*(const unsigned long *)(addr + offset) == 0UL) {
-			if (++maybe_zero_pages == HPAGE_PMD_NR >> 5) {
+			if (++maybe_zero_pages == threshold) {
 				kunmap(page);
 				return true;
 			}
@@ -3456,7 +3456,8 @@ static unsigned long reclaim_zero_subpages(struct list_head *list,
  * be stored in reclaim_page; otherwise, just delete the page from the
  * queue.
  */
-int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page)
+int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page,
+		  int threshold)
 {
 	struct page *page = NULL;
 	unsigned long flags;
@@ -3482,7 +3483,7 @@ int zsr_get_hpage(struct hpage_reclaim *hr_queue, struct page **reclaim_page)
 
 	spin_unlock_irqrestore(&hr_queue->reclaim_queue_lock, flags);
 
-	if (hpage_can_reclaim(page) && hpage_estimate_zero(page) &&
+	if (hpage_can_reclaim(page) && hpage_estimate_zero(page, threshold) &&
 	    !isolate_lru_page(page)) {
 		__mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON,
 				      HPAGE_PMD_NR);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae96781..7ba3c69 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4498,6 +4498,61 @@ static int mem_cgroup_thp_reclaim_write(struct cgroup_subsys_state *css,
 
 	return 0;
 }
+
+static inline char *strsep_s(char **s, const char *ct)
+{
+	char *p;
+
+	while ((p = strsep(s, ct))) {
+		if (*p)
+			return p;
+	}
+
+	return NULL;
+}
+
+static int memcg_thp_reclaim_ctrl_show(struct seq_file *m, void *v)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+	int thp_reclaim_threshold = READ_ONCE(memcg->thp_reclaim_threshold);
+
+	seq_printf(m, "threshold\t%d\n", thp_reclaim_threshold);
+
+	return 0;
+}
+
+static ssize_t memcg_thp_reclaim_ctrl_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+	char *key, *value;
+	int ret;
+
+	key = strsep_s(&buf, " \t\n");
+	if (!key)
+		return -EINVAL;
+
+	if (!strcmp(key, "threshold")) {
+		int threshold;
+
+		value = strsep_s(&buf, " \t\n");
+		if (!value)
+			return -EINVAL;
+
+		ret = kstrtoint(value, 0, &threshold);
+		if (ret)
+			return ret;
+
+		if (threshold > HPAGE_PMD_NR || threshold < 1)
+			return -EINVAL;
+
+		xchg(&memcg->thp_reclaim_threshold, threshold);
+	} else
+		return -EINVAL;
+
+	return nbytes;
+}
 #endif
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -5068,6 +5123,11 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 		.read_u64 = mem_cgroup_thp_reclaim_read,
 		.write_u64 = mem_cgroup_thp_reclaim_write,
 	},
+	{
+		.name = "thp_reclaim_ctrl",
+		.seq_show = memcg_thp_reclaim_ctrl_show,
+		.write = memcg_thp_reclaim_ctrl_write,
+	},
 #endif
 	{ },	/* terminate */
 };
@@ -5265,6 +5325,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	memcg->deferred_split_queue.split_queue_len = 0;
 
 	memcg->thp_reclaim = THP_RECLAIM_DISABLE;
+	memcg->thp_reclaim_threshold = THP_RECLAIM_THRESHOLD_DEFAULT;
 #endif
 	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
 	return memcg;
@@ -5300,6 +5361,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		memcg->thp_reclaim = parent->thp_reclaim;
+		memcg->thp_reclaim_threshold = parent->thp_reclaim_threshold;
 #endif
 	} else {
 		page_counter_init(&memcg->memory, NULL);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f4ff14d..fcc80a6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2794,6 +2794,7 @@ static unsigned long reclaim_hpage_zero_subpages(struct lruvec *lruvec,
 	struct mem_cgroup *memcg;
 	struct hpage_reclaim *hr_queue;
 	int nid = lruvec->pgdat->node_id;
+	int threshold;
 	unsigned long nr_reclaimed = 0, nr_scanned = 0, nr_to_scan;
 
 	memcg = lruvec_memcg(lruvec);
@@ -2806,11 +2807,12 @@ static unsigned long reclaim_hpage_zero_subpages(struct lruvec *lruvec,
 
 	/* The last scan loop will scan all the huge pages. */
 	nr_to_scan = priority == 0 ? 0 : MAX_SCAN_HPAGE;
+	threshold = READ_ONCE(memcg->thp_reclaim_threshold);
 
 	do {
 		struct page *page = NULL;
 
-		if (zsr_get_hpage(hr_queue, &page))
+		if (zsr_get_hpage(hr_queue, &page, threshold))
 			break;
 
 		if (!page)
-- 
1.8.3.1