Add a new knob "/sys/kernel/mm/page_reporting/reporting_factor" within
[0, 100], and stop page reporting once the configured threshold of
reported pages is reached. The default is 100, which imposes no
limitation. A percentage is used to reflect the fact that reporting
works on a per-zone basis: for example, writing 50 stops reporting in
a zone once its reported pages reach half of that zone's managed pages.

This knob lets us cap the total number of reported pages to avoid EPT
violations that can hurt workload performance, e.g. during guest memory
allocation bursts or long-tail host memory reclaim. It makes it possible
to implement customized control policies according to VM priority, and
is also useful for testing, gray release, etc.

Signed-off-by: Xunlei Pang <xlpang@xxxxxxxxxxxxxxxxx>
---
 mm/page_reporting.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index ba195ea..86c6479 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -11,6 +11,8 @@
 #include "page_reporting.h"
 #include "internal.h"
 
+static int reporting_factor = 100;
+
 #define PAGE_REPORTING_DELAY	(2 * HZ)
 static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
 
@@ -134,6 +136,7 @@ void __page_reporting_notify(void)
 	struct list_head *list = &area->free_list[mt];
 	unsigned int page_len = PAGE_SIZE << order;
 	struct page *page, *next;
+	unsigned long threshold;
 	long budget;
 	int err = 0;
 
@@ -144,6 +147,7 @@ void __page_reporting_notify(void)
 	if (list_empty(list))
 		return err;
 
+	threshold = atomic_long_read(&zone->managed_pages) * reporting_factor / 100;
 	spin_lock_irq(&zone->lock);
 
 	/*
@@ -181,6 +185,8 @@ void __page_reporting_notify(void)
 
 		/* Attempt to pull page from list and place in scatterlist */
 		if (*offset) {
+			unsigned long nr_pages;
+
 			if (!__isolate_free_page(page, order)) {
 				next = page;
 				break;
@@ -190,6 +196,12 @@ void __page_reporting_notify(void)
 			--(*offset);
 			sg_set_page(&sgl[*offset], page, page_len, 0);
 
+			nr_pages = (PAGE_REPORTING_CAPACITY - *offset) << order;
+			if (zone->reported_pages + nr_pages >= threshold) {
+				err = 1;
+				break;
+			}
+
 			continue;
 		}
 
@@ -244,9 +256,13 @@ void __page_reporting_notify(void)
 			    struct scatterlist *sgl, struct zone *zone)
 {
 	unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
-	unsigned long watermark;
+	unsigned long watermark, threshold;
 	int err = 0;
 
+	threshold = atomic_long_read(&zone->managed_pages) * reporting_factor / 100;
+	if (zone->reported_pages >= threshold)
+		return err;
+
 	/* Generate minimum watermark to be able to guarantee progress */
 	watermark = low_wmark_pages(zone) +
 		    (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER);
@@ -267,11 +283,18 @@ void __page_reporting_notify(void)
 			err = page_reporting_cycle(prdev, zone, order, mt,
 						   sgl, &offset);
 
+			/* Exceeded the threshold, go report the leftover pages */
+			if (err > 0) {
+				err = 0;
+				goto leftover;
+			}
+
 			if (err)
 				return err;
 		}
 	}
 
+leftover:
 	/* report the leftover pages before going idle */
 	leftover = PAGE_REPORTING_CAPACITY - offset;
 	if (leftover) {
@@ -435,9 +458,44 @@ static ssize_t refault_kbytes_store(struct kobject *kobj,
 }
 REPORTING_ATTR(refault_kbytes);
 
+static ssize_t reporting_factor_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", reporting_factor);
+}
+
+static ssize_t reporting_factor_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t count)
+{
+	int new, old, err;
+	struct page *page;
+
+	err = kstrtoint(buf, 10, &new);
+	if (err || (new < 0 || new > 100))
+		return -EINVAL;
+
+	old = reporting_factor;
+	reporting_factor = new;
+
+	if (new <= old)
+		goto out;
+
+	/* Trigger reporting with the new, larger reporting_factor */
+	page = alloc_pages(__GFP_HIGHMEM | __GFP_NOWARN,
+			   PAGE_REPORTING_MIN_ORDER);
+	if (page)
+		__free_pages(page, PAGE_REPORTING_MIN_ORDER);
+
+out:
+	return count;
+}
+REPORTING_ATTR(reporting_factor);
+
 static struct attribute *reporting_attrs[] = {
 	&reported_kbytes_attr.attr,
 	&refault_kbytes_attr.attr,
+	&reporting_factor_attr.attr,
 	NULL,
 };
-- 
1.8.3.1
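
For anyone wanting to try the knob, below is a minimal userspace sketch
(illustrative only, not part of the patch; it assumes the sysfs file
added by this series is present and writable):

/*
 * Hypothetical test program: cap page reporting at 50% of each zone's
 * managed pages by writing to the new knob. The sysfs path comes from
 * this patch; the program itself is just an illustration.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *knob = "/sys/kernel/mm/page_reporting/reporting_factor";
	FILE *f = fopen(knob, "w");

	if (!f) {
		perror(knob);
		return EXIT_FAILURE;
	}
	/* The store hook rejects values outside [0, 100] with -EINVAL. */
	if (fprintf(f, "50\n") < 0)
		perror("write");
	/* sysfs sees the value when the stream is flushed on close. */
	if (fclose(f) != 0)
		perror("close");
	return 0;
}

With reporting_factor set to 50, a zone with 4 GiB managed (1048576
4 KiB pages) stops reporting once its reported pages reach 524288
pages (2 GiB).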