This patch adds the 'delay_millisecs', 'mini_order' and 'batch_size' tunables under '/sys/kernel/mm/page_report/'. Usage: "delay_millisecs": The delay, in milliseconds, between a page being freed and the reporting work starting to run. "mini_order": Only pages of order equal to or greater than 'mini_order' will be reported. "batch_size": The worker is woken up only when the total size of the freed pages is greater than 'batch_size'. Cc: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxx> Cc: David Hildenbrand <david@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Alex Williamson <alex.williamson@xxxxxxxxxx> Signed-off-by: liliangleo <liliangleo@xxxxxxxxxxxxxx> --- mm/page_reporting.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++-- mm/page_reporting.h | 4 +- 2 files changed, 141 insertions(+), 7 deletions(-) diff --git a/mm/page_reporting.c b/mm/page_reporting.c index dc7a22a4b752..cc6a42596560 100644 --- a/mm/page_reporting.c +++ b/mm/page_reporting.c @@ -7,15 +7,19 @@ #include <linux/delay.h> #include <linux/scatterlist.h> #include <linux/sched.h> +#include <linux/kobject.h> #include "page_reporting.h" #include "internal.h" -#define PAGE_REPORTING_DELAY (2 * HZ) #define MAX_SCAN_NUM 1024 unsigned long page_report_batch_size __read_mostly = 4 * 1024 * 1024UL; +static unsigned long page_report_delay_millisecs __read_mostly = 2000; + +unsigned int page_report_mini_order __read_mostly = 8; + static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly; enum { @@ -48,7 +52,8 @@ __page_reporting_request(struct page_reporting_dev_info *prdev) * now we are limiting this to running no more than once every * couple of seconds. 
*/ - schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY); + schedule_delayed_work(&prdev->work, + msecs_to_jiffies(page_report_delay_millisecs)); } /* notify prdev of free page reporting request */ @@ -260,7 +265,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev, /* Generate minimum watermark to be able to guarantee progress */ watermark = low_wmark_pages(zone) + - (PAGE_REPORTING_CAPACITY << PAGE_REPORTING_MIN_ORDER); + (PAGE_REPORTING_CAPACITY << page_report_mini_order); /* * Cancel request if insufficient free memory or if we failed @@ -270,7 +275,7 @@ page_reporting_process_zone(struct page_reporting_dev_info *prdev, return err; /* Process each free list starting from lowest order/mt */ - for (order = PAGE_REPORTING_MIN_ORDER; order < MAX_ORDER; order++) { + for (order = page_report_mini_order; order < MAX_ORDER; order++) { for (mt = 0; mt < MIGRATE_TYPES; mt++) { /* We do not pull pages from the isolate free list */ if (is_migrate_isolate(mt)) @@ -337,7 +342,8 @@ static void page_reporting_process(struct work_struct *work) */ state = atomic_cmpxchg(&prdev->state, state, PAGE_REPORTING_IDLE); if (state == PAGE_REPORTING_REQUESTED) - schedule_delayed_work(&prdev->work, PAGE_REPORTING_DELAY); + schedule_delayed_work(&prdev->work, + msecs_to_jiffies(page_report_delay_millisecs)); } static DEFINE_MUTEX(page_reporting_mutex); @@ -393,3 +399,131 @@ void page_reporting_unregister(struct page_reporting_dev_info *prdev) mutex_unlock(&page_reporting_mutex); } EXPORT_SYMBOL_GPL(page_reporting_unregister); + +static ssize_t batch_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", page_report_batch_size); +} + +static ssize_t batch_size_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long size; + int err; + + err = kstrtoul(buf, 10, &size); + if (err || size >= UINT_MAX) + return -EINVAL; + + page_report_batch_size = size; + + return 
count; +} + +static struct kobj_attribute batch_size_attr = + __ATTR(batch_size, 0644, batch_size_show, batch_size_store); + +static ssize_t delay_millisecs_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", page_report_delay_millisecs); +} + +static ssize_t delay_millisecs_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned long msecs; + int err; + + err = kstrtoul(buf, 10, &msecs); + if (err || msecs >= UINT_MAX) + return -EINVAL; + + page_report_delay_millisecs = msecs; + + return count; +} + +static struct kobj_attribute wake_delay_millisecs_attr = + __ATTR(delay_millisecs, 0644, delay_millisecs_show, + delay_millisecs_store); + +static ssize_t mini_order_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", page_report_mini_order); +} + +static ssize_t mini_order_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + unsigned int order; + int err; + + err = kstrtouint(buf, 10, &order); + if (err || order >= MAX_ORDER) + return -EINVAL; + + if (page_report_mini_order != order) { + mutex_lock(&page_reporting_mutex); + page_report_mini_order = order; + mutex_unlock(&page_reporting_mutex); + } + + return count; +} + +static struct kobj_attribute mini_order_attr = + __ATTR(mini_order, 0644, mini_order_show, mini_order_store); + +static struct attribute *page_report_attr[] = { + &mini_order_attr.attr, + &wake_delay_millisecs_attr.attr, + &batch_size_attr.attr, + NULL, +}; + +static struct attribute_group page_report_attr_group = { + .attrs = page_report_attr, +}; + +static int __init page_report_init_sysfs(struct kobject **page_report_kobj) +{ + int err; + + *page_report_kobj = kobject_create_and_add("page_report", mm_kobj); + if (unlikely(!*page_report_kobj)) { + pr_err("page_report: failed to create page_report kobject\n"); + return -ENOMEM; + } + + err = 
sysfs_create_group(*page_report_kobj, &page_report_attr_group); + if (err) { + pr_err("page_report: failed to register page_report group\n"); + goto delete_obj; + } + + return 0; + +delete_obj: + kobject_put(*page_report_kobj); + return err; +} + +static int __init page_report_init(void) +{ + int err; + struct kobject *page_report_kobj; + + msecs_to_jiffies(page_report_delay_millisecs); + err = page_report_init_sysfs(&page_report_kobj); + if (err) + return err; + + return 0; +} +subsys_initcall(page_report_init); diff --git a/mm/page_reporting.h b/mm/page_reporting.h index f18c85ecdfe0..5e52777c934d 100644 --- a/mm/page_reporting.h +++ b/mm/page_reporting.h @@ -10,7 +10,7 @@ #include <asm/pgtable.h> #include <linux/scatterlist.h> -#define PAGE_REPORTING_MIN_ORDER pageblock_order +extern unsigned int page_report_mini_order; extern unsigned long page_report_batch_size; @@ -42,7 +42,7 @@ static inline void page_reporting_notify_free(unsigned int order) return; /* Determine if we have crossed reporting threshold */ - if (order < PAGE_REPORTING_MIN_ORDER) + if (order < page_report_mini_order) return; batch_size += (1 << order) << PAGE_SHIFT; -- 2.14.1