Two new sysfs files are added to demote hugtlb pages. These files are both per-hugetlb page size and per node. Files are: demote_size - The size in Kb that pages are demoted to. demote - The number of huge pages to demote. Writing a value to demote will result in an attempt to demote that number of hugetlb pages to an appropriate number of demote_size pages. This patch does not provide full demote functionality. It only provides the sysfs interfaces and uses existing code to free pages to the buddy allocator is demote_size == PAGESIZE. Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> --- include/linux/hugetlb.h | 1 + mm/hugetlb.c | 117 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 1 deletion(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cccd1aab69dd..5e9d6c8ab411 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -558,6 +558,7 @@ struct hstate { int next_nid_to_alloc; int next_nid_to_free; unsigned int order; + unsigned int demote_order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 8fb42c6dd74b..161732ba7aaf 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2492,7 +2492,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) static void __init hugetlb_init_hstates(void) { - struct hstate *h; + struct hstate *h, *h2; for_each_hstate(h) { if (minimum_order > huge_page_order(h)) @@ -2501,6 +2501,17 @@ static void __init hugetlb_init_hstates(void) /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); + + /* + * Set demote order for each hstate. Note that + * h->demote_order is initially 0. + */ + for_each_hstate(h2) { + if (h2 == h) + continue; + if (h2->order < h->order && h2->order > h->demote_order) + h->demote_order = h2->order; + } } VM_BUG_ON(minimum_order == UINT_MAX); } @@ -2710,6 +2721,20 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, return 0; } +static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) +{ + int rc = 0; + + /* If no demote order, free to buddy */ + if (!h->demote_order) + return free_pool_huge_page(h, nodes_allowed, 0); + + /* + * TODO - demote fucntionality will be added in subsequent patch + */ + return rc; +} + #define HSTATE_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) @@ -2908,12 +2933,100 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj, } HSTATE_ATTR_RO(surplus_hugepages); +static ssize_t demote_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "0\n"); +} + +static ssize_t demote_store_action(struct kobject *kobj, const char *buf, + size_t len) +{ + unsigned long nr_demote; + unsigned long nr_available; + nodemask_t nodes_allowed, *n_mask; + struct hstate *h; + int err; + int nid; + + err = kstrtoul(buf, 10, &nr_demote); + if (err) + return err; + h = kobj_to_hstate(kobj, &nid); + + spin_lock(&hugetlb_lock); + if (nid != NUMA_NO_NODE) { + nr_available = h->free_huge_pages_node[nid]; + init_nodemask_of_node(&nodes_allowed, nid); + n_mask = &nodes_allowed; + } else { + nr_available = h->free_huge_pages; + n_mask = &node_states[N_MEMORY]; + } + nr_available -= h->resv_huge_pages; + if (nr_available <= 0) + goto out; + nr_demote = min(nr_available, nr_demote); + + while (nr_demote) { + if (!demote_pool_huge_page(h, n_mask)) + break; + + cond_resched_lock(&hugetlb_lock); + /* + * We may have dropped the lock above or in the routines to + * demote/free a page. Recompute nr_demote as counts could + * have changed and we want to make sure we do not demote + * a reserved huge page. + */ + nr_demote--; + if (nid != NUMA_NO_NODE) + nr_available = h->free_huge_pages_node[nid]; + else + nr_available = h->free_huge_pages; + nr_available -= h->resv_huge_pages; + if (nr_available <= 0) + nr_demote = 0; + else + nr_demote = min(nr_available, nr_demote); + } + +out: + spin_unlock(&hugetlb_lock); + + return len; +} + +static ssize_t demote_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + return demote_store_action(kobj, buf, len); +} +HSTATE_ATTR(demote); + +static ssize_t demote_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h; + unsigned long demote_size; + int nid; + + h = kobj_to_hstate(kobj, &nid); + demote_size = h->demote_order; + + return sysfs_emit(buf, "%lukB\n", + (unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K); +} +HSTATE_ATTR_RO(demote_size); + static struct attribute *hstate_attrs[] = { &nr_hugepages_attr.attr, &nr_overcommit_hugepages_attr.attr, &free_hugepages_attr.attr, &resv_hugepages_attr.attr, &surplus_hugepages_attr.attr, + &demote_size_attr.attr, + &demote_attr.attr, #ifdef CONFIG_NUMA &nr_hugepages_mempolicy_attr.attr, #endif @@ -2983,6 +3096,8 @@ static struct attribute *per_node_hstate_attrs[] = { &nr_hugepages_attr.attr, &free_hugepages_attr.attr, &surplus_hugepages_attr.attr, + &demote_size_attr.attr, + &demote_attr.attr, NULL, }; -- 2.29.2