Two new sysfs files are added to demote hugetlb pages. These files are both per-hugetlb page size and per node. Files are: demote_size - The size in kB that pages are demoted to. (read-only) demote - The number of huge pages to demote. (write-only) Writing a value to demote will result in an attempt to demote that number of hugetlb pages to an appropriate number of demote_size pages. This patch does not provide full demote functionality. It only provides the sysfs interfaces and uses existing code to free pages to the buddy allocator if demote_size == PAGESIZE. Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> --- include/linux/hugetlb.h | 1 + mm/hugetlb.c | 121 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 121 insertions(+), 1 deletion(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f7ca1a3870ea..d96e11ce986c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -596,6 +596,7 @@ struct hstate { int next_nid_to_alloc; int next_nid_to_free; unsigned int order; + unsigned int demote_order; unsigned long mask; unsigned long max_huge_pages; unsigned long nr_huge_pages; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 95714fb28150..cebc6dc353f3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2964,7 +2964,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) static void __init hugetlb_init_hstates(void) { - struct hstate *h; + struct hstate *h, *h2; for_each_hstate(h) { if (minimum_order > huge_page_order(h)) @@ -2973,6 +2973,17 @@ static void __init hugetlb_init_hstates(void) /* oversize hugepages were init'ed in early boot */ if (!hstate_is_gigantic(h)) hugetlb_hstate_alloc_pages(h); + + /* + * Set demote order for each hstate. Note that + * h->demote_order is initially 0. 
+ */ + for_each_hstate(h2) { + if (h2 == h) + continue; + if (h2->order < h->order && h2->order > h->demote_order) + h->demote_order = h2->order; + } } VM_BUG_ON(minimum_order == UINT_MAX); } @@ -3213,9 +3224,36 @@ static int set_max_huge_pages(struct hstate *h, unsigned long count, int nid, return 0; } +static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed) + __must_hold(&hugetlb_lock) +{ + int rc = 0; + + lockdep_assert_held(&hugetlb_lock); + /* If no demote order, free to buddy */ + if (!h->demote_order) { + struct page *page = remove_pool_huge_page(h, nodes_allowed, 0); + + if (!page) + return rc; + spin_unlock_irq(&hugetlb_lock); + update_and_free_page(h, page, false); + spin_lock_irq(&hugetlb_lock); + return 1; + } + + /* + * TODO - demote functionality will be added in subsequent patch + */ + return rc; +} + #define HSTATE_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) +#define HSTATE_ATTR_WO(_name) \ + static struct kobj_attribute _name##_attr = __ATTR_WO(_name) + #define HSTATE_ATTR(_name) \ static struct kobj_attribute _name##_attr = \ __ATTR(_name, 0644, _name##_show, _name##_store) @@ -3411,12 +3449,91 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj, } HSTATE_ATTR_RO(surplus_hugepages); +static ssize_t demote_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t len) +{ + unsigned long nr_demote; + unsigned long nr_available; + nodemask_t nodes_allowed, *n_mask; + struct hstate *h; + int err; + int nid; + + err = kstrtoul(buf, 10, &nr_demote); + if (err) + return err; + h = kobj_to_hstate(kobj, &nid); + + /* Synchronize with other sysfs operations modifying huge pages */ + mutex_lock(&h->resize_lock); + + spin_lock_irq(&hugetlb_lock); + if (nid != NUMA_NO_NODE) { + nr_available = h->free_huge_pages_node[nid]; + init_nodemask_of_node(&nodes_allowed, nid); + n_mask = 
&node_states[N_MEMORY]; + } + nr_available -= h->resv_huge_pages; + if (nr_available <= 0) + goto out; + nr_demote = min(nr_available, nr_demote); + + while (nr_demote) { + if (!demote_pool_huge_page(h, n_mask)) + break; + + /* + * We may have dropped the lock in the routines to + * demote/free a page. Recompute nr_demote as counts could + * have changed and we want to make sure we do not demote + * a reserved huge page. + */ + nr_demote--; + if (nid != NUMA_NO_NODE) + nr_available = h->free_huge_pages_node[nid]; + else + nr_available = h->free_huge_pages; + nr_available -= h->resv_huge_pages; + if (nr_available <= 0) + nr_demote = 0; + else + nr_demote = min(nr_available, nr_demote); + } + +out: + spin_unlock_irq(&hugetlb_lock); + mutex_unlock(&h->resize_lock); + + return len; +} +HSTATE_ATTR_WO(demote); + +static ssize_t demote_size_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct hstate *h; + unsigned long demote_size; + int nid; + + h = kobj_to_hstate(kobj, &nid); + demote_size = h->demote_order; + + return sysfs_emit(buf, "%lukB\n", + (unsigned long)(PAGE_SIZE << h->demote_order) / SZ_1K); +} +HSTATE_ATTR_RO(demote_size); + static struct attribute *hstate_attrs[] = { &nr_hugepages_attr.attr, &nr_overcommit_hugepages_attr.attr, &free_hugepages_attr.attr, &resv_hugepages_attr.attr, &surplus_hugepages_attr.attr, + &demote_size_attr.attr, + &demote_attr.attr, #ifdef CONFIG_NUMA &nr_hugepages_mempolicy_attr.attr, #endif @@ -3486,6 +3603,8 @@ static struct attribute *per_node_hstate_attrs[] = { &nr_hugepages_attr.attr, &free_hugepages_attr.attr, &surplus_hugepages_attr.attr, + &demote_size_attr.attr, + &demote_attr.attr, NULL, }; -- 2.31.1