The patch titled Subject: mm/page_alloc.c: memory hotplug: free pages as higher order has been added to the -mm tree. Its filename is memory_hotplug-free-pages-as-higher-order.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/memory_hotplug-free-pages-as-higher-order.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/memory_hotplug-free-pages-as-higher-order.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Arun KS <arunks@xxxxxxxxxxxxxx> Subject: mm/page_alloc.c: memory hotplug: free pages as higher order When pages are freed at a higher order, the time the buddy allocator spends coalescing pages can be reduced. With a section size of 256MB, the hot add latency of a single section improves from 50-60 ms to less than 1 ms, i.e. the hot add latency improves by roughly 60 times. Modify the external providers of the online callback to align with the change. Link: http://lkml.kernel.org/r/1538727006-5727-1-git-send-email-arunks@xxxxxxxxxxxxxx Signed-off-by: Arun KS <arunks@xxxxxxxxxxxxxx> Reviewed-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: K. Y. 
Srinivasan <kys@xxxxxxxxxxxxx> Cc: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> Cc: Stephen Hemminger <sthemmin@xxxxxxxxxxxxx> Cc: Boris Ostrovsky <boris.ostrovsky@xxxxxxxxxx> Cc: Juergen Gross <jgross@xxxxxxxx> Cc: Dan Williams <dan.j.williams@xxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Cc: Oscar Salvador <osalvador@xxxxxxx> Cc: Mathieu Malaterre <malat@xxxxxxxxxx> Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Cc: Souptick Joarder <jrdr.linux@xxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx> Cc: Aaron Lu <aaron.lu@xxxxxxxxx> Cc: Srivatsa Vaddagiri <vatsa@xxxxxxxxxxxxxx> Cc: Vinayak Menon <vinmenon@xxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- drivers/hv/hv_balloon.c | 6 ++-- drivers/xen/balloon.c | 23 +++++++++++----- include/linux/memory_hotplug.h | 2 - mm/internal.h | 1 mm/memory_hotplug.c | 42 ++++++++++++++++++++++--------- mm/page_alloc.c | 8 ++--- 6 files changed, 55 insertions(+), 27 deletions(-) --- a/drivers/hv/hv_balloon.c~memory_hotplug-free-pages-as-higher-order +++ a/drivers/hv/hv_balloon.c @@ -771,7 +771,7 @@ static void hv_mem_hot_add(unsigned long } } -static void hv_online_page(struct page *pg) +static int hv_online_page(struct page *pg, unsigned int order) { struct hv_hotadd_state *has; unsigned long flags; @@ -783,10 +783,12 @@ static void hv_online_page(struct page * if ((pfn < has->start_pfn) || (pfn >= has->end_pfn)) continue; - hv_page_online_one(has, pg); + hv_bring_pgs_online(has, pfn, (1UL << order)); break; } spin_unlock_irqrestore(&dm_device.ha_lock, flags); + + return 0; } static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) --- a/drivers/xen/balloon.c~memory_hotplug-free-pages-as-higher-order +++ a/drivers/xen/balloon.c @@ -390,8 +390,8 @@ static enum bp_state reserve_additional_ /* * add_memory_resource() will call online_pages() which in its 
turn - * will call xen_online_page() callback causing deadlock if we don't - * release balloon_mutex here. Unlocking here is safe because the + * will call xen_bring_pgs_online() callback causing deadlock if we + * don't release balloon_mutex here. Unlocking here is safe because the * callers drop the mutex before trying again. */ mutex_unlock(&balloon_mutex); @@ -411,15 +411,22 @@ static enum bp_state reserve_additional_ return BP_ECANCELED; } -static void xen_online_page(struct page *page) +static int xen_bring_pgs_online(struct page *pg, unsigned int order) { - __online_page_set_limits(page); + unsigned long i, size = (1 << order); + unsigned long start_pfn = page_to_pfn(pg); + struct page *p; + pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn); mutex_lock(&balloon_mutex); - - __balloon_append(page); - + for (i = 0; i < size; i++) { + p = pfn_to_page(start_pfn + i); + __online_page_set_limits(p); + __balloon_append(p); + } mutex_unlock(&balloon_mutex); + + return 0; } static int xen_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) @@ -744,7 +751,7 @@ static int __init balloon_init(void) balloon_stats.max_retry_count = RETRY_UNLIMITED; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - set_online_page_callback(&xen_online_page); + set_online_page_callback(&xen_bring_pgs_online); register_memory_notifier(&xen_memory_nb); register_sysctl_table(xen_root); --- a/include/linux/memory_hotplug.h~memory_hotplug-free-pages-as-higher-order +++ a/include/linux/memory_hotplug.h @@ -87,7 +87,7 @@ extern int test_pages_in_a_zone(unsigned unsigned long *valid_start, unsigned long *valid_end); extern void __offline_isolated_pages(unsigned long, unsigned long); -typedef void (*online_page_callback_t)(struct page *page); +typedef int (*online_page_callback_t)(struct page *page, unsigned int order); extern int set_online_page_callback(online_page_callback_t callback); extern int restore_online_page_callback(online_page_callback_t callback); --- 
a/mm/internal.h~memory_hotplug-free-pages-as-higher-order +++ a/mm/internal.h @@ -163,6 +163,7 @@ static inline struct page *pageblock_pfn extern int __isolate_free_page(struct page *page, unsigned int order); extern void __free_pages_bootmem(struct page *page, unsigned long pfn, unsigned int order); +extern void __free_pages_core(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned int order); extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); --- a/mm/memory_hotplug.c~memory_hotplug-free-pages-as-higher-order +++ a/mm/memory_hotplug.c @@ -47,7 +47,7 @@ * and restore_online_page_callback() for generic callback restore. */ -static void generic_online_page(struct page *page); +static int generic_online_page(struct page *page, unsigned int order); static online_page_callback_t online_page_callback = generic_online_page; static DEFINE_MUTEX(online_page_callback_lock); @@ -655,26 +655,44 @@ void __online_page_free(struct page *pag } EXPORT_SYMBOL_GPL(__online_page_free); -static void generic_online_page(struct page *page) +static int generic_online_page(struct page *page, unsigned int order) { - __online_page_set_limits(page); - __online_page_increment_counters(page); - __online_page_free(page); + __free_pages_core(page, order); + totalram_pages += (1UL << order); +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page)) + totalhigh_pages += (1UL << order); +#endif + return 0; +} + +static int online_pages_blocks(unsigned long start, unsigned long nr_pages) +{ + unsigned long end = start + nr_pages; + int order, ret, onlined_pages = 0; + + while (start < end) { + order = min(MAX_ORDER - 1, + get_order(PFN_PHYS(end) - PFN_PHYS(start))); + + ret = (*online_page_callback)(pfn_to_page(start), order); + if (!ret) + onlined_pages += (1UL << order); + else if (ret > 0) + onlined_pages += ret; + + start += (1UL << order); + } + return onlined_pages; } static int online_pages_range(unsigned long start_pfn, 
unsigned long nr_pages, void *arg) { - unsigned long i; unsigned long onlined_pages = *(unsigned long *)arg; - struct page *page; if (PageReserved(pfn_to_page(start_pfn))) - for (i = 0; i < nr_pages; i++) { - page = pfn_to_page(start_pfn + i); - (*online_page_callback)(page); - onlined_pages++; - } + onlined_pages = online_pages_blocks(start_pfn, nr_pages); online_mem_sections(start_pfn, start_pfn + nr_pages); --- a/mm/page_alloc.c~memory_hotplug-free-pages-as-higher-order +++ a/mm/page_alloc.c @@ -1258,7 +1258,7 @@ static void __free_pages_ok(struct page local_irq_restore(flags); } -static void __init __free_pages_boot_core(struct page *page, unsigned int order) +void __free_pages_core(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; struct page *p = page; @@ -1337,7 +1337,7 @@ void __init __free_pages_bootmem(struct { if (early_page_uninitialised(pfn)) return; - return __free_pages_boot_core(page, order); + return __free_pages_core(page, order); } /* @@ -1427,14 +1427,14 @@ static void __init deferred_free_range(u if (nr_pages == pageblock_nr_pages && (pfn & (pageblock_nr_pages - 1)) == 0) { set_pageblock_migratetype(page, MIGRATE_MOVABLE); - __free_pages_boot_core(page, pageblock_order); + __free_pages_core(page, pageblock_order); return; } for (i = 0; i < nr_pages; i++, page++, pfn++) { if ((pfn & (pageblock_nr_pages - 1)) == 0) set_pageblock_migratetype(page, MIGRATE_MOVABLE); - __free_pages_boot_core(page, 0); + __free_pages_core(page, 0); } } _ Patches currently in -mm which might be from arunks@xxxxxxxxxxxxxx are memory_hotplug-free-pages-as-higher-order.patch mm-page_alloc-remove-software-prefetching-in-__free_pages_core.patch