From: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> Because the implementation was limiting itself to only providing hints on pages huge TLB order sized or larger we introduced the possibility for free pages to slip past us because they are freed as something less than huge TLB in size and aggregated with buddies later. To address that I am adding a new call arch_merge_page which is called after __free_one_page has merged a pair of pages to create a higher order page. By doing this I am able to fill the gap and provide full coverage for all of the pages huge TLB order or larger. Signed-off-by: Alexander Duyck <alexander.h.duyck@xxxxxxxxxxxxxxx> --- arch/x86/include/asm/page.h | 12 ++++++++++++ arch/x86/kernel/kvm.c | 28 ++++++++++++++++++++++++++++ include/linux/gfp.h | 4 ++++ mm/page_alloc.c | 2 ++ 4 files changed, 46 insertions(+) diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 4487ad7a3385..9540a97c9997 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -29,6 +29,18 @@ static inline void arch_free_page(struct page *page, unsigned int order) if (static_branch_unlikely(&pv_free_page_hint_enabled)) __arch_free_page(page, order); } + +struct zone; + +#define HAVE_ARCH_MERGE_PAGE +void __arch_merge_page(struct zone *zone, struct page *page, + unsigned int order); +static inline void arch_merge_page(struct zone *zone, struct page *page, + unsigned int order) +{ + if (static_branch_unlikely(&pv_free_page_hint_enabled)) + __arch_merge_page(zone, page, order); +} #endif #include <linux/range.h> diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 09c91641c36c..957bb4f427bb 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -785,6 +785,34 @@ void __arch_free_page(struct page *page, unsigned int order) PAGE_SIZE << order); } +void __arch_merge_page(struct zone *zone, struct page *page, + unsigned int order) +{ + /* + * The merging logic has merged a set of buddies up to the + * 
KVM_PV_UNUSED_PAGE_HINT_MIN_ORDER. Since that is the case, take + * advantage of this moment to notify the hypervisor of the free + * memory. + */ + if (order != KVM_PV_UNUSED_PAGE_HINT_MIN_ORDER) + return; + + /* + * Drop zone lock while processing the hypercall. This + * should be safe as the page has not yet been added + * to the buddy list as of yet and all the pages that + * were merged have had their buddy/guard flags cleared + * and their order reset to 0. + */ + spin_unlock(&zone->lock); + + kvm_hypercall2(KVM_HC_UNUSED_PAGE_HINT, page_to_phys(page), + PAGE_SIZE << order); + + /* reacquire lock and resume freeing memory */ + spin_lock(&zone->lock); +} + #ifdef CONFIG_PARAVIRT_SPINLOCKS /* Kick a cpu by its apicid. Used to wake up a halted vcpu */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fdab7de7490d..4746d5560193 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -459,6 +459,10 @@ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif +#ifndef HAVE_ARCH_MERGE_PAGE +static inline void +arch_merge_page(struct zone *zone, struct page *page, int order) { } +#endif #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c954f8c1fbc4..7a1309b0b7c5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -913,6 +913,8 @@ static inline void __free_one_page(struct page *page, page = page + (combined_pfn - pfn); pfn = combined_pfn; order++; + + arch_merge_page(zone, page, order); } if (max_order < MAX_ORDER) { /* If we are here, it means order is >= pageblock_order.