[PATCH v2 1/2] mm/page_alloc: conditionally split > pageblock_order pages in free_one_page() and move_freepages_block_isolate()

David Hildenbrand <david@xxxxxxxxxx> · Tue, 10 Dec 2024 11:29:52 +0100

Let's special-case for the common scenarios that:

(a) We are freeing pages <= pageblock_order
(b) We are freeing a page <= MAX_PAGE_ORDER and all pageblocks match
    (especially, no mixture of isolated and non-isolated pageblocks)

When we encounter a > MAX_PAGE_ORDER page, it can only come from
alloc_contig_range(), and we can process MAX_PAGE_ORDER chunks.

When we encounter a >pageblock_order <= MAX_PAGE_ORDER page,
check whether all pageblocks match, and if so (common case), don't
split them up just for the buddy to merge them back.

This makes sure that when we free MAX_PAGE_ORDER chunks to the buddy,
for example during system startups, memory onlining, or when isolating
consecutive pageblocks via alloc_contig_range()/memory offlining, that
we don't unnecessarily split up what we'll immediately merge again,
because the migratetypes match.

Rename split_large_buddy() to __free_one_page_maybe_split(), to make it
clearer what's happening, and handle in it only natural buddy orders,
not the alloc_contig_range(__GFP_COMP) special case: handle that in
free_one_page() only.

In the future, we might want to assume that all pageblocks are equal if
zone->nr_isolate_pageblock  == 0; however, that will require some
zone->nr_isolate_pageblock accounting changes, such that we are
guaranteed to see zone->nr_isolate_pageblock != 0 when there is an
isolated pageblock.

Reviewed-by: Zi Yan <ziy@xxxxxxxxxx>
Acked-by: Yu Zhao <yuzhao@xxxxxxxxxx>
Acked-by: Vlastimil Babka <vbabka@xxxxxxx
Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 mm/page_alloc.c | 73 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 14 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a52c6022c65cb..444e4bcb9c7c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1225,27 +1225,53 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
-/* Split a multi-block free page into its individual pageblocks. */
-static void split_large_buddy(struct zone *zone, struct page *page,
-			      unsigned long pfn, int order, fpi_t fpi)
+static bool pfnblock_migratetype_equal(unsigned long pfn,
+		unsigned long end_pfn, int mt)
 {
-	unsigned long end = pfn + (1 << order);
+	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn | end_pfn, pageblock_nr_pages));
 
+	while (pfn != end_pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		if (unlikely(mt != get_pfnblock_migratetype(page, pfn)))
+			return false;
+		pfn += pageblock_nr_pages;
+	}
+	return true;
+}
+
+static void __free_one_page_maybe_split(struct zone *zone, struct page *page,
+		unsigned long pfn, int order, fpi_t fpi_flags)
+{
+	const unsigned long end_pfn = pfn + (1 << order);
+	int mt = get_pfnblock_migratetype(page, pfn);
+
+	VM_WARN_ON_ONCE(order > MAX_PAGE_ORDER);
 	VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order));
 	/* Caller removed page from freelist, buddy info cleared! */
 	VM_WARN_ON_ONCE(PageBuddy(page));
 
-	if (order > pageblock_order)
-		order = pageblock_order;
+	/*
+	 * With CONFIG_MEMORY_ISOLATION, we might be freeing MAX_ORDER_NR_PAGES
+	 * pages that cover pageblocks with different migratetypes; for example
+	 * only some migratetypes might be MIGRATE_ISOLATE. In that (unlikely)
+	 * case, fallback to freeing individual pageblocks so they get put
+	 * onto the right lists.
+	 */
+	if (!IS_ENABLED(CONFIG_MEMORY_ISOLATION) ||
+	    likely(order <= pageblock_order) ||
+	    pfnblock_migratetype_equal(pfn + pageblock_nr_pages, end_pfn, mt)) {
+		__free_one_page(page, pfn, zone, order, mt, fpi_flags);
+		return;
+	}
 
 	do {
-		int mt = get_pfnblock_migratetype(page, pfn);
-
-		__free_one_page(page, pfn, zone, order, mt, fpi);
-		pfn += 1 << order;
-		if (pfn == end)
+		__free_one_page(page, pfn, zone, pageblock_order, mt, fpi_flags);
+		pfn += pageblock_nr_pages;
+		if (pfn == end_pfn)
 			break;
 		page = pfn_to_page(pfn);
+		mt = get_pfnblock_migratetype(page, pfn);
 	} while (1);
 }
 
@@ -1256,7 +1282,26 @@ static void free_one_page(struct zone *zone, struct page *page,
 	unsigned long flags;
 
 	spin_lock_irqsave(&zone->lock, flags);
-	split_large_buddy(zone, page, pfn, order, fpi_flags);
+	if (likely(order <= MAX_PAGE_ORDER)) {
+		__free_one_page_maybe_split(zone, page, pfn, order, fpi_flags);
+	} else if (IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
+		const unsigned long end_pfn = pfn + (1 << order);
+
+		/*
+		 * The only way we can end up with order > MAX_PAGE_ORDER is
+		 * through alloc_contig_range(__GFP_COMP).
+		 */
+		do {
+			__free_one_page_maybe_split(zone, page, pfn,
+						    MAX_PAGE_ORDER, fpi_flags);
+			pfn += MAX_ORDER_NR_PAGES;
+			if (pfn == end_pfn)
+				break;
+			page = pfn_to_page(pfn);
+		} while (1);
+	} else {
+		WARN_ON_ONCE(1);
+	}
 	spin_unlock_irqrestore(&zone->lock, flags);
 
 	__count_vm_events(PGFREE, 1 << order);
@@ -1792,7 +1837,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
 		del_page_from_free_list(buddy, zone, order,
 					get_pfnblock_migratetype(buddy, pfn));
 		set_pageblock_migratetype(page, migratetype);
-		split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
+		__free_one_page_maybe_split(zone, buddy, pfn, order, FPI_NONE);
 		return true;
 	}
 
@@ -1803,7 +1848,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
 		del_page_from_free_list(page, zone, order,
 					get_pfnblock_migratetype(page, pfn));
 		set_pageblock_migratetype(page, migratetype);
-		split_large_buddy(zone, page, pfn, order, FPI_NONE);
+		__free_one_page_maybe_split(zone, page, pfn, order, FPI_NONE);
 		return true;
 	}
 move:
-- 
2.47.1