Re: [PATCH V2 0/6] mm: page_alloc: freelist migratetype hygiene

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> The attached patch has all the suggested changes, let me know how it
> looks to you. Thanks.

The one I sent has free page accounting issues. The attached one fixes them.

--
Best Regards,
Yan, Zi
From b428b4919e30dc0556406325d3c173a87f45f135 Mon Sep 17 00:00:00 2001
From: Zi Yan <ziy@xxxxxxxxxx>
Date: Mon, 25 Sep 2023 16:55:18 -0400
Subject: [PATCH v2] mm/page_isolation: split cross-pageblock free pages during
 isolation

alloc_contig_range() uses set_migratetype_isolate(), which eventually calls
move_freepages(), to isolate free pages. But move_freepages() was not able
to move free pages partially covered by the specified range, leaving a race
window open[1]. Fix it by splitting such pages before calling
move_freepages().

The common code for finding the start pfn of a free page straddling a given
pfn is factored out into find_straddling_buddy(). split_free_page() is
modified to set the pageblock migratetypes inside the function.

[1] https://lore.kernel.org/linux-mm/20230920160400.GC124289@xxxxxxxxxxx/

Suggested-by: Johannes Weiner <hannes@xxxxxxxxxxx>
Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/page-isolation.h |  12 +++-
 mm/internal.h                  |   3 -
 mm/page_alloc.c                | 103 ++++++++++++++++++------------
 mm/page_isolation.c            | 113 ++++++++++++++++++++++-----------
 4 files changed, 151 insertions(+), 80 deletions(-)

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 901915747960..e82ab67867df 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -33,9 +33,17 @@ static inline bool is_migrate_isolate(int migratetype)
 #define MEMORY_OFFLINE	0x1
 #define REPORT_FAILURE	0x2
 
+unsigned long find_straddling_buddy(unsigned long start_pfn);
+int split_free_page(struct page *free_page,
+			unsigned int order, unsigned long split_pfn_offset,
+			int mt1, int mt2);
 void set_pageblock_migratetype(struct page *page, int migratetype);
-int move_freepages_block(struct zone *zone, struct page *page,
-			 int old_mt, int new_mt);
+int move_freepages(struct zone *zone, unsigned long start_pfn,
+			  unsigned long end_pfn, int old_mt, int new_mt);
+bool prep_move_freepages_block(struct zone *zone, struct page *page,
+				      unsigned long *start_pfn,
+				      unsigned long *end_pfn,
+				      int *num_free, int *num_movable);
 
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			     int migratetype, int flags, gfp_t gfp_flags);
diff --git a/mm/internal.h b/mm/internal.h
index 8c90e966e9f8..cda702359c0f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -457,9 +457,6 @@ void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
 		unsigned long, enum meminit_context, struct vmem_altmap *, int);
 
 
-int split_free_page(struct page *free_page,
-			unsigned int order, unsigned long split_pfn_offset);
-
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 928bb595d7cc..e877fbdb700e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -851,6 +851,8 @@ static inline void __free_one_page(struct page *page,
  * @free_page:		the original free page
  * @order:		the order of the page
  * @split_pfn_offset:	split offset within the page
+ * @mt1:		migratetype set before the offset
+ * @mt2:		migratetype set after the offset
  *
  * Return -ENOENT if the free page is changed, otherwise 0
  *
@@ -860,20 +862,21 @@ static inline void __free_one_page(struct page *page,
  * nothing.
  */
 int split_free_page(struct page *free_page,
-			unsigned int order, unsigned long split_pfn_offset)
+			unsigned int order, unsigned long split_pfn_offset,
+			int mt1, int mt2)
 {
 	struct zone *zone = page_zone(free_page);
 	unsigned long free_page_pfn = page_to_pfn(free_page);
 	unsigned long pfn;
-	unsigned long flags;
 	int free_page_order;
 	int mt;
 	int ret = 0;
 
-	if (split_pfn_offset == 0)
-		return ret;
+	/* zone lock should be held when this function is called */
+	lockdep_assert_held(&zone->lock);
 
-	spin_lock_irqsave(&zone->lock, flags);
+	if (split_pfn_offset == 0 || split_pfn_offset >= (1 << order))
+		return ret;
 
 	if (!PageBuddy(free_page) || buddy_order(free_page) != order) {
 		ret = -ENOENT;
@@ -883,6 +886,10 @@ int split_free_page(struct page *free_page,
 	mt = get_pfnblock_migratetype(free_page, free_page_pfn);
 	del_page_from_free_list(free_page, zone, order, mt);
 
+	set_pageblock_migratetype(free_page, mt1);
+	set_pageblock_migratetype(pfn_to_page(free_page_pfn + split_pfn_offset),
+				  mt2);
+
 	for (pfn = free_page_pfn;
 	     pfn < free_page_pfn + (1UL << order);) {
 		int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn);
@@ -899,7 +906,6 @@ int split_free_page(struct page *free_page,
 			split_pfn_offset = (1UL << order) - (pfn - free_page_pfn);
 	}
 out:
-	spin_unlock_irqrestore(&zone->lock, flags);
 	return ret;
 }
 /*
@@ -1588,21 +1594,52 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
 					unsigned int order) { return NULL; }
 #endif
 
+/*
+ * Scan the range before this pfn for a buddy that straddles it
+ */
+unsigned long find_straddling_buddy(unsigned long start_pfn)
+{
+	int order = 0;
+	struct page *page;
+	unsigned long pfn = start_pfn;
+
+	while (!PageBuddy(page = pfn_to_page(pfn))) {
+		/* Nothing found */
+		if (++order > MAX_ORDER)
+			return start_pfn;
+		pfn &= ~0UL << order;
+	}
+
+	/*
+	 * Found a preceding buddy, but does it straddle?
+	 */
+	if (pfn + (1 << buddy_order(page)) > start_pfn)
+		return pfn;
+
+	/* Nothing found */
+	return start_pfn;
+}
+
 /*
  * Move the free pages in a range to the freelist tail of the requested type.
  * Note that start_page and end_pages are not aligned on a pageblock
  * boundary. If alignment is required, use move_freepages_block()
  */
-static int move_freepages(struct zone *zone, unsigned long start_pfn,
+int move_freepages(struct zone *zone, unsigned long start_pfn,
 			  unsigned long end_pfn, int old_mt, int new_mt)
 {
-	struct page *page;
-	unsigned long pfn;
-	unsigned int order;
+	struct page *start_page = pfn_to_page(start_pfn);
 	int pages_moved = 0;
+	unsigned long pfn = start_pfn;
+
+	VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1));
+	VM_WARN_ON(start_pfn + pageblock_nr_pages - 1 != end_pfn);
+
+	/* Move buddies within the block */
+	while (pfn <= end_pfn) {
+		struct page *page = pfn_to_page(pfn);
+		int order, nr_pages;
 
-	for (pfn = start_pfn; pfn <= end_pfn;) {
-		page = pfn_to_page(pfn);
 		if (!PageBuddy(page)) {
 			pfn++;
 			continue;
@@ -1613,16 +1650,20 @@ static int move_freepages(struct zone *zone, unsigned long start_pfn,
 		VM_BUG_ON_PAGE(page_zone(page) != zone, page);
 
 		order = buddy_order(page);
+		nr_pages = 1 << order;
+
 		move_to_free_list(page, zone, order, old_mt, new_mt);
-		pfn += 1 << order;
-		pages_moved += 1 << order;
+
+		pfn += nr_pages;
+		pages_moved += nr_pages;
 	}
-	set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
+
+	set_pageblock_migratetype(start_page, new_mt);
 
 	return pages_moved;
 }
 
-static bool prep_move_freepages_block(struct zone *zone, struct page *page,
+bool prep_move_freepages_block(struct zone *zone, struct page *page,
 				      unsigned long *start_pfn,
 				      unsigned long *end_pfn,
 				      int *num_free, int *num_movable)
@@ -6138,7 +6179,6 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		       unsigned migratetype, gfp_t gfp_mask)
 {
 	unsigned long outer_start, outer_end;
-	int order;
 	int ret = 0;
 
 	struct compact_control cc = {
@@ -6212,28 +6252,13 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 	 * isolated thus they won't get removed from buddy.
 	 */
 
-	order = 0;
-	outer_start = start;
-	while (!PageBuddy(pfn_to_page(outer_start))) {
-		if (++order > MAX_ORDER) {
-			outer_start = start;
-			break;
-		}
-		outer_start &= ~0UL << order;
-	}
-
-	if (outer_start != start) {
-		order = buddy_order(pfn_to_page(outer_start));
-
-		/*
-		 * outer_start page could be small order buddy page and
-		 * it doesn't include start page. Adjust outer_start
-		 * in this case to report failed page properly
-		 * on tracepoint in test_pages_isolated()
-		 */
-		if (outer_start + (1UL << order) <= start)
-			outer_start = start;
-	}
+	/*
+	 * The buddy found before start may be a small-order page that does
+	 * not contain start. In that case find_straddling_buddy() returns
+	 * start, so that the failed page is reported properly on the
+	 * tracepoint in test_pages_isolated().
+	 */
+	outer_start = find_straddling_buddy(start);
 
 	/* Make sure the range is really isolated. */
 	if (test_pages_isolated(outer_start, end, 0)) {
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 5f8c658c0853..0500dff477f8 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -139,6 +139,62 @@ static struct page *has_unmovable_pages(unsigned long start_pfn, unsigned long e
 	return NULL;
 }
 
+/*
+ * additional steps for moving free pages during page isolation
+ */
+static int move_freepages_for_isolation(struct zone *zone, unsigned long start_pfn,
+			  unsigned long end_pfn, int old_mt, int new_mt)
+{
+	struct page *start_page = pfn_to_page(start_pfn);
+	unsigned long pfn;
+
+	VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1));
+	VM_WARN_ON(start_pfn + pageblock_nr_pages - 1 != end_pfn);
+
+	/*
+	 * A free page may be comprised of 2^n blocks, which means our
+	 * block of interest could be head or tail in such a page.
+	 *
+	 * If we're a tail, update the type of our block, then split
+	 * the page into pageblocks. The splitting will do the leg
+	 * work of sorting the blocks into the right freelists.
+	 *
+	 * If we're a head, split the page into pageblocks first. This
+	 * ensures the migratetypes still match up during the freelist
+	 * removal. Then do the regular scan for buddies in the block
+	 * of interest, which will handle the rest.
+	 *
+	 * In theory, we could try to preserve 2^1 and larger blocks
+	 * that lie outside our range. In practice, MAX_ORDER is
+	 * usually one or two pageblocks anyway, so don't bother.
+	 *
+	 * Note that this only applies to page isolation, which calls
+	 * this on random blocks in the pfn range! When we move stuff
+	 * from inside the page allocator, the pages are coming off
+	 * the freelist (can't be tail) and multi-block pages are
+	 * handled directly in the stealing code (can't be a head).
+	 */
+
+	/* We're a tail */
+	pfn = find_straddling_buddy(start_pfn);
+	if (pfn != start_pfn) {
+		struct page *free_page = pfn_to_page(pfn);
+
+		split_free_page(free_page, buddy_order(free_page),
+				pageblock_nr_pages, old_mt, new_mt);
+		return pageblock_nr_pages;
+	}
+
+	/* We're a head */
+	if (PageBuddy(start_page) && buddy_order(start_page) > pageblock_order) {
+		split_free_page(start_page, buddy_order(start_page),
+				pageblock_nr_pages, new_mt, old_mt);
+		return pageblock_nr_pages;
+	}
+
+	return 0;
+}
+
 /*
  * This function set pageblock migratetype to isolate if no unmovable page is
  * present in [start_pfn, end_pfn). The pageblock must intersect with
@@ -178,15 +234,17 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
 	unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
 			migratetype, isol_flags);
 	if (!unmovable) {
-		int nr_pages;
 		int mt = get_pageblock_migratetype(page);
+		unsigned long start_pfn, end_pfn;
 
-		nr_pages = move_freepages_block(zone, page, mt, MIGRATE_ISOLATE);
-		/* Block spans zone boundaries? */
-		if (nr_pages == -1) {
+		if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, NULL, NULL)) {
 			spin_unlock_irqrestore(&zone->lock, flags);
 			return -EBUSY;
 		}
+
+		if (!move_freepages_for_isolation(zone, start_pfn, end_pfn, mt, MIGRATE_ISOLATE))
+			move_freepages(zone, start_pfn, end_pfn, mt, MIGRATE_ISOLATE);
+
 		zone->nr_isolate_pageblock++;
 		spin_unlock_irqrestore(&zone->lock, flags);
 		return 0;
@@ -253,13 +311,16 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
 	 * allocation.
 	 */
 	if (!isolated_page) {
-		int nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE,
-						    migratetype);
+		unsigned long start_pfn, end_pfn;
+
 		/*
 		 * Isolating this block already succeeded, so this
 		 * should not fail on zone boundaries.
 		 */
-		WARN_ON_ONCE(nr_pages == -1);
+		if (!prep_move_freepages_block(zone, page, &start_pfn, &end_pfn, NULL, NULL))
+			WARN_ON_ONCE(1);
+		else if (!move_freepages_for_isolation(zone, start_pfn, end_pfn, MIGRATE_ISOLATE, migratetype))
+			move_freepages(zone, start_pfn, end_pfn, MIGRATE_ISOLATE, migratetype);
 	} else {
 		set_pageblock_migratetype(page, migratetype);
 		__putback_isolated_page(page, order, migratetype);
@@ -380,11 +441,8 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 		if (PageBuddy(page)) {
 			int order = buddy_order(page);
 
-			if (pfn + (1UL << order) > boundary_pfn) {
-				/* free page changed before split, check it again */
-				if (split_free_page(page, order, boundary_pfn - pfn))
-					continue;
-			}
+			VM_WARN_ONCE(pfn + (1UL << order) > boundary_pfn,
+				"a free page sits across isolation boundary");
 
 			pfn += 1UL << order;
 			continue;
@@ -408,8 +466,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 			 * can be migrated. Otherwise, fail the isolation.
 			 */
 			if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
-				int order;
-				unsigned long outer_pfn;
 				int page_mt = get_pageblock_migratetype(page);
 				bool isolate_page = !is_migrate_isolate_page(page);
 				struct compact_control cc = {
@@ -427,9 +483,11 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 				/*
 				 * XXX: mark the page as MIGRATE_ISOLATE so that
 				 * no one else can grab the freed page after migration.
-				 * Ideally, the page should be freed as two separate
-				 * pages to be added into separate migratetype free
-				 * lists.
+				 * The page should be freed into separate migratetype
+				 * free lists, unless the free page order is greater
+				 * than pageblock order. It is not the case now,
+				 * since gigantic hugetlb is freed as order-0
+				 * pages and LRU pages do not cross pageblocks.
 				 */
 				if (isolate_page) {
 					ret = set_migratetype_isolate(page, page_mt,
@@ -451,25 +509,8 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
 
 				if (ret)
 					goto failed;
-				/*
-				 * reset pfn to the head of the free page, so
-				 * that the free page handling code above can split
-				 * the free page to the right migratetype list.
-				 *
-				 * head_pfn is not used here as a hugetlb page order
-				 * can be bigger than MAX_ORDER, but after it is
-				 * freed, the free page order is not. Use pfn within
-				 * the range to find the head of the free page.
-				 */
-				order = 0;
-				outer_pfn = pfn;
-				while (!PageBuddy(pfn_to_page(outer_pfn))) {
-					/* stop if we cannot find the free page */
-					if (++order > MAX_ORDER)
-						goto failed;
-					outer_pfn &= ~0UL << order;
-				}
-				pfn = outer_pfn;
+
+				pfn = head_pfn + nr_pages;
 				continue;
 			} else
 #endif
-- 
2.42.0

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux