On 25 Apr 2022, at 10:31, Zi Yan wrote: > From: Zi Yan <ziy@xxxxxxxxxx> > > alloc_contig_range() worked at MAX_ORDER_NR_PAGES granularity to avoid > merging pageblocks with different migratetypes. It might unnecessarily > convert extra pageblocks at the beginning and at the end of the range. > Change alloc_contig_range() to work at pageblock granularity. > > Special handling is needed for free pages and in-use pages across the > boundaries of the range specified by alloc_contig_range(). Because these > partially isolated pages causes free page accounting issues. The free > pages will be split and freed into separate migratetype lists; the > in-use pages will be migrated then the freed pages will be handled in > the aforementioned way. > > Reported-by: kernel test robot <lkp@xxxxxxxxx> > Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> > --- > include/linux/page-isolation.h | 4 +- > mm/internal.h | 6 ++ > mm/memory_hotplug.c | 3 +- > mm/page_alloc.c | 54 ++++++++-- > mm/page_isolation.c | 184 ++++++++++++++++++++++++++++++++- > 5 files changed, 233 insertions(+), 18 deletions(-) > > diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h > index e14eddf6741a..5456b7be38ae 100644 > --- a/include/linux/page-isolation.h > +++ b/include/linux/page-isolation.h > @@ -42,7 +42,7 @@ int move_freepages_block(struct zone *zone, struct page *page, > */ > int > start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > - unsigned migratetype, int flags); > + int migratetype, int flags, gfp_t gfp_flags); > > /* > * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. > @@ -50,7 +50,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > */ > void > undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > - unsigned migratetype); > + int migratetype); > > /* > * Test all pages in [start_pfn, end_pfn) are isolated or not. 
> diff --git a/mm/internal.h b/mm/internal.h > index 919fa07e1031..0667abd57634 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -359,6 +359,9 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, > phys_addr_t min_addr, > int nid, bool exact_nid); > > +void split_free_page(struct page *free_page, > + int order, unsigned long split_pfn_offset); > + > #if defined CONFIG_COMPACTION || defined CONFIG_CMA > > /* > @@ -422,6 +425,9 @@ isolate_freepages_range(struct compact_control *cc, > int > isolate_migratepages_range(struct compact_control *cc, > unsigned long low_pfn, unsigned long end_pfn); > + > +int __alloc_contig_migrate_range(struct compact_control *cc, > + unsigned long start, unsigned long end); > #endif > int find_suitable_fallback(struct free_area *area, unsigned int order, > int migratetype, bool only_stealable, bool *can_steal); > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c > index 4c6065e5d274..9f8ae4cb77ee 100644 > --- a/mm/memory_hotplug.c > +++ b/mm/memory_hotplug.c > @@ -1845,7 +1845,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, > /* set above range as isolated */ > ret = start_isolate_page_range(start_pfn, end_pfn, > MIGRATE_MOVABLE, > - MEMORY_OFFLINE | REPORT_FAILURE); > + MEMORY_OFFLINE | REPORT_FAILURE, > + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL); > if (ret) { > reason = "failure to isolate range"; > goto failed_removal_pcplists_disabled; > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index ce23ac8ad085..70ddd9a0bcf3 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -1094,6 +1094,43 @@ static inline void __free_one_page(struct page *page, > page_reporting_notify_free(order); > } > > +/** > + * split_free_page() -- split a free page at split_pfn_offset > + * @free_page: the original free page > + * @order: the order of the page > + * @split_pfn_offset: split offset within the page > + * > + * It is used when the free page crosses two pageblocks with different 
migratetypes > + * at split_pfn_offset within the page. The split free page will be put into > + * separate migratetype lists afterwards. Otherwise, the function achieves > + * nothing. > + */ > +void split_free_page(struct page *free_page, > + int order, unsigned long split_pfn_offset) > +{ > + struct zone *zone = page_zone(free_page); > + unsigned long free_page_pfn = page_to_pfn(free_page); > + unsigned long pfn; > + unsigned long flags; > + int free_page_order; > + > + spin_lock_irqsave(&zone->lock, flags); > + del_page_from_free_list(free_page, zone, order); > + for (pfn = free_page_pfn; > + pfn < free_page_pfn + (1UL << order);) { > + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn); > + > + free_page_order = ffs(split_pfn_offset) - 1; > + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order, > + mt, FPI_NONE); > + pfn += 1UL << free_page_order; > + split_pfn_offset -= (1UL << free_page_order); > + /* we have done the first part, now switch to second part */ > + if (split_pfn_offset == 0) > + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); > + } > + spin_unlock_irqrestore(&zone->lock, flags); > +} > /* > * A bad page could be due to a number of fields. Instead of multiple branches, > * try and check multiple fields with one check. The caller must do a detailed > @@ -8919,7 +8956,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list) > #endif > > /* [start, end) must belong to a single zone. */ > -static int __alloc_contig_migrate_range(struct compact_control *cc, > +int __alloc_contig_migrate_range(struct compact_control *cc, > unsigned long start, unsigned long end) > { > /* This function is based on compact_zone() from compaction.c. 
*/ > @@ -9002,7 +9039,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, > unsigned migratetype, gfp_t gfp_mask) > { > unsigned long outer_start, outer_end; > - unsigned int order; > + int order; > int ret = 0; > > struct compact_control cc = { > @@ -9021,14 +9058,11 @@ int alloc_contig_range(unsigned long start, unsigned long end, > * What we do here is we mark all pageblocks in range as > * MIGRATE_ISOLATE. Because pageblock and max order pages may > * have different sizes, and due to the way page allocator > - * work, we align the range to biggest of the two pages so > - * that page allocator won't try to merge buddies from > - * different pageblocks and change MIGRATE_ISOLATE to some > - * other migration type. > + * work, start_isolate_page_range() has special handlings for this. > * > * Once the pageblocks are marked as MIGRATE_ISOLATE, we > * migrate the pages from an unaligned range (ie. pages that > - * we are interested in). This will put all the pages in > + * we are interested in). This will put all the pages in > * range back to page allocator as MIGRATE_ISOLATE. > * > * When this is done, we take the pages in range from page > @@ -9042,9 +9076,9 @@ int alloc_contig_range(unsigned long start, unsigned long end, > */ > > ret = start_isolate_page_range(pfn_max_align_down(start), > - pfn_max_align_up(end), migratetype, 0); > + pfn_max_align_up(end), migratetype, 0, gfp_mask); > if (ret) > - return ret; > + goto done; > > drain_all_pages(cc.zone); > > @@ -9064,7 +9098,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, > ret = 0; > > /* > - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES > + * Pages from [start, end) are within a pageblock_nr_pages > * aligned blocks that are marked as MIGRATE_ISOLATE. What's > * more, all pages in [start, end) are free in page allocator. 
> * What we are going to do is to allocate all pages from > diff --git a/mm/page_isolation.c b/mm/page_isolation.c > index c2f7a8bb634d..94b3467e5ba2 100644 > --- a/mm/page_isolation.c > +++ b/mm/page_isolation.c > @@ -203,7 +203,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ > return -EBUSY; > } > > -static void unset_migratetype_isolate(struct page *page, unsigned migratetype) > +static void unset_migratetype_isolate(struct page *page, int migratetype) > { > struct zone *zone; > unsigned long flags, nr_pages; > @@ -279,6 +279,157 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > return NULL; > } > > +/** > + * isolate_single_pageblock() -- tries to isolate a pageblock that might be > + * within a free or in-use page. > + * @boundary_pfn: pageblock-aligned pfn that a page might cross > + * @gfp_flags: GFP flags used for migrating pages > + * @isolate_before: isolate the pageblock before the boundary_pfn > + * > + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one > + * pageblock. When not all pageblocks within a page are isolated at the same > + * time, free page accounting can go wrong. For example, in the case of > + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks. > + * [ MAX_ORDER-1 ] > + * [ pageblock0 | pageblock1 ] > + * When either pageblock is isolated, if it is a free page, the page is not > + * split into separate migratetype lists, which is supposed to; if it is an > + * in-use page and freed later, __free_one_page() does not split the free page > + * either. The function handles this by splitting the free page or migrating > + * the in-use page then splitting the free page. 
> + */ > +static int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, > + bool isolate_before) > +{ > + unsigned char saved_mt; > + unsigned long start_pfn; > + unsigned long isolate_pageblock; > + unsigned long pfn; > + struct zone *zone; > + > + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages)); > + > + if (isolate_before) > + isolate_pageblock = boundary_pfn - pageblock_nr_pages; > + else > + isolate_pageblock = boundary_pfn; > + > + /* > + * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid > + * only isolating a subset of pageblocks from a bigger than pageblock > + * free or in-use page. Also make sure all to-be-isolated pageblocks > + * are within the same zone. > + */ > + zone = page_zone(pfn_to_page(isolate_pageblock)); > + start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), > + zone->zone_start_pfn); > + > + saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); > + set_pageblock_migratetype(pfn_to_page(isolate_pageblock), MIGRATE_ISOLATE); > + > + /* > + * Bail out early when the to-be-isolated pageblock does not form > + * a free or in-use page across boundary_pfn: > + * > + * 1. isolate before boundary_pfn: the page after is not online > + * 2. isolate after boundary_pfn: the page before is not online > + * > + * This also ensures correctness. Without it, when isolate after > + * boundary_pfn and [start_pfn, boundary_pfn) are not online, > + * __first_valid_page() will return unexpected NULL in the for loop > + * below. 
> + */ > + if (isolate_before) { > + if (!pfn_to_online_page(boundary_pfn)) > + return 0; > + } else { > + if (!pfn_to_online_page(boundary_pfn - 1)) > + return 0; > + } > + > + for (pfn = start_pfn; pfn < boundary_pfn;) { > + struct page *page = __first_valid_page(pfn, boundary_pfn - pfn); > + > + VM_BUG_ON(!page); > + pfn = page_to_pfn(page); > + /* > + * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any > + * free pages in [start_pfn, boundary_pfn), its head page will > + * always be in the range. > + */ > + if (PageBuddy(page)) { > + int order = buddy_order(page); > + > + if (pfn + (1UL << order) > boundary_pfn) > + split_free_page(page, order, boundary_pfn - pfn); > + pfn += (1UL << order); > + continue; > + } > + /* > + * migrate compound pages then let the free page handling code > + * above do the rest. If migration is not enabled, just fail. > + */ > + if (PageHuge(page) || PageTransCompound(page)) { > +#if defined CONFIG_COMPACTION || defined CONFIG_CMA > + unsigned long nr_pages = compound_nr(page); > + int order = compound_order(page); > + struct page *head = compound_head(page); > + unsigned long head_pfn = page_to_pfn(head); > + int ret; > + struct compact_control cc = { > + .nr_migratepages = 0, > + .order = -1, > + .zone = page_zone(pfn_to_page(head_pfn)), > + .mode = MIGRATE_SYNC, > + .ignore_skip_hint = true, > + .no_set_skip_hint = true, > + .gfp_mask = gfp_flags, > + .alloc_contig = true, > + }; > + INIT_LIST_HEAD(&cc.migratepages); > + > + if (head_pfn + nr_pages < boundary_pfn) { > + pfn += nr_pages; > + continue; > + } > + > + ret = __alloc_contig_migrate_range(&cc, head_pfn, > + head_pfn + nr_pages); > + > + if (ret) > + goto failed; > + /* > + * reset pfn, let the free page handling code above > + * split the free page to the right migratetype list. > + * > + * head_pfn is not used here as a hugetlb page order > + * can be bigger than MAX_ORDER-1, but after it is > + * freed, the free page order is not. 
Use pfn within > + * the range to find the head of the free page and > + * reset order to 0 if a hugetlb page with > + * >MAX_ORDER-1 order is encountered. > + */ > + if (order > MAX_ORDER-1) > + order = 0; > + while (!PageBuddy(pfn_to_page(pfn))) { > + order++; > + pfn &= ~0UL << order; > + } > + continue; > +#else > + goto failed; > +#endif > + } > + > + pfn++; > + } > + return 0; > +failed: > + /* restore the original migratetype */ > + set_pageblock_migratetype(pfn_to_page(isolate_pageblock), saved_mt); > + return -EBUSY; > +} > + > /** > * start_isolate_page_range() - make page-allocation-type of range of pages to > * be MIGRATE_ISOLATE. > @@ -293,6 +444,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * and PageOffline() pages. > * REPORT_FAILURE - report details about the failure to > * isolate the range > + * @gfp_flags: GFP flags used for migrating pages that sit across the > + * range boundaries. > * > * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in > * the range will never be allocated. Any free pages and pages freed in the > @@ -301,6 +454,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * pages in the range finally, the caller have to free all pages in the range. > * test_page_isolated() can be used for test it. > * > + * The function first tries to isolate the pageblocks at the beginning and end > + * of the range, since there might be pages across the range boundaries. > + * Afterwards, it isolates the rest of the range. > + * > * There is no high level synchronization mechanism that prevents two threads > * from trying to isolate overlapping ranges. If this happens, one thread > * will notice pageblocks in the overlapping range already set to isolate. > @@ -321,21 +478,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * Return: 0 on success and -EBUSY if any part of range cannot be isolated. 
> */ > int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > - unsigned migratetype, int flags) > + int migratetype, int flags, gfp_t gfp_flags) > { > unsigned long pfn; > struct page *page; > + int ret; > > BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); > BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); > > - for (pfn = start_pfn; > - pfn < end_pfn; > + /* isolate [start_pfn, start_pfn + pageblock_nr_pages) pageblock */ > + ret = isolate_single_pageblock(start_pfn, gfp_flags, false); > + if (ret) > + return ret; > + > + /* isolate [end_pfn - pageblock_nr_pages, end_pfn) pageblock */ > + ret = isolate_single_pageblock(end_pfn, gfp_flags, true); > + if (ret) { > + unset_migratetype_isolate(pfn_to_page(start_pfn), migratetype); > + return ret; > + } > + > + /* skip isolated pageblocks at the beginning and end */ > + for (pfn = start_pfn + pageblock_nr_pages; > + pfn < end_pfn - pageblock_nr_pages; > pfn += pageblock_nr_pages) { > page = __first_valid_page(pfn, pageblock_nr_pages); > if (page && set_migratetype_isolate(page, migratetype, flags, > start_pfn, end_pfn)) { > undo_isolate_page_range(start_pfn, pfn, migratetype); > + unset_migratetype_isolate( > + pfn_to_page(end_pfn - pageblock_nr_pages), > + migratetype); > return -EBUSY; > } > } > @@ -346,7 +520,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > * Make isolated pages available again. 
> */ > void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > - unsigned migratetype) > + int migratetype) > { > unsigned long pfn; > struct page *page; > -- > 2.35.1 Qian hit a bug caused by this series https://lore.kernel.org/linux-mm/20220426201855.GA1014@qian/ and the fix is: diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 75e454f5cf45..b3f074d1682e 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -367,58 +367,67 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, } /* * migrate compound pages then let the free page handling code - * above do the rest. If migration is not enabled, just fail. + * above do the rest. If migration is not possible, just fail. */ - if (PageHuge(page) || PageTransCompound(page)) { -#if defined CONFIG_COMPACTION || defined CONFIG_CMA + if (PageCompound(page)) { unsigned long nr_pages = compound_nr(page); - int order = compound_order(page); struct page *head = compound_head(page); unsigned long head_pfn = page_to_pfn(head); - int ret; - struct compact_control cc = { - .nr_migratepages = 0, - .order = -1, - .zone = page_zone(pfn_to_page(head_pfn)), - .mode = MIGRATE_SYNC, - .ignore_skip_hint = true, - .no_set_skip_hint = true, - .gfp_mask = gfp_flags, - .alloc_contig = true, - }; - INIT_LIST_HEAD(&cc.migratepages); if (head_pfn + nr_pages < boundary_pfn) { - pfn += nr_pages; + pfn = head_pfn + nr_pages; continue; } - - ret = __alloc_contig_migrate_range(&cc, head_pfn, - head_pfn + nr_pages); - - if (ret) - goto failed; +#if defined CONFIG_COMPACTION || defined CONFIG_CMA /* - * reset pfn, let the free page handling code above - * split the free page to the right migratetype list. - * - * head_pfn is not used here as a hugetlb page order - * can be bigger than MAX_ORDER-1, but after it is - * freed, the free page order is not. 
Use pfn within - * the range to find the head of the free page and - * reset order to 0 if a hugetlb page with - * >MAX_ORDER-1 order is encountered. + * hugetlb, lru compound (THP), and movable compound pages + * can be migrated. Otherwise, fail the isolation. */ - if (order > MAX_ORDER-1) + if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) { + int order; + unsigned long outer_pfn; + int ret; + struct compact_control cc = { + .nr_migratepages = 0, + .order = -1, + .zone = page_zone(pfn_to_page(head_pfn)), + .mode = MIGRATE_SYNC, + .ignore_skip_hint = true, + .no_set_skip_hint = true, + .gfp_mask = gfp_flags, + .alloc_contig = true, + }; + INIT_LIST_HEAD(&cc.migratepages); + + ret = __alloc_contig_migrate_range(&cc, head_pfn, + head_pfn + nr_pages); + + if (ret) + goto failed; + /* + * reset pfn to the head of the free page, so + * that the free page handling code above can split + * the free page to the right migratetype list. + * + * head_pfn is not used here as a hugetlb page order + * can be bigger than MAX_ORDER-1, but after it is + * freed, the free page order is not. Use pfn within + * the range to find the head of the free page. + */ order = 0; - while (!PageBuddy(pfn_to_page(pfn))) { - order++; - pfn &= ~0UL << order; - } - continue; -#else - goto failed; + outer_pfn = pfn; + while (!PageBuddy(pfn_to_page(outer_pfn))) { + if (++order >= MAX_ORDER) { + outer_pfn = pfn; + break; + } + outer_pfn &= ~0UL << order; + } + pfn = outer_pfn; + continue; + } else #endif + goto failed; } pfn++; -- 2.35.1 The fixed-up patch is below for easy review purpose: From fce466e89e50bcb0ebb56d7809db1b8bbea47628 Mon Sep 17 00:00:00 2001 From: Zi Yan <ziy@xxxxxxxxxx> Date: Tue, 26 Apr 2022 23:00:33 -0400 Subject: [PATCH] mm: make alloc_contig_range work at pageblock granularity alloc_contig_range() worked at MAX_ORDER_NR_PAGES granularity to avoid merging pageblocks with different migratetypes. 
It might unnecessarily convert extra pageblocks at the beginning and at the end of the range. Change alloc_contig_range() to work at pageblock granularity. Special handling is needed for free pages and in-use pages across the boundaries of the range specified by alloc_contig_range(), because these partially isolated pages cause free page accounting issues. The free pages will be split and freed into separate migratetype lists; the in-use pages will be migrated then the freed pages will be handled in the aforementioned way. Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> --- include/linux/page-isolation.h | 4 +- mm/internal.h | 6 + mm/memory_hotplug.c | 3 +- mm/page_alloc.c | 54 +++++++-- mm/page_isolation.c | 193 ++++++++++++++++++++++++++++++++- 5 files changed, 242 insertions(+), 18 deletions(-) diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index e14eddf6741a..5456b7be38ae 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -42,7 +42,7 @@ int move_freepages_block(struct zone *zone, struct page *page, */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype, int flags); + int migratetype, int flags, gfp_t gfp_flags); /* * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. @@ -50,7 +50,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, */ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype); + int migratetype); /* * Test all pages in [start_pfn, end_pfn) are isolated or not. 
diff --git a/mm/internal.h b/mm/internal.h index 919fa07e1031..0667abd57634 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -359,6 +359,9 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, int nid, bool exact_nid); +void split_free_page(struct page *free_page, + int order, unsigned long split_pfn_offset); + #if defined CONFIG_COMPACTION || defined CONFIG_CMA /* @@ -422,6 +425,9 @@ isolate_freepages_range(struct compact_control *cc, int isolate_migratepages_range(struct compact_control *cc, unsigned long low_pfn, unsigned long end_pfn); + +int __alloc_contig_migrate_range(struct compact_control *cc, + unsigned long start, unsigned long end); #endif int find_suitable_fallback(struct free_area *area, unsigned int order, int migratetype, bool only_stealable, bool *can_steal); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4c6065e5d274..9f8ae4cb77ee 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1845,7 +1845,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages, /* set above range as isolated */ ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE, - MEMORY_OFFLINE | REPORT_FAILURE); + MEMORY_OFFLINE | REPORT_FAILURE, + GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL); if (ret) { reason = "failure to isolate range"; goto failed_removal_pcplists_disabled; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 93dbe05a6029..6a0d1746c095 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1094,6 +1094,43 @@ static inline void __free_one_page(struct page *page, page_reporting_notify_free(order); } +/** + * split_free_page() -- split a free page at split_pfn_offset + * @free_page: the original free page + * @order: the order of the page + * @split_pfn_offset: split offset within the page + * + * It is used when the free page crosses two pageblocks with different migratetypes + * at split_pfn_offset within the page. 
The split free page will be put into + * separate migratetype lists afterwards. Otherwise, the function achieves + * nothing. + */ +void split_free_page(struct page *free_page, + int order, unsigned long split_pfn_offset) +{ + struct zone *zone = page_zone(free_page); + unsigned long free_page_pfn = page_to_pfn(free_page); + unsigned long pfn; + unsigned long flags; + int free_page_order; + + spin_lock_irqsave(&zone->lock, flags); + del_page_from_free_list(free_page, zone, order); + for (pfn = free_page_pfn; + pfn < free_page_pfn + (1UL << order);) { + int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn); + + free_page_order = ffs(split_pfn_offset) - 1; + __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order, + mt, FPI_NONE); + pfn += 1UL << free_page_order; + split_pfn_offset -= (1UL << free_page_order); + /* we have done the first part, now switch to second part */ + if (split_pfn_offset == 0) + split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); + } + spin_unlock_irqrestore(&zone->lock, flags); +} /* * A bad page could be due to a number of fields. Instead of multiple branches, * try and check multiple fields with one check. The caller must do a detailed @@ -8919,7 +8956,7 @@ static inline void alloc_contig_dump_pages(struct list_head *page_list) #endif /* [start, end) must belong to a single zone. */ -static int __alloc_contig_migrate_range(struct compact_control *cc, +int __alloc_contig_migrate_range(struct compact_control *cc, unsigned long start, unsigned long end) { /* This function is based on compact_zone() from compaction.c. */ @@ -9002,7 +9039,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask) { unsigned long outer_start, outer_end; - unsigned int order; + int order; int ret = 0; struct compact_control cc = { @@ -9021,14 +9058,11 @@ int alloc_contig_range(unsigned long start, unsigned long end, * What we do here is we mark all pageblocks in range as * MIGRATE_ISOLATE. 
Because pageblock and max order pages may * have different sizes, and due to the way page allocator - * work, we align the range to biggest of the two pages so - * that page allocator won't try to merge buddies from - * different pageblocks and change MIGRATE_ISOLATE to some - * other migration type. + * work, start_isolate_page_range() has special handlings for this. * * Once the pageblocks are marked as MIGRATE_ISOLATE, we * migrate the pages from an unaligned range (ie. pages that - * we are interested in). This will put all the pages in + * we are interested in). This will put all the pages in * range back to page allocator as MIGRATE_ISOLATE. * * When this is done, we take the pages in range from page @@ -9042,9 +9076,9 @@ int alloc_contig_range(unsigned long start, unsigned long end, */ ret = start_isolate_page_range(pfn_max_align_down(start), - pfn_max_align_up(end), migratetype, 0); + pfn_max_align_up(end), migratetype, 0, gfp_mask); if (ret) - return ret; + goto done; drain_all_pages(cc.zone); @@ -9064,7 +9098,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, ret = 0; /* - * Pages from [start, end) are within a MAX_ORDER_NR_PAGES + * Pages from [start, end) are within a pageblock_nr_pages * aligned blocks that are marked as MIGRATE_ISOLATE. What's * more, all pages in [start, end) are free in page allocator. 
* What we are going to do is to allocate all pages from diff --git a/mm/page_isolation.c b/mm/page_isolation.c index c2f7a8bb634d..8a0f16d2e4c3 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -203,7 +203,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ return -EBUSY; } -static void unset_migratetype_isolate(struct page *page, unsigned migratetype) +static void unset_migratetype_isolate(struct page *page, int migratetype) { struct zone *zone; unsigned long flags, nr_pages; @@ -279,6 +279,166 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) return NULL; } +/** + * isolate_single_pageblock() -- tries to isolate a pageblock that might be + * within a free or in-use page. + * @boundary_pfn: pageblock-aligned pfn that a page might cross + * @gfp_flags: GFP flags used for migrating pages + * @isolate_before: isolate the pageblock before the boundary_pfn + * + * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one + * pageblock. When not all pageblocks within a page are isolated at the same + * time, free page accounting can go wrong. For example, in the case of + * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks. + * [ MAX_ORDER-1 ] + * [ pageblock0 | pageblock1 ] + * When either pageblock is isolated, if it is a free page, the page is not + * split into separate migratetype lists, which is supposed to; if it is an + * in-use page and freed later, __free_one_page() does not split the free page + * either. The function handles this by splitting the free page or migrating + * the in-use page then splitting the free page. 
+ */ +static int isolate_single_pageblock(unsigned long boundary_pfn, gfp_t gfp_flags, + bool isolate_before) +{ + unsigned char saved_mt; + unsigned long start_pfn; + unsigned long isolate_pageblock; + unsigned long pfn; + struct zone *zone; + + VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages)); + + if (isolate_before) + isolate_pageblock = boundary_pfn - pageblock_nr_pages; + else + isolate_pageblock = boundary_pfn; + + /* + * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid + * only isolating a subset of pageblocks from a bigger than pageblock + * free or in-use page. Also make sure all to-be-isolated pageblocks + * are within the same zone. + */ + zone = page_zone(pfn_to_page(isolate_pageblock)); + start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), + zone->zone_start_pfn); + + saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); + set_pageblock_migratetype(pfn_to_page(isolate_pageblock), MIGRATE_ISOLATE); + + /* + * Bail out early when the to-be-isolated pageblock does not form + * a free or in-use page across boundary_pfn: + * + * 1. isolate before boundary_pfn: the page after is not online + * 2. isolate after boundary_pfn: the page before is not online + * + * This also ensures correctness. Without it, when isolate after + * boundary_pfn and [start_pfn, boundary_pfn) are not online, + * __first_valid_page() will return unexpected NULL in the for loop + * below. + */ + if (isolate_before) { + if (!pfn_to_online_page(boundary_pfn)) + return 0; + } else { + if (!pfn_to_online_page(boundary_pfn - 1)) + return 0; + } + + for (pfn = start_pfn; pfn < boundary_pfn;) { + struct page *page = __first_valid_page(pfn, boundary_pfn - pfn); + + VM_BUG_ON(!page); + pfn = page_to_pfn(page); + /* + * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any + * free pages in [start_pfn, boundary_pfn), its head page will + * always be in the range. 
+ */ + if (PageBuddy(page)) { + int order = buddy_order(page); + + if (pfn + (1UL << order) > boundary_pfn) + split_free_page(page, order, boundary_pfn - pfn); + pfn += (1UL << order); + continue; + } + /* + * migrate compound pages then let the free page handling code + * above do the rest. If migration is not possible, just fail. + */ + if (PageCompound(page)) { + unsigned long nr_pages = compound_nr(page); + struct page *head = compound_head(page); + unsigned long head_pfn = page_to_pfn(head); + + if (head_pfn + nr_pages < boundary_pfn) { + pfn = head_pfn + nr_pages; + continue; + } +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + /* + * hugetlb, lru compound (THP), and movable compound pages + * can be migrated. Otherwise, fail the isolation. + */ + if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) { + int order; + unsigned long outer_pfn; + int ret; + struct compact_control cc = { + .nr_migratepages = 0, + .order = -1, + .zone = page_zone(pfn_to_page(head_pfn)), + .mode = MIGRATE_SYNC, + .ignore_skip_hint = true, + .no_set_skip_hint = true, + .gfp_mask = gfp_flags, + .alloc_contig = true, + }; + INIT_LIST_HEAD(&cc.migratepages); + + ret = __alloc_contig_migrate_range(&cc, head_pfn, + head_pfn + nr_pages); + + if (ret) + goto failed; + /* + * reset pfn to the head of the free page, so + * that the free page handling code above can split + * the free page to the right migratetype list. + * + * head_pfn is not used here as a hugetlb page order + * can be bigger than MAX_ORDER-1, but after it is + * freed, the free page order is not. Use pfn within + * the range to find the head of the free page. 
+ */ + order = 0; + outer_pfn = pfn; + while (!PageBuddy(pfn_to_page(outer_pfn))) { + if (++order >= MAX_ORDER) { + outer_pfn = pfn; + break; + } + outer_pfn &= ~0UL << order; + } + pfn = outer_pfn; + continue; + } else +#endif + goto failed; + } + + pfn++; + } + return 0; +failed: + /* restore the original migratetype */ + set_pageblock_migratetype(pfn_to_page(isolate_pageblock), saved_mt); + return -EBUSY; +} + /** * start_isolate_page_range() - make page-allocation-type of range of pages to * be MIGRATE_ISOLATE. @@ -293,6 +453,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * and PageOffline() pages. * REPORT_FAILURE - report details about the failure to * isolate the range + * @gfp_flags: GFP flags used for migrating pages that sit across the + * range boundaries. * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the @@ -301,6 +463,10 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * pages in the range finally, the caller have to free all pages in the range. * test_page_isolated() can be used for test it. * + * The function first tries to isolate the pageblocks at the beginning and end + * of the range, since there might be pages across the range boundaries. + * Afterwards, it isolates the rest of the range. + * * There is no high level synchronization mechanism that prevents two threads * from trying to isolate overlapping ranges. If this happens, one thread * will notice pageblocks in the overlapping range already set to isolate. @@ -321,21 +487,38 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * Return: 0 on success and -EBUSY if any part of range cannot be isolated. 
*/ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype, int flags) + int migratetype, int flags, gfp_t gfp_flags) { unsigned long pfn; struct page *page; + int ret; BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); - for (pfn = start_pfn; - pfn < end_pfn; + /* isolate [start_pfn, start_pfn + pageblock_nr_pages) pageblock */ + ret = isolate_single_pageblock(start_pfn, gfp_flags, false); + if (ret) + return ret; + + /* isolate [end_pfn - pageblock_nr_pages, end_pfn) pageblock */ + ret = isolate_single_pageblock(end_pfn, gfp_flags, true); + if (ret) { + unset_migratetype_isolate(pfn_to_page(start_pfn), migratetype); + return ret; + } + + /* skip isolated pageblocks at the beginning and end */ + for (pfn = start_pfn + pageblock_nr_pages; + pfn < end_pfn - pageblock_nr_pages; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); if (page && set_migratetype_isolate(page, migratetype, flags, start_pfn, end_pfn)) { undo_isolate_page_range(start_pfn, pfn, migratetype); + unset_migratetype_isolate( + pfn_to_page(end_pfn - pageblock_nr_pages), + migratetype); return -EBUSY; } } @@ -346,7 +529,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, * Make isolated pages available again. */ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, - unsigned migratetype) + int migratetype) { unsigned long pfn; struct page *page; -- 2.35.1 -- Best Regards, Yan, Zi
Attachment:
signature.asc
Description: OpenPGP digital signature