On 25 May 2022, at 17:48, Marek Szyprowski wrote: > On 24.05.2022 21:47, Zi Yan wrote: >> From: Zi Yan <ziy@xxxxxxxxxx> >> >> In isolate_single_pageblock() called by start_isolate_page_range(), >> there are some pageblock isolation issues causing a potential >> infinite loop when isolating a page range. This is reported by Qian Cai. >> >> 1. the pageblock was isolated by just changing pageblock migratetype >> without checking unmovable pages. Calling set_migratetype_isolate() to >> isolate pageblock properly. >> 2. an off-by-one error caused migrating pages unnecessarily, since the page >> is not crossing pageblock boundary. >> 3. migrating a compound page across pageblock boundary then splitting the >> free page later has a small race window that the free page might be >> allocated again, so that the code will try again, causing an potential >> infinite loop. Temporarily set the to-be-migrated page's pageblock to >> MIGRATE_ISOLATE to prevent that and bail out early if no free page is >> found after page migration. >> >> An additional fix to split_free_page() aims to avoid crashing in >> __free_one_page(). When the free page is split at the specified >> split_pfn_offset, free_page_order should check both the first bit of >> free_page_pfn and the last bit of split_pfn_offset and use the smaller one. >> For example, if free_page_pfn=0x10000, split_pfn_offset=0xc000, >> free_page_order should first be 0x8000 then 0x4000, instead of 0x4000 then >> 0x8000, which the original algorithm did. >> >> Fixes: b2c9e2fbba ("mm: make alloc_contig_range work at pageblock granularity") >> Reported-by: Qian Cai <quic_qiancai@xxxxxxxxxxx> >> Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> > > This patch landed in linux next-20220525 as commit 29a8af92b874 ("mm: > fix a potential infinite loop in start_isolate_page_range()"). > Unfortunately it breaks all CMA allocations done by the DMA-mapping > framework. I've observed this on ARM 32bit and 64bit. In the logs I only > see messages like: > > cma: cma_alloc: linux,cma: alloc failed, req-size: 128 pages, ret: -16 > > I will try to analyze it a bit more tomorrow, but it looks that > isolation always fails. > Hi Marek, Thanks for reporting the issue. Can you try the patch below to see if it fixes the issue? Basically, the bug introduced by this commit is that it does not consider the situation when a smaller than pageblock range is to be isolated, the set_migratetype_isolate() in the second isolate_single_pageblock() called by start_isolate_page_range() returns with a failure. Skipping isolating the pageblock which has been isolated by the first isolate_single_pageblock() solves the issue. The patch below also includes the fix for the free memory accounting issue. diff --git a/mm/internal.h b/mm/internal.h index 64e61b032dac..c0f8fbe0445b 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -374,8 +374,8 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, int nid, bool exact_nid); -void split_free_page(struct page *free_page, - int order, unsigned long split_pfn_offset); +int split_free_page(struct page *free_page, + unsigned int order, unsigned long split_pfn_offset); #if defined CONFIG_COMPACTION || defined CONFIG_CMA diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bc93a82e51e6..6f6e4649ac21 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1100,30 +1100,44 @@ static inline void __free_one_page(struct page *page, * @order: the order of the page * @split_pfn_offset: split offset within the page * + * Return -ENOENT if the free page is changed, otherwise 0 + * * It is used when the free page crosses two pageblocks with different migratetypes * at split_pfn_offset within the page. The split free page will be put into * separate migratetype lists afterwards. Otherwise, the function achieves * nothing. */ -void split_free_page(struct page *free_page, - int order, unsigned long split_pfn_offset) +int split_free_page(struct page *free_page, + unsigned int order, unsigned long split_pfn_offset) { struct zone *zone = page_zone(free_page); unsigned long free_page_pfn = page_to_pfn(free_page); unsigned long pfn; unsigned long flags; int free_page_order; + int mt; + int ret = 0; if (split_pfn_offset == 0) - return; + return ret; spin_lock_irqsave(&zone->lock, flags); + + if (!PageBuddy(free_page) || buddy_order(free_page) != order) { + ret = -ENOENT; + goto out; + } + + mt = get_pageblock_migratetype(free_page); + if (likely(!is_migrate_isolate(mt))) + __mod_zone_freepage_state(zone, -(1UL << order), mt); + del_page_from_free_list(free_page, zone, order); for (pfn = free_page_pfn; pfn < free_page_pfn + (1UL << order);) { int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn); - free_page_order = min_t(int, + free_page_order = min_t(unsigned int, pfn ? __ffs(pfn) : order, __fls(split_pfn_offset)); __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order, @@ -1134,7 +1148,9 @@ void split_free_page(struct page *free_page, if (split_pfn_offset == 0) split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); } +out: spin_unlock_irqrestore(&zone->lock, flags); + return ret; } /* * A bad page could be due to a number of fields. Instead of multiple branches, diff --git a/mm/page_isolation.c b/mm/page_isolation.c index c643c8420809..f539ccf7fb44 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -300,7 +300,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * the in-use page then splitting the free page. */ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, - gfp_t gfp_flags, bool isolate_before) + gfp_t gfp_flags, bool isolate_before, bool skip_isolation) { unsigned char saved_mt; unsigned long start_pfn; @@ -327,11 +327,16 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, zone->zone_start_pfn); saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, - isolate_pageblock, isolate_pageblock + pageblock_nr_pages); - if (ret) - return ret; + if (skip_isolation) + VM_BUG_ON(!is_migrate_isolate(saved_mt)); + else { + ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, + isolate_pageblock, isolate_pageblock + pageblock_nr_pages); + + if (ret) + return ret; + } /* * Bail out early when the to-be-isolated pageblock does not form @@ -367,8 +372,11 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, int order = buddy_order(page); if (pfn + (1UL << order) > boundary_pfn) - split_free_page(page, order, boundary_pfn - pfn); - pfn += (1UL << order); + /* free page changed before split, check it again */ + if (split_free_page(page, order, boundary_pfn - pfn)) + continue; + + pfn += 1UL << order; continue; } /* @@ -463,7 +471,8 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, return 0; failed: /* restore the original migratetype */ - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); + if (!skip_isolation) + unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); return -EBUSY; } @@ -522,14 +531,18 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages); unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages); int ret; + bool skip_isolation = false; /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false); + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); if (ret) return ret; + if (isolate_start == isolate_end - pageblock_nr_pages) + skip_isolation = true; + /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true); + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); if (ret) { unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); return ret; -- Best Regards, Yan, Zi
Attachment:
signature.asc
Description: OpenPGP digital signature