From: Zi Yan <ziy@xxxxxxxxxx> deferred_init only initializes the first section of a zone and defers the rest; the rest of the zone is then initialized in section-sized units. When MAX_ORDER grows beyond a section size, early_page_uninitialised() does not prevent pages beyond the first section from being initialized, since it only checks the starting pfn and assumes MAX_ORDER is smaller than a section size. In addition, deferred_init_maxorder() uses MAX_ORDER_NR_PAGES as the initialization unit, which can cause the initialized chunk of memory to overlap with other initialization jobs. For the first issue, make early_page_uninitialised() decrease the order for non-deferred memory initialization when it is bigger than the first section. For the second issue, when adjusting the pfn alignment in deferred_init_maxorder(), make sure the alignment is not bigger than a section size. Signed-off-by: Zi Yan <ziy@xxxxxxxxxx> --- mm/internal.h | 2 +- mm/memblock.c | 6 ++++-- mm/page_alloc.c | 26 +++++++++++++++++++------- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 1433e3a6fdd0..cbe745670c6e 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -355,7 +355,7 @@ extern int __isolate_free_page(struct page *page, unsigned int order); extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); extern void memblock_free_pages(struct page *page, unsigned long pfn, - unsigned int order); + unsigned int *order); extern void __free_pages_core(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned int order); extern void post_alloc_hook(struct page *page, unsigned int order, diff --git a/mm/memblock.c b/mm/memblock.c index d1525463c05e..dc2ce6df8fe3 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,7 +1640,9 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - memblock_free_pages(pfn_to_page(cursor), 
cursor, 0); + unsigned int order = 0; + + memblock_free_pages(pfn_to_page(cursor), cursor, &order); totalram_pages_inc(); } } @@ -2035,7 +2037,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end) while (start + (1UL << order) > end) order--; - memblock_free_pages(pfn_to_page(start), start, order); + memblock_free_pages(pfn_to_page(start), start, &order); start += (1UL << order); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07ad8074950f..3f3af7cd5164 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -463,13 +463,19 @@ static inline bool deferred_pages_enabled(void) } /* Returns true if the struct page for the pfn is uninitialised */ -static inline bool __meminit early_page_uninitialised(unsigned long pfn) +static inline bool __meminit early_page_uninitialised(unsigned long pfn, unsigned int *order) { int nid = early_pfn_to_nid(pfn); if (node_online(nid) && pfn >= NODE_DATA(nid)->first_deferred_pfn) return true; + /* clamp down order to not exceed first_deferred_pfn */ + if (order) + *order = min_t(unsigned int, + *order, + ilog2(NODE_DATA(nid)->first_deferred_pfn - pfn)); + return false; } @@ -515,7 +521,7 @@ static inline bool deferred_pages_enabled(void) return false; } -static inline bool early_page_uninitialised(unsigned long pfn) +static inline bool early_page_uninitialised(unsigned long pfn, unsigned int *order) { return false; } @@ -1644,7 +1650,7 @@ static void __meminit init_reserved_page(unsigned long pfn) pg_data_t *pgdat; int nid, zid; - if (!early_page_uninitialised(pfn)) + if (!early_page_uninitialised(pfn, NULL)) return; nid = early_pfn_to_nid(pfn); @@ -1800,11 +1806,11 @@ int __meminit early_pfn_to_nid(unsigned long pfn) #endif /* CONFIG_NUMA */ void __init memblock_free_pages(struct page *page, unsigned long pfn, - unsigned int order) + unsigned int *order) { - if (early_page_uninitialised(pfn)) + if (early_page_uninitialised(pfn, order)) return; - __free_pages_core(page, order); + __free_pages_core(page, 
*order); } /* @@ -2030,7 +2036,13 @@ static unsigned long __init deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn, unsigned long *end_pfn) { - unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES); + /* + * deferred_init_memmap_chunk gives out jobs with max size to + * PAGES_PER_SECTION. Do not align mo_pfn beyond that. + */ + unsigned long align = min_t(unsigned long, + MAX_ORDER_NR_PAGES, PAGES_PER_SECTION); + unsigned long mo_pfn = ALIGN(*start_pfn + 1, align); unsigned long spfn = *start_pfn, epfn = *end_pfn; unsigned long nr_pages = 0; u64 j = *i; -- 2.35.1