From: Baoquan He <bhe@xxxxxxxxxx> Subject: mm: memmap_init: iterate over memblock regions rather that check each PFN When called during boot the memmap_init_zone() function checks if each PFN is valid and actually belongs to the node being initialized using early_pfn_valid() and early_pfn_in_nid(). Each such check may cost up to O(log(n)) where n is the number of memory banks, so for large amount of memory overall time spent in early_pfn*() becomes substantial. Since the information is anyway present in memblock, we can iterate over memblock memory regions in memmap_init() and only call memmap_init_zone() for PFN ranges that are know to be valid and in the appropriate node. [cai@xxxxxx: fix a compilation warning from Clang] Link: http://lkml.kernel.org/r/CF6E407F-17DC-427C-8203-21979FB882EF@xxxxxx [bhe@xxxxxxxxxx: fix the incorrect hole in fast_isolate_freepages()] Link: http://lkml.kernel.org/r/8C537EB7-85EE-4DCF-943E-3CC0ED0DF56D@xxxxxx Link: http://lkml.kernel.org/r/20200521014407.29690-1-bhe@xxxxxxxxxx Link: http://lkml.kernel.org/r/20200412194859.12663-16-rppt@xxxxxxxxxx Signed-off-by: Baoquan He <bhe@xxxxxxxxxx> Signed-off-by: Mike Rapoport <rppt@xxxxxxxxxxxxx> Tested-by: Hoan Tran <hoan@xxxxxxxxxxxxxxxxxxxxxx> [arm64] Cc: Brian Cain <bcain@xxxxxxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx> Cc: Geert Uytterhoeven <geert@xxxxxxxxxxxxxx> Cc: Greentime Hu <green.hu@xxxxxxxxx> Cc: Greg Ungerer <gerg@xxxxxxxxxxxxxx> Cc: Guan Xuetao <gxt@xxxxxxxxxx> Cc: Guo Ren <guoren@xxxxxxxxxx> Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx> Cc: Helge Deller <deller@xxxxxx> Cc: "James E.J. Bottomley" <James.Bottomley@xxxxxxxxxxxxxxxxxxxxx> Cc: Jonathan Corbet <corbet@xxxxxxx> Cc: Ley Foon Tan <ley.foon.tan@xxxxxxxxx> Cc: Mark Salter <msalter@xxxxxxxxxx> Cc: Matt Turner <mattst88@xxxxxxxxx> Cc: Max Filippov <jcmvbkbc@xxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Michal Simek <monstr@xxxxxxxxx> Cc: Nick Hu <nickhu@xxxxxxxxxxxxx> Cc: Paul Walmsley <paul.walmsley@xxxxxxxxxx> Cc: Richard Weinberger <richard@xxxxxx> Cc: Rich Felker <dalias@xxxxxxxx> Cc: Russell King <linux@xxxxxxxxxxxxxxx> Cc: Stafford Horne <shorne@xxxxxxxxx> Cc: Thomas Bogendoerfer <tsbogend@xxxxxxxxxxxxxxxx> Cc: Tony Luck <tony.luck@xxxxxxxxx> Cc: Vineet Gupta <vgupta@xxxxxxxxxxxx> Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxxxxxxxxx> Cc: Qian Cai <cai@xxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/compaction.c | 4 +++- mm/page_alloc.c | 43 ++++++++++++++++--------------------------- 2 files changed, 19 insertions(+), 28 deletions(-) --- a/mm/compaction.c~mm-memmap_init-iterate-over-memblock-regions-rather-that-check-each-pfn +++ a/mm/compaction.c @@ -1409,7 +1409,9 @@ fast_isolate_freepages(struct compact_co cc->free_pfn = highest; } else { if (cc->direct_compaction && pfn_valid(min_pfn)) { - page = pfn_to_page(min_pfn); + page = pageblock_pfn_to_page(min_pfn, + pageblock_end_pfn(min_pfn), + cc->zone); cc->free_pfn = min_pfn; } } --- a/mm/page_alloc.c~mm-memmap_init-iterate-over-memblock-regions-rather-that-check-each-pfn +++ a/mm/page_alloc.c @@ -5951,23 +5951,6 @@ overlap_memmap_init(unsigned long zone, return false; } -#ifdef CONFIG_SPARSEMEM -/* Skip PFNs that belong to non-present sections */ -static inline __meminit unsigned long next_pfn(unsigned long pfn) -{ - const unsigned long section_nr = pfn_to_section_nr(++pfn); - - if (present_section_nr(section_nr)) - return pfn; - return section_nr_to_pfn(next_present_section_nr(section_nr)); -} -#else -static inline __meminit unsigned long next_pfn(unsigned long pfn) -{ - return pfn++; -} -#endif - /* * Initially all pages are reserved - free ones are freed * up by memblock_free_all() once the early boot process is @@ -6007,14 +5990,6 @@ void __meminit memmap_init_zone(unsigned * function. They do not exist on hotplugged memory. */ if (context == MEMMAP_EARLY) { - if (!early_pfn_valid(pfn)) { - pfn = next_pfn(pfn); - continue; - } - if (!early_pfn_in_nid(pfn, nid)) { - pfn++; - continue; - } if (overlap_memmap_init(zone, &pfn)) continue; if (defer_init(nid, pfn, end_pfn)) @@ -6130,9 +6105,23 @@ static void __meminit zone_init_free_lis } void __meminit __weak memmap_init(unsigned long size, int nid, - unsigned long zone, unsigned long start_pfn) + unsigned long zone, + unsigned long range_start_pfn) { - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, NULL); + unsigned long start_pfn, end_pfn; + unsigned long range_end_pfn = range_start_pfn + size; + int i; + + for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { + start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn); + end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn); + + if (end_pfn > start_pfn) { + size = end_pfn - start_pfn; + memmap_init_zone(size, nid, zone, start_pfn, + MEMMAP_EARLY, NULL); + } + } } static int zone_batchsize(struct zone *zone) _