From: Jia He <jia.he@xxxxxxxxxxxxxxxx> Subject: mm: page_alloc: restore memblock_next_valid_pfn() on arm/arm64 b92df1de5d28 ("mm: page_alloc: skip over regions of invalid pfns where possible") optimized the loop in memmap_init_zone(). But it causes possible panic bug. So Daniel Vacek reverted it later. But as suggested by Daniel Vacek, it is fine to using memblock to skip gaps and finding next valid frame with CONFIG_HAVE_ARCH_PFN_VALID. Daniel said: "On arm and arm64, memblock is used by default. But generic version of pfn_valid() is based on mem sections and memblock_next_valid_pfn() does not always return the next valid one but skips more resulting in some valid frames to be skipped (as if they were invalid). And that's why kernel was eventually crashing on some !arm machines." About the performance consideration: As said by James in b92df1de5, "I have tested this patch on a virtual model of a Samurai CPU with a sparse memory map. The kernel boot time drops from 109 to 62 seconds." Thus it would be better if we remain memblock_next_valid_pfn on arm/arm64. Link: http://lkml.kernel.org/r/1534907237-2982-3-git-send-email-jia.he@xxxxxxxxxxxxxxxx Signed-off-by: Jia He <jia.he@xxxxxxxxxxxxxxxx> Suggested-by: Daniel Vacek <neelx@xxxxxxxxxx> Cc: AKASHI Takahiro <takahiro.akashi@xxxxxxxxxx> Cc: Andrey Ryabinin <aryabinin@xxxxxxxxxxxxx> Cc: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx> Cc: Catalin Marinas <catalin.marinas@xxxxxxx> Cc: Daniel Jordan <daniel.m.jordan@xxxxxxxxxx> Cc: Eugeniu Rosca <erosca@xxxxxxxxxxxxxx> Cc: Gioh Kim <gi-oh.kim@xxxxxxxxxxxxxxxx> Cc: James Morse <james.morse@xxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Kemi Wang <kemi.wang@xxxxxxxxx> Cc: Laura Abbott <labbott@xxxxxxxxxx> Cc: Mark Rutland <mark.rutland@xxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Nikolay Borisov <nborisov@xxxxxxxx> Cc: Petr Tesarik <ptesarik@xxxxxxxx> Cc: Philip Derrin <philip@cog.systems> Cc: Russell King <linux@xxxxxxxxxxxxxxx> Cc: Steve Capper <steve.capper@xxxxxxx> Cc: Vladimir Murzin <vladimir.murzin@xxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Wei Yang <richard.weiyang@xxxxxxxxx> Cc: Will Deacon <will.deacon@xxxxxxx> Cc: YASUAKI ISHIMATSU <yasu.isimatu@xxxxxxxxx> Cc: Pavel Tatashin <pasha.tatashin@xxxxxxxxxx> Cc: Pavel Tatashin <pavel.tatashin@xxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/mmzone.h | 9 +++++++++ mm/memblock.c | 30 ++++++++++++++++++++++++++++++ mm/page_alloc.c | 5 ++++- 3 files changed, 43 insertions(+), 1 deletion(-) --- a/include/linux/mmzone.h~mm-page_alloc-remain-memblock_next_valid_pfn-on-arm-arm64 +++ a/include/linux/mmzone.h @@ -1263,6 +1263,10 @@ static inline int pfn_present(unsigned l #endif #define early_pfn_valid(pfn) pfn_valid(pfn) +#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID +extern unsigned long memblock_next_valid_pfn(unsigned long pfn); +#define next_valid_pfn(pfn) memblock_next_valid_pfn(pfn) +#endif void sparse_init(void); #else #define sparse_init() do {} while (0) @@ -1284,6 +1288,11 @@ struct mminit_pfnnid_cache { #define early_pfn_valid(pfn) (1) #endif +/* fallback to default definitions*/ +#ifndef next_valid_pfn +#define next_valid_pfn(pfn) (pfn + 1) +#endif + void memory_present(int nid, unsigned long start, unsigned long end); /* --- a/mm/memblock.c~mm-page_alloc-remain-memblock_next_valid_pfn-on-arm-arm64 +++ a/mm/memblock.c @@ -1231,6 +1231,36 @@ int __init_memblock memblock_set_node(ph } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +#ifdef CONFIG_HAVE_MEMBLOCK_PFN_VALID +unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn) +{ + struct memblock_type *type = &memblock.memory; + unsigned int right = type->cnt; + unsigned int mid, left = 0; + phys_addr_t addr = PFN_PHYS(++pfn); + + do { + mid = (right + left) / 2; + + if (addr < type->regions[mid].base) + right = mid; + else if (addr >= (type->regions[mid].base + + type->regions[mid].size)) + left = mid + 1; + else { + /* addr is within the region, so pfn is valid */ + return pfn; + } + } while (left < right); + + if (right == type->cnt) + return -1UL; + else + return PHYS_PFN(type->regions[right].base); +} +EXPORT_SYMBOL(memblock_next_valid_pfn); +#endif /*CONFIG_HAVE_MEMBLOCK_PFN_VALID*/ + static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t end, int nid, --- a/mm/page_alloc.c~mm-page_alloc-remain-memblock_next_valid_pfn-on-arm-arm64 +++ a/mm/page_alloc.c @@ -5485,8 +5485,11 @@ void __meminit memmap_init_zone(unsigned if (context != MEMMAP_EARLY) goto not_early; - if (!early_pfn_valid(pfn)) + if (!early_pfn_valid(pfn)) { + pfn = next_valid_pfn(pfn) - 1; continue; + } + if (!early_pfn_in_nid(pfn, nid)) continue; if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) _