Friendly Ping On Mon, 11 Sept 2023 at 21:39, Liam Ni <zhiguangni01@xxxxxxxxx> wrote: > > Optimize the way of calculating missing pages. > > In the previous implementation, we calculate missing pages as follows: > 1. Calculate numaram by traversing all the numa_meminfo's and, for each of > them, traversing all the regions in memblock.memory to prepare for > counting missing pages. > > 2. Traverse all the regions in memblock.memory again to get e820ram. > > 3. The number of missing pages is (e820ram - numaram). > > But it's enough to count the memory in ‘memblock.memory’ that doesn't have > a node assigned. > > V3:https://lore.kernel.org/all/CACZJ9cUXiWxDb6hF4JFhWe7Np82k6LopVQ+_AoGFOccN4kjJqA@xxxxxxxxxxxxxx/#r > V2:https://lore.kernel.org/all/20230619075315.49114-1-zhiguangni01@xxxxxxxxx/ > V1:https://lore.kernel.org/all/20230615142016.419570-1-zhiguangni01@xxxxxxxxx/ > > Signed-off-by: Liam Ni <zhiguangni01@xxxxxxxxx> > --- > arch/x86/mm/numa.c | 33 +-------------------------------- > include/linux/memblock.h | 1 + > mm/memblock.c | 21 +++++++++++++++++++++ > 3 files changed, 23 insertions(+), 32 deletions(-) > > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c > index 2aadb2019b4f..ee5f08adfaf6 100644 > --- a/arch/x86/mm/numa.c > +++ b/arch/x86/mm/numa.c > @@ -447,37 +447,6 @@ int __node_distance(int from, int to) > } > EXPORT_SYMBOL(__node_distance); > > -/* > - * Sanity check to catch more bad NUMA configurations (they are amazingly > - * common). Make sure the nodes cover all memory. 
> - */ > -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) > -{ > - u64 numaram, e820ram; > - int i; > - > - numaram = 0; > - for (i = 0; i < mi->nr_blks; i++) { > - u64 s = mi->blk[i].start >> PAGE_SHIFT; > - u64 e = mi->blk[i].end >> PAGE_SHIFT; > - numaram += e - s; > - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); > - if ((s64)numaram < 0) > - numaram = 0; > - } > - > - e820ram = max_pfn - absent_pages_in_range(0, max_pfn); > - > - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ > - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { > - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", > - (numaram << PAGE_SHIFT) >> 20, > - (e820ram << PAGE_SHIFT) >> 20); > - return false; > - } > - return true; > -} > - > /* > * Mark all currently memblock-reserved physical memory (which covers the > * kernel's own memory ranges) as hot-unswappable. > @@ -583,7 +552,7 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) > return -EINVAL; > } > } > - if (!numa_meminfo_cover_memory(mi)) > + if (!memblock_validate_numa_coverage(SZ_1M)) > return -EINVAL; > > /* Finally register nodes. 
*/ > diff --git a/include/linux/memblock.h b/include/linux/memblock.h > index 1c1072e3ca06..0c3a193ebc58 100644 > --- a/include/linux/memblock.h > +++ b/include/linux/memblock.h > @@ -120,6 +120,7 @@ int memblock_physmem_add(phys_addr_t base, phys_addr_t size); > void memblock_trim_memory(phys_addr_t align); > bool memblock_overlaps_region(struct memblock_type *type, > phys_addr_t base, phys_addr_t size); > +bool memblock_validate_numa_coverage(const u64 limit); > int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); > int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); > int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); > diff --git a/mm/memblock.c b/mm/memblock.c > index 0863222af4a4..8f61f868bec1 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -734,6 +734,27 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) > return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); > } > > +bool __init_memblock memblock_validate_numa_coverage(const u64 limit) > +{ > + unsigned long lose_pg = 0; > + unsigned long start_pfn, end_pfn; > + int nid, i; > + > + /* calculate lose page */ > + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { > + if (nid == NUMA_NO_NODE) > + lose_pg += end_pfn - start_pfn; > + } > + > + if (lose_pg >= limit) { > + pr_err("NUMA: We lost %ld pages.\n", lose_pg); > + return false; > + } > + > + return true; > +} > + > + > /** > * memblock_isolate_range - isolate given range into disjoint memblocks > * @type: memblock type to isolate range for > -- > 2.25.1 >