This is a note to let you know that I've just added the patch titled NUMA: optimize detection of memory with no node id assigned by firmware to the 6.6-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: numa-optimize-detection-of-memory-with-no-node-id-as.patch and it can be found in the queue-6.6 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. commit bb76e557e6106339bddd8a8e0bc858e8d46d9702 Author: Liam Ni <zhiguangni01@xxxxxxxxx> Date: Thu Oct 26 10:03:29 2023 +0800 NUMA: optimize detection of memory with no node id assigned by firmware [ Upstream commit ff6c3d81f2e86b63a3a530683f89ef393882782a ] Sanity check that makes sure the nodes cover all memory loops over numa_meminfo to count the pages that have node id assigned by the firmware, then loops again over memblock.memory to find the total amount of memory and in the end checks that the difference between the total memory and memory that covered by nodes is less than some threshold. Worse, the loop over numa_meminfo calls __absent_pages_in_range() that also partially traverses memblock.memory. It's much simpler and more efficient to have a single traversal of memblock.memory that verifies that amount of memory not covered by nodes is less than a threshold. Introduce memblock_validate_numa_coverage() that does exactly that and use it instead of numa_meminfo_cover_memory(). Link: https://lkml.kernel.org/r/20231026020329.327329-1-zhiguangni01@xxxxxxxxx Signed-off-by: Liam Ni <zhiguangni01@xxxxxxxxx> Reviewed-by: Mike Rapoport (IBM) <rppt@xxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Bibo Mao <maobibo@xxxxxxxxxxx> Cc: Binbin Zhou <zhoubinbin@xxxxxxxxxxx> Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: Feiyang Chen <chenfeiyang@xxxxxxxxxxx> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> Cc: Huacai Chen <chenhuacai@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: WANG Xuerui <kernel@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Stable-dep-of: 9cdc6423acb4 ("memblock: allow zero threshold in validate_numa_converage()") Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index 6e65ff12d5c7..8fe21f868f72 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -226,32 +226,6 @@ static void __init node_mem_init(unsigned int node) #ifdef CONFIG_ACPI_NUMA -/* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - int i; - u64 numaram, biosram; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - u64 s = mi->blk[i].start >> PAGE_SHIFT; - u64 e = mi->blk[i].end >> PAGE_SHIFT; - - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((s64)numaram < 0) - numaram = 0; - } - max_pfn = max_low_pfn; - biosram = max_pfn - absent_pages_in_range(0, max_pfn); - - BUG_ON((s64)(biosram - numaram) >= (1 << (20 - PAGE_SHIFT))); - return true; -} - static void __init add_node_intersection(u32 node, u64 start, u64 size, u32 type) { static unsigned long num_physpages; @@ -396,7 +370,7 @@ int __init init_numa_memory(void) return -EINVAL; init_node_memblock(); - if (numa_meminfo_cover_memory(&numa_meminfo) == false) + if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; for_each_node_mask(node, node_possible_map) { diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index c7fa5396c0f0..2c67bfc3cf32 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -448,37 +448,6 @@ int __node_distance(int from, int to) } EXPORT_SYMBOL(__node_distance); -/* - * Sanity check to catch more bad NUMA configurations (they are amazingly - * common). Make sure the nodes cover all memory. - */ -static bool __init numa_meminfo_cover_memory(const struct numa_meminfo *mi) -{ - u64 numaram, e820ram; - int i; - - numaram = 0; - for (i = 0; i < mi->nr_blks; i++) { - u64 s = mi->blk[i].start >> PAGE_SHIFT; - u64 e = mi->blk[i].end >> PAGE_SHIFT; - numaram += e - s; - numaram -= __absent_pages_in_range(mi->blk[i].nid, s, e); - if ((s64)numaram < 0) - numaram = 0; - } - - e820ram = max_pfn - absent_pages_in_range(0, max_pfn); - - /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ - if ((s64)(e820ram - numaram) >= (1 << (20 - PAGE_SHIFT))) { - printk(KERN_ERR "NUMA: nodes only cover %LuMB of your %LuMB e820 RAM. Not used.\n", - (numaram << PAGE_SHIFT) >> 20, - (e820ram << PAGE_SHIFT) >> 20); - return false; - } - return true; -} - /* * Mark all currently memblock-reserved physical memory (which covers the * kernel's own memory ranges) as hot-unswappable. @@ -584,7 +553,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) return -EINVAL; } } - if (!numa_meminfo_cover_memory(mi)) + + if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; /* Finally register nodes. */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ed57c23f80ac..ed64240041e8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -122,6 +122,7 @@ unsigned long memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2, phys_addr_t size2); bool memblock_overlaps_region(struct memblock_type *type, phys_addr_t base, phys_addr_t size); +bool memblock_validate_numa_coverage(unsigned long threshold_bytes); int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index d630f5c2bdb9..3a3ab73546f5 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -735,6 +735,40 @@ int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size) return memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0); } +/** + * memblock_validate_numa_coverage - check if amount of memory with + * no node ID assigned is less than a threshold + * @threshold_bytes: maximal number of pages that can have unassigned node + * ID (in bytes). + * + * A buggy firmware may report memory that does not belong to any node. + * Check if amount of such memory is below @threshold_bytes. + * + * Return: true on success, false on failure. + */ +bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_bytes) +{ + unsigned long nr_pages = 0; + unsigned long start_pfn, end_pfn, mem_size_mb; + int nid, i; + + /* calculate lose page */ + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { + if (nid == NUMA_NO_NODE) + nr_pages += end_pfn - start_pfn; + } + + if ((nr_pages << PAGE_SHIFT) >= threshold_bytes) { + mem_size_mb = memblock_phys_mem_size() >> 20; + pr_err("NUMA: no nodes coverage for %luMB of %luMB RAM\n", + (nr_pages << PAGE_SHIFT) >> 20, mem_size_mb); + return false; + } + + return true; +} + + /** * memblock_isolate_range - isolate given range into disjoint memblocks * @type: memblock type to isolate range for