Before parsing SRAT, memblock has already reserved some memory ranges for other purposes, such as the kernel image. We cannot prevent the kernel from using this memory. Furthermore, if all memory is hotpluggable and we mark all of it as movable, the system won't have enough memory to boot. So we always mark the nodes in which the kernel resides as non-movable. Signed-off-by: Tang Chen <tangchen@xxxxxxxxxxxxxx> --- arch/x86/mm/numa.c | 25 +++++++++++++++++++------ arch/x86/mm/srat.c | 17 ++++++++++++++++- include/linux/mm.h | 1 + 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 73e7934..dcaf248 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -736,24 +736,37 @@ static void __init early_x86_numa_init_mapping(void) * we will put pagetable pages in local node even if the memory of that node is * hotpluggable. * - * If users specify movablemem_map=acpi, then: + * And, when the kernel is booting, memblock has reserved some memory for other + * purpose, such as storing kernel image. We cannot prevent the kernel from + * using this kind of memory. So whatever node the kernel resides in should be + * un-hotpluggable, because if all the memory is hotpluggable, and is set as + * movable, the kernel won't have enough memory to boot. + * + * It works like this: + * If users specify movablemem_map=acpi, then * * SRAT: |_____| |_____| |_________| |_________| ...... 
* node id: 0 1 1 2 - * hotpluggable: n y y n + * hotpluggable: y y y n + * kernel resides in: y n n n * movablemem_map: |_____| |_________| */ static void __init early_mem_hotplug_init() { - int i; + int i, nid; if (!movablemem_map.acpi) return; for (i = 0; i < numa_meminfo.nr_blks; i++) { - if (numa_meminfo.blk[i].hotpluggable) - movablemem_map_add_region(numa_meminfo.blk[i].start, - numa_meminfo.blk[i].end); + nid = numa_meminfo_all.blk[i].nid; + + if (node_isset(nid, movablemem_map.numa_nodes_kernel) || + !numa_meminfo.blk[i].hotpluggable) + continue; + + movablemem_map_add_region(numa_meminfo.blk[i].start, + numa_meminfo.blk[i].end); } } #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index f7f6fd4..0b5904e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -147,7 +147,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { u64 start, end; u32 hotpluggable; - int node, pxm; + int node, pxm, i; + struct memblock_type *rgn = &memblock.reserved; if (srat_disabled()) goto out_err; @@ -176,6 +177,20 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); + /* + * Before parsing SRAT, memblock has reserved some memory for other + * purpose, such as storing kernel image. We cannot prevent the kernel + * from using this kind of memory. So just mark which nodes the kernel + * resides in, and set these nodes un-hotpluggable later. 
+ */ + for (i = 0; i < rgn->cnt; i++) { + if (end <= rgn->regions[i].base || + start >= rgn->regions[i].base + rgn->regions[i].size) + continue; + + node_set(node, movablemem_map.numa_nodes_kernel); + } + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1, diff --git a/include/linux/mm.h b/include/linux/mm.h index 7468221..2835c91 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1342,6 +1342,7 @@ struct movablemem_map { bool acpi; int nr_map; struct movablemem_entry map[MOVABLEMEM_MAP_MAX]; + nodemask_t numa_nodes_kernel; /* on which nodes kernel resides in */ }; extern struct movablemem_map movablemem_map; -- 1.7.1 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>