+ x86_64-fix-fake-numa.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled

     x86_64: fix fake numa

has been added to the -mm tree.  Its filename is

     x86_64-fix-fake-numa.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this.

------------------------------------------------------
Subject: x86_64: fix fake numa
From: David Rientjes <rientjes@xxxxxxxxxx>



Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 arch/x86_64/kernel/setup.c  |    7 -
 arch/x86_64/mm/k8topology.c |   12 -
 arch/x86_64/mm/numa.c       |  228 +++++++++++++++++++++++++---------
 arch/x86_64/mm/srat.c       |   17 --
 include/asm-x86_64/numa.h   |    3 
 5 files changed, 174 insertions(+), 93 deletions(-)

diff -puN arch/x86_64/kernel/setup.c~x86_64-fix-fake-numa arch/x86_64/kernel/setup.c
--- a/arch/x86_64/kernel/setup.c~x86_64-fix-fake-numa
+++ a/arch/x86_64/kernel/setup.c
@@ -416,13 +416,6 @@ void __init setup_arch(char **cmdline_p)
 	max_pfn = end_pfn;
 	high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
 
-#ifdef CONFIG_ACPI_NUMA
-	/*
-	 * Parse SRAT to discover nodes.
-	 */
-	acpi_numa_init();
-#endif
-
 #ifdef CONFIG_NUMA
 	numa_initmem_init(0, end_pfn); 
 #else
diff -puN arch/x86_64/mm/k8topology.c~x86_64-fix-fake-numa arch/x86_64/mm/k8topology.c
--- a/arch/x86_64/mm/k8topology.c~x86_64-fix-fake-numa
+++ a/arch/x86_64/mm/k8topology.c
@@ -43,7 +43,6 @@ static __init int find_northbridge(void)
 int __init k8_scan_nodes(unsigned long start, unsigned long end)
 { 
 	unsigned long prevbase;
-	struct bootnode nodes[8];
 	int nodeid, i, nb; 
 	unsigned char nodeids[8];
 	int found = 0;
@@ -65,7 +64,6 @@ int __init k8_scan_nodes(unsigned long s
 
 	printk(KERN_INFO "Number of nodes %d\n", numnodes);
 
-	memset(&nodes,0,sizeof(nodes)); 
 	prevbase = 0;
 	for (i = 0; i < 8; i++) { 
 		unsigned long base,limit; 
@@ -155,22 +153,14 @@ int __init k8_scan_nodes(unsigned long s
 	if (!found)
 		return -1; 
 
-	memnode_shift = compute_hash_shift(nodes, 8);
-	if (memnode_shift < 0) { 
-		printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); 
-		return -1; 
-	} 
-	printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift); 
-
 	for (i = 0; i < 8; i++) {
 		if (nodes[i].start != nodes[i].end) { 
 			nodeid = nodeids[i];
 			apicid_to_node[nodeid << dualcore] = i;
 			apicid_to_node[(nodeid << dualcore) + dualcore] = i;
-			setup_node_bootmem(i, nodes[i].start, nodes[i].end); 
+			node_set_online(i);
 		} 
 	}
 
-	numa_init_array();
 	return 0;
 } 
diff -puN arch/x86_64/mm/numa.c~x86_64-fix-fake-numa arch/x86_64/mm/numa.c
--- a/arch/x86_64/mm/numa.c~x86_64-fix-fake-numa
+++ a/arch/x86_64/mm/numa.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */ 
@@ -11,6 +11,7 @@
 #include <linux/ctype.h>
 #include <linux/module.h>
 #include <linux/nodemask.h>
+#include <linux/acpi.h>
 
 #include <asm/e820.h>
 #include <asm/proto.h>
@@ -22,6 +23,7 @@
 #define Dprintk(x...)
 #endif
 
+struct bootnode nodes[MAX_NUMNODES] __initdata;
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 bootmem_data_t plat_node_bdata[MAX_NUMNODES];
 
@@ -46,7 +48,7 @@ int numa_off __initdata;
  * -1 if node overlap or lost ram (shift too big)
  */
 static int __init
-populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
+populate_memnodemap(const struct bootnode *nodes, int shift)
 {
 	int i; 
 	int res = -1;
@@ -55,7 +57,7 @@ populate_memnodemap(const struct bootnod
 	if (shift >= 64)
 		return -1;
 	memset(memnodemap, 0xff, sizeof(memnodemap));
-	for (i = 0; i < numnodes; i++) {
+	for_each_online_node(i) {
 		addr = nodes[i].start;
 		end = nodes[i].end;
 		if (addr >= end)
@@ -73,17 +75,17 @@ populate_memnodemap(const struct bootnod
 	return res;
 }
 
-int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
+int __init compute_hash_shift(struct bootnode *nodes)
 {
 	int shift = 20;
 
-	while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0)
+	while (populate_memnodemap(nodes, shift + 1) >= 0)
 		shift++;
 
 	printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
 		shift);
 
-	if (populate_memnodemap(nodes, numnodes, shift) != 1) {
+	if (populate_memnodemap(nodes, shift) != 1) {
 		printk(KERN_INFO
 	"Your memory is not aligned you need to rebuild your kernel "
 	"with a bigger NODEMAPSIZE shift=%d\n",
@@ -225,81 +227,189 @@ void __init numa_init_array(void)
 int numa_fake __initdata = 0;
 
 /* Numa emulation */
-static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+static unsigned long __init numa_emu_node_size(unsigned long real_size,
+					       int last)
 {
- 	int i;
- 	struct bootnode nodes[MAX_NUMNODES];
- 	unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
-
- 	/* Kludge needed for the hash function */
- 	if (hweight64(sz) > 1) {
- 		unsigned long x = 1;
- 		while ((x << 1) < sz)
- 			x <<= 1;
- 		if (x < sz/2)
- 			printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
- 		sz = x;
- 	}
-
- 	memset(&nodes,0,sizeof(nodes));
- 	for (i = 0; i < numa_fake; i++) {
- 		nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
- 		if (i == numa_fake-1)
- 			sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
- 		nodes[i].end = nodes[i].start + sz;
- 		printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
- 		       i,
- 		       nodes[i].start, nodes[i].end,
- 		       (nodes[i].end - nodes[i].start) >> 20);
-		node_set_online(i);
- 	}
- 	memnode_shift = compute_hash_shift(nodes, numa_fake);
- 	if (memnode_shift < 0) {
- 		memnode_shift = 0;
- 		printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
- 		return -1;
- 	}
- 	for_each_online_node(i)
- 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- 	numa_init_array();
- 	return 0;
+	unsigned long node_size;
+
+	if (!numa_fake)
+		return 0;
+
+	node_size = real_size / numa_fake;
+
+	if (!node_size)
+		return 0;
+
+	node_size = roundup_pow_of_two(node_size);
+
+	if (last)
+		node_size = real_size - (node_size * (numa_fake - 1));
+
+	return node_size;
 }
-#endif
 
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{ 
-	int i;
+static int __init numa_emu_new(int nid,
+			       unsigned long real_start, unsigned long real_end,
+			       unsigned long *emu_start, unsigned long *emu_end)
+{
+	unsigned long node_size;
 
-#ifdef CONFIG_NUMA_EMU
-	if (numa_fake && !numa_emulation(start_pfn, end_pfn))
- 		return;
+	if (nid >= numa_fake)
+		return -1;
+
+	node_size = numa_emu_node_size(real_end - real_start, 0);
+
+	if (!node_size)
+		return -1;
+
+	*emu_start = real_start + (nid * node_size);
+
+	/* give node the remaining memory */
+	if (nid == (numa_fake - 1)) {
+		node_size = numa_emu_node_size(real_end - real_start, 1);
+
+		if (!node_size)
+			return -1;
+	}
+
+	*emu_end = *emu_start + node_size;
+
+	printk("configuring fake node %u: pfn %lu - %lu\n",
+	       nid, *emu_start, *emu_end);
+
+	return 0;
+}
+
+static int __init numa_emu_shrink(int nid, int new_nodes,
+				  unsigned long real_start,
+				  unsigned long real_end,
+				  unsigned long *emu_start,
+				  unsigned long *emu_end)
+{
+	unsigned long node_size;
+
+	if (numa_fake != (new_nodes + 1))
+		return -1;
+
+	node_size = numa_emu_node_size(real_end - real_start, 0);
+
+	if (!node_size)
+		return -1;
+
+	*emu_start = real_start;
+	*emu_end = real_start + node_size;
+
+	printk("configuring real node %u: pfn %lu - %lu\n",
+	       nid, *emu_start, *emu_end);
+	printk("NUMA emulation, adding %u emulated node(s) to node %u\n",
+	       new_nodes, nid);
+
+	return 0;
+}
+
+void __init numa_emu_setup_nid(int real_nid)
+{
+	unsigned long start_pfn, end_pfn;
+	int real_max = 1;
+	int nid, new_nodes = 0;
+
+	if (real_nid >= real_max)
+		return;
+
+	/* setup emulated nodes */
+	for (nid = real_nid + real_max; nid < MAX_NUMNODES; nid += real_max) {
+		if (numa_emu_new(nid, nodes[real_nid].start >> PAGE_SHIFT,
+				 nodes[real_nid].end >> PAGE_SHIFT,
+				 &start_pfn, &end_pfn))
+			break;
+
+		nodes[nid].start = start_pfn << PAGE_SHIFT;
+		nodes[nid].end = end_pfn << PAGE_SHIFT;
+		new_nodes++;
+	}
+
+	if (!new_nodes)
+		return;
+
+	/* shrink real node */
+	if (numa_emu_shrink(real_nid, new_nodes,
+			    nodes[real_nid].start >> PAGE_SHIFT,
+			    nodes[real_nid].end >> PAGE_SHIFT,
+			    &start_pfn, &end_pfn))
+		return;
+
+	nodes[real_nid].start = start_pfn << PAGE_SHIFT;
+	nodes[real_nid].end = end_pfn << PAGE_SHIFT;
+
+	for (nid = real_nid + real_max; nid < MAX_NUMNODES; nid += real_max) {
+		node_set_online(nid);
+
+		if (!--new_nodes)
+			break;
+	}
+}
 #endif
 
+static void __init numa_initmem_doinit(unsigned long start_pfn,
+				       unsigned long end_pfn)
+{
 #ifdef CONFIG_ACPI_NUMA
+	nodes_clear(node_online_map);
+	memset(&nodes, 0, sizeof(nodes));
+
+	/*
+	 * Parse SRAT to discover nodes.
+	 */
+	acpi_numa_init();
+
 	if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
 					  end_pfn << PAGE_SHIFT))
  		return;
 #endif
 
 #ifdef CONFIG_K8_NUMA
+	nodes_clear(node_online_map);
+	memset(&nodes, 0, sizeof(nodes));
+
 	if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
 		return;
 #endif
+
+	nodes_clear(node_online_map);
+	memset(&nodes, 0, sizeof(nodes));
+
 	printk(KERN_INFO "%s\n",
 	       numa_off ? "NUMA turned off" : "No NUMA configuration found");
 
-	printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
+	printk(KERN_INFO "Single node at %016lx-%016lx\n",
 	       start_pfn << PAGE_SHIFT,
 	       end_pfn << PAGE_SHIFT); 
 		/* setup dummy node covering all memory */ 
-	memnode_shift = 63; 
-	memnodemap[0] = 0;
-	nodes_clear(node_online_map);
+
 	node_set_online(0);
-	for (i = 0; i < NR_CPUS; i++)
-		numa_set_node(i, 0);
-	node_to_cpumask[0] = cpumask_of_cpu(0);
-	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
+	nodes[0].start = start_pfn << PAGE_SHIFT;
+	nodes[0].end = end_pfn << PAGE_SHIFT;
+}
+
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+	int i;
+
+	numa_initmem_doinit(start_pfn, end_pfn);
+
+	for_each_online_node(i)
+		numa_emu_setup_nid(i);
+
+	memnode_shift = compute_hash_shift(nodes);
+	if (memnode_shift < 0) {
+		printk(KERN_ERR "No NUMA hash function found. Contact maintainer\n");
+		return;
+	}
+	printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
+
+	for_each_online_node(i)
+		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+	numa_init_array();
 }
 
 __cpuinit void numa_add_cpu(int cpu)
@@ -355,7 +465,7 @@ static __init int numa_setup(char *opt)
 	if (!strncmp(opt,"off",3))
 		numa_off = 1;
 #ifdef CONFIG_NUMA_EMU
-	if(!strncmp(opt, "fake=", 5)) {
+	if(!strncmp(opt, "fake=", 5) && (*(opt + 5))) {
 		numa_fake = simple_strtoul(opt+5,NULL,0); ;
 		if (numa_fake >= MAX_NUMNODES)
 			numa_fake = MAX_NUMNODES;
diff -puN arch/x86_64/mm/srat.c~x86_64-fix-fake-numa arch/x86_64/mm/srat.c
--- a/arch/x86_64/mm/srat.c~x86_64-fix-fake-numa
+++ a/arch/x86_64/mm/srat.c
@@ -32,7 +32,6 @@ int acpi_numa __initdata;
 static struct acpi_table_slit *acpi_slit;
 
 static nodemask_t nodes_parsed __initdata;
-static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
 static int found_add_area __initdata;
 int hotadd_percent __initdata = 0;
@@ -384,7 +383,8 @@ int __init acpi_scan_nodes(unsigned long
 		if ((nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
 			unparse_node(i);
 			node_set_offline(i);
-		}
+		} else
+			node_set_online(i);
 	}
 
 	if (acpi_numa <= 0)
@@ -395,20 +395,8 @@ int __init acpi_scan_nodes(unsigned long
 		return -1;
 	}
 
-	memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES);
-	if (memnode_shift < 0) {
-		printk(KERN_ERR
-		     "SRAT: No NUMA node hash function found. Contact maintainer\n");
-		bad_srat();
-		return -1;
-	}
-
 	/* Finally register nodes */
 	for_each_node_mask(i, nodes_parsed)
-		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-	/* Try again in case setup_node_bootmem missed one due
-	   to missing bootmem */
-	for_each_node_mask(i, nodes_parsed)
 		if (!node_online(i))
 			setup_node_bootmem(i, nodes[i].start, nodes[i].end);
 
@@ -418,7 +406,6 @@ int __init acpi_scan_nodes(unsigned long
 		if (!node_isset(cpu_to_node[i], nodes_parsed))
 			numa_set_node(i, NUMA_NO_NODE);
 	}
-	numa_init_array();
 	return 0;
 }
 
diff -puN include/asm-x86_64/numa.h~x86_64-fix-fake-numa include/asm-x86_64/numa.h
--- a/include/asm-x86_64/numa.h~x86_64-fix-fake-numa
+++ a/include/asm-x86_64/numa.h
@@ -7,7 +7,7 @@ struct bootnode {
 	u64 start,end; 
 };
 
-extern int compute_hash_shift(struct bootnode *nodes, int numnodes);
+extern int compute_hash_shift(struct bootnode *nodes);
 
 #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
 
@@ -20,6 +20,7 @@ extern void srat_reserve_add_area(int no
 extern int hotadd_percent;
 
 extern unsigned char apicid_to_node[256];
+extern struct bootnode nodes[MAX_NUMNODES];
 #ifdef CONFIG_NUMA
 extern void __init init_cpu_to_node(void);
 
_

Patches currently in -mm which might be from rientjes@xxxxxxxxxx are

x86_64-fix-fake-numa.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux