- 2-configurable-node-sizes.patch removed from -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     configurable node sizes
has been removed from the -mm tree.  Its filename was
     2-configurable-node-sizes.patch

This patch was dropped because it is obsolete.

------------------------------------------------------
Subject: configurable node sizes
From: David Rientjes <rientjes@xxxxxxxxxx>

Allows a more elaborate command line to be specified to numa=fake so that
nodes can be different sizes.  The command line is specified as:

	numa=fake=<cmdline>

	If <cmdline> is a number N, fakes N nodes and ignores the NUMA
	setup of the actual machine.  Otherwise, system memory is
	configured depending on the sizes and coefficients listed.  For example:
		numa=fake=2*512,1024,4*256,*128
	gives two 512M nodes, a 1024M node, four 256M nodes, and the
	rest split into 128M chunks.  If the last character of
	<cmdline> is a *, the remaining memory is divided equally
	among as many nodes as the preceding coefficient:
		numa=fake=2*512,2*
	gives two 512M nodes and the rest split into two nodes.  If
	the last character is a comma, the remaining system memory
	is not allocated to an additional node.

Useful in combination with CPUsets.

Signed-off-by: David Rientjes <rientjes@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 Documentation/x86_64/boot-options.txt |   14 +
 arch/x86_64/mm/numa.c                 |  234 ++++++++++++++++++------
 2 files changed, 195 insertions(+), 53 deletions(-)

diff -puN Documentation/x86_64/boot-options.txt~2-configurable-node-sizes Documentation/x86_64/boot-options.txt
--- a/Documentation/x86_64/boot-options.txt~2-configurable-node-sizes
+++ a/Documentation/x86_64/boot-options.txt
@@ -149,7 +149,19 @@ NUMA
 
   numa=noacpi   Don't parse the SRAT table for NUMA setup
 
-  numa=fake=X   Fake X nodes and ignore NUMA setup of the actual machine.
+  numa=fake=<cmdline>
+		If a number, fakes <cmdline> nodes and ignores NUMA setup of
+		the actual machine.  Otherwise, system memory is configured
+		depending on the sizes and coefficients listed.  For example:
+			numa=fake=2*512,1024,4*256,*128
+		gives two 512 MB nodes, a 1024 MB node, four 256 MB nodes, and
+		the rest split into 128 MB chunks.  If the last character of
+		<cmdline> is a *, the remaining memory is divided equally
+		among as many nodes as the preceding coefficient:
+			numa=fake=2*512,2*
+		gives two 512 MB nodes and the rest split into two nodes.  If
+		the last character is a comma, the remaining system memory is
+		not allocated to an additional node.
 
   numa=hotadd=percent
 		Only allow hotadd memory to preallocate page structures upto
diff -puN arch/x86_64/mm/numa.c~2-configurable-node-sizes arch/x86_64/mm/numa.c
--- a/arch/x86_64/mm/numa.c~2-configurable-node-sizes
+++ a/arch/x86_64/mm/numa.c
@@ -180,74 +180,208 @@ void __init numa_init_array(void)
 
 }
 
+/* NUMA emulation */
 #ifdef CONFIG_NUMA_EMU
-int numa_fake __initdata = 0;
+#define E820_ADDR_HOLE_SIZE(start, end)					\
+	(e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) <<	\
+		PAGE_SHIFT)
+char *cmdline __initdata;
 
-/* Numa emulation */
-static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+/*
+ * Sets up nodeid to range from addr to addr + sz.  If the end boundary is
+ * greater than max_addr, then max_addr is used instead.  The return value is 0
+ * if there is additional memory left for allocation past addr and -1 otherwise.
+ * addr is adjusted to be at the end of node.
+ */
+static int setup_node_range(int nodeid, struct bootnode *nodes, u64 *addr,
+			    u64 sz, u64 max_addr)
 {
- 	int i, big;
- 	struct bootnode nodes[numa_fake];
-	u64 addr = start_pfn << PAGE_SHIFT;
-	u64 sz = ((end_pfn - start_pfn - e820_hole_size(start_pfn, end_pfn)) <<
-		  PAGE_SHIFT) / numa_fake;
+	int ret = 0;
+	nodes[nodeid].start = *addr;
+	*addr += sz;
+	if (*addr >= max_addr) {
+		*addr = max_addr;
+		ret = -1;
+	}
+	nodes[nodeid].end = *addr;
+	node_set_online(nodeid);
+	printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nodeid,
+	       nodes[nodeid].start, nodes[nodeid].end,
+	       (nodes[nodeid].end - nodes[nodeid].start) >> 20);
+	return ret;
+}
+
+/*
+ * Splits num_nodes nodes up equally starting at node_start.  The return value
+ * is the number of nodes split up and addr is adjusted to be at the end of the
+ * last node allocated.
+ */
+static int split_nodes_equal(struct bootnode *nodes, u64 *addr, u64 max_addr,
+			     int node_start, int num_nodes)
+{
+	unsigned int big;
+	u64 sz;
+	int i;
 
+	if (num_nodes <= 0)
+		return -1;
+	if (num_nodes > MAX_NUMNODES)
+		num_nodes = MAX_NUMNODES;
+	sz = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) /
+		num_nodes;
 	/*
 	 * Calculate the number of big nodes that can be allocated as a result
-	 * of consolidating our masked leftovers.
+	 * of consolidating the leftovers.
 	 */
-	big = ((sz & ~NODE_HASH_MASK) * numa_fake) / NODE_MIN_SIZE;
+	big = ((sz & ~NODE_HASH_MASK) * num_nodes) / NODE_MIN_SIZE;
 
-	/* Round down to nearest 4MB for hash function */
+	/* Round down to the nearest 4 MB for hash function */
 	sz &= NODE_HASH_MASK;
-	if (sz == 0) {
-		printk(KERN_ERR "Not enough memory allotted for each node. "
-		       "Numa emulation disabled.\n");
+	if (!sz) {
+		printk(KERN_ERR "Not enough memory for each node. "
+		       "NUMA emulation disabled.\n");
 		return -1;
 	}
 
- 	memset(&nodes,0,sizeof(nodes));
- 	for (i = 0; i < numa_fake; i++) {
-		nodes[i].start = addr;
-		addr += sz;
+	for (i = node_start; i < num_nodes + node_start; i++) {
+		u64 end = *addr + sz;
 		if (i < big)
-			addr += NODE_MIN_SIZE;
+			end += NODE_MIN_SIZE;
 		/*
-		 * The final node cannot be over-allocated so give it the
-		 * remaining memory available and ignore asymmetry.
-		 *
-		 * Other nodes receive roughly the same amount of avaiable
-		 * pages.
+		 * The final node can have the remaining system RAM.  Other
+		 * nodes receive roughly the same amount of available pages.
 		 */
-		if (i == numa_fake - 1)
-			addr = end_pfn << PAGE_SHIFT;
+		if (i == num_nodes + node_start - 1)
+			end = max_addr;
 		else
-			while (addr - nodes[i].start -
-			       (e820_hole_size(nodes[i].start >> PAGE_SHIFT,
-					       addr >> PAGE_SHIFT) <<
-					       PAGE_SHIFT) < sz)
-				addr += NODE_MIN_SIZE;
-		nodes[i].end = addr;
- 		printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n",
- 		       i,
- 		       nodes[i].start, nodes[i].end,
- 		       (nodes[i].end - nodes[i].start) >> 20);
-		node_set_online(i);
- 	}
-	populate_physnode_map(nodes, numa_fake);
- 	for_each_online_node(i)
- 		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- 	numa_init_array();
- 	return 0;
+			while (end - *addr -
+			       E820_ADDR_HOLE_SIZE(*addr, end) < sz)
+				end += NODE_MIN_SIZE;
+		if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0)
+			break;
+	}
+	return i - node_start + 1;
+}
+
+/*
+ * Splits the remaining system RAM into chunks of size.  The remaining memory is
+ * always assigned to a final node and can be asymmetric.  Returns the number of
+ * nodes split.
+ */
+static int split_nodes_size(struct bootnode *nodes, u64 *addr, u64 max_addr,
+			    int node_start, u64 sz)
+{
+	int i = node_start;
+	sz = (sz << 20) & NODE_HASH_MASK;
+	while (!setup_node_range(i++, nodes, addr, sz, max_addr))
+		;
+	return i - node_start;
 }
-#endif
+
+/*
+ * Sets up the system RAM area from start_pfn to end_pfn according to the
+ * numa=fake command line.
+ */
+static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
+{
+	struct bootnode nodes[MAX_NUMNODES];
+	u64 addr = start_pfn << PAGE_SHIFT;
+	u64 max_addr = end_pfn << PAGE_SHIFT;
+	u64 sz;
+	int num_nodes;
+	int coeff_flag;
+	int coeff = -1;
+	int num;
+	int i;
+
+	memset(&nodes, 0, sizeof(nodes));
+	/*
+	 * If the numa=fake command line is just a single number N, split the
+	 * system RAM into N fake nodes.
+	 */
+	if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) {
+		num_nodes = split_nodes_equal(nodes, &addr, max_addr, 0,
+					      simple_strtol(cmdline, NULL, 0));
+		if (num_nodes < 0)
+			return num_nodes;
+		goto out;
+	}
+
+	/* Parse the command line */
+	for (coeff_flag = num = num_nodes = 0; ; cmdline++) {
+		if (*cmdline && isdigit(*cmdline)) {
+			num = num * 10 + *cmdline - '0';
+			continue;
+		} else if (*cmdline == '*') {
+			if (num > 0)
+				coeff = num;
+			coeff_flag = 1;
+		} else if (*cmdline == ',' || !*cmdline) {
+			if (!coeff_flag)
+				coeff = 1;
+			/*
+			 * Round down to the nearest 4 MB for hash function.
+			 * Command line coefficients are in megabytes.
+			 */
+			sz = ((u64)num << 20) & NODE_HASH_MASK;
+			if (sz)
+				for (i = 0; i < coeff; i++, num_nodes++)
+					if (setup_node_range(num_nodes, nodes,
+						&addr, sz, max_addr) < 0)
+						goto done;
+			if (!*cmdline)
+				break;
+			coeff = -1;
+			coeff_flag = 0;
+		}
+		num = 0;
+	}
+done:
+	if (!num_nodes)
+		return -1;
+	/* Fill remaining system RAM */
+	if (addr < max_addr) {
+		if (coeff_flag && coeff < 0) {
+			/* Split remaining nodes into num-sized chunks */
+			num_nodes += split_nodes_size(nodes, &addr, max_addr,
+						      num_nodes, num);
+			goto out;
+		}
+		switch (*(cmdline - 1)) {
+		case '*':
+			/* Split remaining nodes into coeff chunks */
+			if (coeff <= 0)
+				break;
+			num_nodes += split_nodes_equal(nodes, &addr, max_addr,
+						       num_nodes, coeff);
+			break;
+		case ',':
+			/* Do not allocate remaining system RAM */
+			break;
+		default:
+			/* Give one final node */
+			setup_node_range(num_nodes, nodes, &addr,
+					 max_addr - addr, max_addr);
+			num_nodes++;
+		}
+	}
+out:
+	populate_physnode_map(nodes, num_nodes);
+	for_each_online_node(i)
+		setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+	numa_init_array();
+	return 0;
+}
+
+#undef E820_ADDR_HOLE_SIZE
+#endif /* CONFIG_NUMA_EMU */
 
 void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 { 
 	int i;
 
 #ifdef CONFIG_NUMA_EMU
-	if (numa_fake && !numa_emulation(start_pfn, end_pfn))
+	if (cmdline && !numa_emulation(start_pfn, end_pfn))
  		return;
 #endif
 
@@ -267,7 +401,7 @@ void __init numa_initmem_init(unsigned l
 	printk(KERN_INFO "Faking a node at %016lx-%016lx\n", 
 	       start_pfn << PAGE_SHIFT,
 	       end_pfn << PAGE_SHIFT); 
-		/* setup dummy node covering all memory */ 
+	/* setup dummy node covering all memory */
 	nodes_clear(node_online_map);
 	node_set_online(0);
 	for (i = 0; i < NR_CPUS; i++)
@@ -322,17 +456,13 @@ void __init paging_init(void)
 	}
 } 
 
-/* [numa=off] */
 __init int numa_setup(char *opt) 
 { 
 	if (!strncmp(opt,"off",3))
 		numa_off = 1;
 #ifdef CONFIG_NUMA_EMU
-	if(!strncmp(opt, "fake=", 5)) {
-		numa_fake = simple_strtoul(opt+5,NULL,0); ;
-		if (numa_fake >= MAX_NUMNODES)
-			numa_fake = MAX_NUMNODES;
-	}
+	if(!strncmp(opt, "fake=", 5))
+		cmdline = opt + 5;
 #endif
 #ifdef CONFIG_ACPI_NUMA
  	if (!strncmp(opt,"noacpi",6))
_

Patches currently in -mm which might be from rientjes@xxxxxxxxxx are


-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux