[PATCH 5 of 5] numactl --hardware should handle sparse node numbering

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Author: Tim Pepper <lnxninja@xxxxxxxxxxxxxxxxxx>
Date:   Tue Sep 21 16:53:01 2010 -0700

    numactl --hardware should handle sparse node numbering
    
    Previously this has been enabled, but the patches by Amit Arora in
    2009 were partly undone by Thomas Renninger in 2010, because these
    past patches appear to have only considered parts of the problem.
    Reverting the patches fixed Thomas's problem but reintroduced Amit's.
    
    It is possible to have sparse node numbering as well as nodes with
    no memory, nodes with no cpus and nodes with neither cpus nor memory.
    All of these should be handled.  The existing node bitmasks and code also
    have conflated those possibilities with policy (ie: numa_all_nodes_ptr
    contains nodes which have memory and from which the calling process may
    allocate memory).  For this reason a new node bitmask, numa_nodes_ptr,
    is added and populated so that 'numactl --hardware' can truly inventory
    the available hardware as per the man page.
    
    This may or may not be correct in that the word "available" may or may not
    have been intended to include policy and actual usability of the nodes.
    But it seems that the command is meant to show a lower level inventory,
    in which case it seems reasonable to truly print out all the nodes which
    kernelspace has exposed to userspace.
    
    Signed-off-by: Tim Pepper <lnxninja@xxxxxxxxxxxxxxxxxx>
    Cc: Thomas Renninger <trenn@xxxxxxx>
    Cc: Anton Blanchard <anton@xxxxxxxxx>
    Cc: Amit Arora <aarora@xxxxxxxxxxxxxxxxxx>
---
 distance.c        |   26 ++++++++++++++++----------
 libnuma.c         |    9 +++++++--
 numa.h            |    3 +++
 numactl.c         |   51 +++++++++++++++++++++++++++++++++++++++++++++++----
 versions.ldscript |    1 +
 5 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/distance.c b/distance.c
index 2aad2bb..2b48f97 100755
--- a/distance.c
+++ b/distance.c
@@ -26,14 +26,21 @@
 static int distance_numnodes;
 static int *distance_table;
 
-static void parse_numbers(char *s, int *iptr, int n)
+static void parse_numbers(char *s, int *iptr)
 {
 	int i, d, j;
 	char *end;
-	for (i = 0, j = 0; i < n; i++, j++) {
+	int maxnode = numa_max_node();
+	int numnodes = 0;
+
+	for (i = 0; i <= maxnode; i++)
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			numnodes++;
+
+	for (i = 0, j = 0; i <= maxnode; i++, j++) {
 		d = strtoul(s, &end, 0);
 		/* Skip unavailable nodes */
-		while (j<n &&  !numa_bitmask_isbitset(numa_all_nodes_ptr, j))
+		while (j<=maxnode && !numa_bitmask_isbitset(numa_nodes_ptr, j))
 			j++;
 		*(iptr+j) = d;
 		if (s == end)
@@ -47,10 +54,10 @@ static int read_distance_table(void)
 	int nd, len;
 	char *line = NULL;
 	size_t linelen = 0;
-	int numnodes = 0;
+	int maxnode = numa_max_node() + 1;
 	int *table = NULL;
 	int err = -1;
-	
+
 	for (nd = 0;; nd++) {
 		char fn[100];
 		FILE *dfh;
@@ -59,7 +66,7 @@ static int read_distance_table(void)
 		if (!dfh) {
 			if (errno == ENOENT && nd > 0)
 				err = 0;
-			if (!err && nd<=numa_max_node())
+			if (!err && nd<maxnode)
 				continue;
 			else
 				break;
@@ -70,15 +77,14 @@ static int read_distance_table(void)
 			break;
 
 		if (!table) {
-			numnodes = numa_num_configured_nodes();
-			table = calloc(numnodes * numnodes, sizeof(int));
+			table = calloc(maxnode * maxnode, sizeof(int));
 			if (!table) {
 				errno = ENOMEM;
 				break;
 			}
 		}
 
-		parse_numbers(line, table + nd * numnodes, numnodes);
+		parse_numbers(line, table + nd * maxnode);
 	}
 	free(line);
 	if (err)  {
@@ -96,7 +102,7 @@ static int read_distance_table(void)
 		free(table);
 		return 0;
 	}
-	distance_numnodes = numnodes;
+	distance_numnodes = maxnode;
 	distance_table = table;
 	return 0;		
 }
diff --git a/libnuma.c b/libnuma.c
index 641ad26..d40835d 100644
--- a/libnuma.c
+++ b/libnuma.c
@@ -51,6 +51,7 @@ struct bitmask *numa_all_cpus_ptr = NULL;
    of numa_no_nodes and numa_all_nodes, but the loader does not correctly
    handle versioning of BSS versus small data items */
 
+struct bitmask *numa_nodes_ptr = NULL;
 static struct bitmask *numa_memnode_ptr = NULL;
 static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
 struct bitmask **node_cpu_mask_v2;
@@ -105,6 +106,8 @@ numa_fini(void)
 		numa_bitmask_free(numa_no_nodes_ptr);
 	if (numa_memnode_ptr)
 		numa_bitmask_free(numa_memnode_ptr);
+	if (numa_nodes_ptr)
+		numa_bitmask_free(numa_nodes_ptr);
 }
 
 /*
@@ -292,8 +295,8 @@ int numa_pagesize(void)
 make_internal_alias(numa_pagesize);
 
 /*
- * Find nodes with memory (numa_memnode_ptr) and the highest numbered
- * existing node (maxconfigurednode).
+ * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
+ * and the highest numbered existing node (maxconfigurednode).
  */
 static void
 set_configured_nodes(void)
@@ -303,6 +306,7 @@ set_configured_nodes(void)
 	long long freep;
 
 	numa_memnode_ptr = numa_allocate_nodemask();
+	numa_nodes_ptr = numa_allocate_nodemask();
 
 	d = opendir("/sys/devices/system/node");
 	if (!d) {
@@ -313,6 +317,7 @@ set_configured_nodes(void)
 			if (strncmp(de->d_name, "node", 4))
 				continue;
 			nd = strtoul(de->d_name+4, NULL, 0);
+			numa_bitmask_setbit(numa_nodes_ptr, nd);
 			if (numa_node_size64(nd, &freep) > 0)
 				numa_bitmask_setbit(numa_memnode_ptr, nd);
 			if (maxconfigurednode < nd)
diff --git a/numa.h b/numa.h
index 989f4d7..9a6a644 100755
--- a/numa.h
+++ b/numa.h
@@ -150,6 +150,9 @@ int numa_pagesize(void);
    Only valid after numa_available. */
 extern struct bitmask *numa_all_nodes_ptr;
 
+/* Set with all nodes the kernel has exposed to userspace */
+extern struct bitmask *numa_nodes_ptr;
+
 /* For source compatibility */
 extern nodemask_t numa_all_nodes;
 
diff --git a/numactl.c b/numactl.c
index ce3a482..2e21ae8 100755
--- a/numactl.c
+++ b/numactl.c
@@ -188,12 +188,17 @@ static void print_distances(int maxnode)
 	printf("node distances:\n");
 	printf("node ");
 	for (i = 0; i <= maxnode; i++)
-		printf("% 3d ", i);
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			printf("% 3d ", i);
 	printf("\n");
 	for (i = 0; i <= maxnode; i++) {
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
+			continue;
 		printf("% 3d: ", i);
 		for (k = 0; k <= maxnode; k++)
-			printf("% 3d ", numa_distance(i,k));
+			if (numa_bitmask_isbitset(numa_nodes_ptr, i) &&
+			    numa_bitmask_isbitset(numa_nodes_ptr, k))
+				printf("% 3d ", numa_distance(i,k));
 		printf("\n");
 	}			
 }
@@ -216,14 +221,52 @@ void print_node_cpus(int node)
 void hardware(void)
 {
 	int i;
+	int numnodes=0;
+	int prevnode=-1;
+	int skip=0;
 	int maxnode = numa_max_node();
 
-	printf("available: %d nodes (0-%d)\n", 1+maxnode, maxnode);
+	for (i=0; i<=maxnode; i++)
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			numnodes++;
+	printf("available: %d nodes (", numnodes);
+	for (i=0; i<=maxnode; i++) {
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i)) {
+			if (prevnode == -1) {
+				printf("%d", i);
+				prevnode=i;
+				continue;
+			}
+
+			if (i > prevnode + 1) {
+				if (skip) {
+					printf("%d", prevnode);
+					skip=0;
+				}
+				printf(",%d", i);
+				prevnode=i;
+				continue;
+			}
+
+			if (i == prevnode + 1) {
+				if (!skip) {
+					printf("-");
+					skip=1;
+				}
+				prevnode=i;
+			}
+
+			if ((i == maxnode) && skip)
+				printf("%d", prevnode);
+		}
+	}
+	printf(")\n");
+
 	for (i = 0; i <= maxnode; i++) {
 		char buf[64];
 		long long fr;
 		unsigned long long sz = numa_node_size64(i, &fr);
-		if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i))
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
 			continue;
 
 		printf("node %d cpus:", i);
diff --git a/versions.ldscript b/versions.ldscript
index c2c88b6..e3389e0 100755
--- a/versions.ldscript
+++ b/versions.ldscript
@@ -119,6 +119,7 @@ libnuma_1.2 {
     numa_node_size;
     numa_node_to_cpus;
     numa_node_of_cpu;
+    numa_nodes_ptr;
     numa_num_configured_cpus;
     numa_num_configured_nodes;
     numa_num_possible_nodes;

--
To unsubscribe from this list: send the line "unsubscribe linux-numa" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [Devices]

  Powered by Linux