On 09.07.2012 17:17, Peter Krempa wrote: > This patch changes the way data to fill the nodeinfo structure are > gathered. We've gathere the test data by iterating processors an sockets > separately from nodes. The reported data was based solely on information > about core id. Problems arise when eg cores in mulit-processor machines > don't have same id's on both processors or maybe one physical processor > contains more NUMA nodes. > > This patch changes the approach how we detect processors and nodes. Now > we start at enumerating nodes and for each node processors, sockets and > threads are enumerated separately. This approach provides acurate data > that comply to docs about the nodeinfo structure. This also enables to > get rid of hacks: see commits 10d9038b744a69c8d4bd29c2e8c012a097481586, > ac9dd4a676f21b5e3ca6dbe0526f2a6709072beb. (Those changes in nodeinfo.c > are efectively reverted by this patch). > > This patch also changes output of one of the tests, as the processor > topology is now acquired more precisely. > --- > src/nodeinfo.c | 311 ++++++++++++-------- > .../linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt | 2 +- > 2 files changed, 185 insertions(+), 128 deletions(-) > > diff --git a/src/nodeinfo.c b/src/nodeinfo.c > index 819f954..ae713da 100644 > --- a/src/nodeinfo.c > +++ b/src/nodeinfo.c > @@ -188,38 +188,131 @@ virNodeParseSocket(const char *dir, unsigned int cpu) > return ret; > } > > +/* parses a node entry, returning number of processors in the node and > + * filling arguments */ > static int > -virNodeParseNode(const char *sysfs_dir) > +virNodeParseNode(const char *node, int *sockets, int *cores, int *threads) I'd mark these as ATTRIBUTE_NONNULL esp when ... > { > - char *file = NULL; > - char *possible = NULL; > - char *tmp; > int ret = -1; > + int processors = 0; > + DIR *cpudir = NULL; > + struct dirent *cpudirent = NULL; > + int sock_max = 0; > + cpu_set_t sock_map; > + int sock; > + cpu_set_t *core_maps = NULL; > + int core; > + int i; > + int siblings; > + unsigned int cpu; > + int online; > > - if (virAsprintf(&file, "%s/node/possible", sysfs_dir) < 0) { > - virReportOOMError(); > + *threads = 0; > + *cores = 0; > + *sockets = 0; ... doing this. > + > + if (!(cpudir = opendir(node))) { > + virReportSystemError(errno, _("cannot opendir %s"), node); > goto cleanup; > } > - /* Assume that a missing node/possible file implies no NUMA > - * support, and hence all cpus belong to the same node. */ > - if (!virFileExists(file)) { > - ret = 1; > + > + /* enumerate sockets in the node */ > + CPU_ZERO(&sock_map); > + while ((cpudirent = readdir(cpudir))) { I guess we want reentrant version of readdir here, don't we? > + if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1) > + continue; > + > + /* Parse socket */ > + sock = virNodeParseSocket(node, cpu); > + CPU_SET(sock, &sock_map); > + > + if (sock > sock_max) > + sock_max = sock; > + } > + > + if (errno) { You should have reset errno before while() loop. > + virReportSystemError(errno, _("problem reading %s"), node); > goto cleanup; > } > - if (virFileReadAll(file, 1024, &possible) < 0) > + > + sock_max++; > + > + /* allocate cpu maps for each socket */ > + if (VIR_ALLOC_N(core_maps, sock_max) < 0) { > + virReportOOMError(); > goto cleanup; > - if (virStrToLong_i(possible, &tmp, 10, &ret) < 0 || > - (*tmp == '-' && virStrToLong_i(tmp+1, &tmp, 10, &ret) < 0) || > - *tmp != '\n') { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - _("failed to parse possible nodes '%s'"), possible); > + } > + > + for (i = 0; i < sock_max; i++) > + CPU_ZERO(&core_maps[i]); > + > + /* iterate over all CPU's in the node */ > + rewinddir(cpudir); > + while ((cpudirent = readdir(cpudir))) { Again, s/readdir/readdir_r/ > + if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1) > + continue; > + > + if ((online = virNodeGetCpuValue(node, cpu, "online", true)) < 0) > + goto cleanup; > + > + if (!online) > + continue; > + > + processors++; > + > + /* Parse socket */ > + sock = virNodeParseSocket(node, cpu); > + if (!CPU_ISSET(sock, &sock_map)) { > + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("CPU socket topology has changed")); > + goto cleanup; > + } > + > + /* Parse core */ > +# if defined(__s390__) || \ > + defined(__s390x__) > + /* logical cpu is equivalent to a core on s390 */ Bad indentation. > + core = cpu; > +# else > + core = virNodeGetCpuValue(node, cpu, "topology/core_id", false); > +# endif > + > + CPU_SET(core, &core_maps[sock]); > + > + if (!(siblings = virNodeCountThreadSiblings(node, cpu))) > + goto cleanup; > + > + if (siblings > *threads) > + *threads = siblings; > + } > + > + if (errno) { > + virReportSystemError(errno, _("problem reading %s"), node); > goto cleanup; > } > - ret++; > + > + /* finalize the returned data */ > + *sockets = CPU_COUNT(&sock_map); > + > + for (i = 0; i < sock_max; i++) { > + if (!CPU_ISSET(i, &sock_map)) > + continue; > + > + core = CPU_COUNT(&core_maps[i]); > + if (core > *cores) > + *cores = core; > + } > + > + ret = processors; > > cleanup: > - VIR_FREE(file); > - VIR_FREE(possible); > + /* don't shadow a more serious error */ > + if (cpudir && closedir(cpudir) < 0 && ret >= 0) { > + virReportSystemError(errno, _("problem closing %s"), node); > + ret = -1; > + } > + VIR_FREE(core_maps); > + > return ret; > } > > @@ -228,20 +321,18 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, > virNodeInfoPtr nodeinfo) > { > char line[1024]; > - DIR *cpudir = NULL; > - struct dirent *cpudirent = NULL; > - unsigned int cpu; > - unsigned long core, sock, cur_threads; > - cpu_set_t core_mask; > - cpu_set_t socket_mask; > - int online; > + DIR *nodedir = NULL; > + struct dirent *nodedirent = NULL; > + int cpus, cores, socks, threads; > + unsigned int node; > int ret = -1; > + char *sysfs_nodedir = NULL; > char *sysfs_cpudir = NULL; > - unsigned int cpu_cores = 0; > > nodeinfo->cpus = 0; > nodeinfo->mhz = 0; > nodeinfo->cores = 0; > + nodeinfo->nodes = 0; > > /* Start with parsing /proc/cpuinfo; although it tends to have > * fewer details. Hyperthreads are ignored at this stage. */ > @@ -254,40 +345,22 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, > char *p; > unsigned int ui; > > - buf += 9; > + buf += 7; > while (*buf && c_isspace(*buf)) > buf++; > > if (*buf != ':' || !buf[1]) { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("parsing cpu MHz from cpuinfo")); > + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("parsing cpu MHz from cpuinfo")); > goto cleanup; > } > > - if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 > + if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 && > /* Accept trailing fractional part. */ > - && (*p == '\0' || *p == '.' || c_isspace(*p))) > + (*p == '\0' || *p == '.' || c_isspace(*p))) > nodeinfo->mhz = ui; > } > > - if (STRPREFIX(buf, "cpu cores")) { > - char *p; > - unsigned int ui; > - > - buf += 9; > - while (*buf && c_isspace(*buf)) > - buf++; > - > - if (*buf != ':' || !buf[1]) { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("parsing cpu cores from cpuinfo")); > - return -1; > - } > - > - if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 > - && (*p == '\0' || c_isspace(*p))) > - cpu_cores = ui; > - } > # elif defined(__powerpc__) || \ > defined(__powerpc64__) > char *buf = line; > @@ -300,14 +373,14 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, > buf++; > > if (*buf != ':' || !buf[1]) { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("parsing cpu MHz from cpuinfo")); > + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("parsing cpu MHz from cpuinfo")); > goto cleanup; > } > > - if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 > + if (virStrToLong_ui(buf+1, &p, 10, &ui) == 0 && > /* Accept trailing fractional part. */ > - && (*p == '\0' || *p == '.' || c_isspace(*p))) > + (*p == '\0' || *p == '.' || c_isspace(*p))) > nodeinfo->mhz = ui; > /* No other interesting infos are available in /proc/cpuinfo. > * However, there is a line identifying processor's version, > @@ -328,115 +401,99 @@ int linuxNodeInfoCPUPopulate(FILE *cpuinfo, > /* OK, we've parsed clock speed out of /proc/cpuinfo. Get the > * core, node, socket, thread and topology information from /sys > */ > - if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) { > + if (virAsprintf(&sysfs_nodedir, "%s/node", sysfs_dir) < 0) { > virReportOOMError(); > goto cleanup; > } > - cpudir = opendir(sysfs_cpudir); > - if (cpudir == NULL) { > - virReportSystemError(errno, _("cannot opendir %s"), sysfs_cpudir); > - goto cleanup; > + > + if (!(nodedir = opendir(sysfs_nodedir))) { > + /* the host isn't probably running a NUMA architecture */ > + goto fallback; > } > > - CPU_ZERO(&core_mask); > - CPU_ZERO(&socket_mask); > > - while ((cpudirent = readdir(cpudir))) { > - if (sscanf(cpudirent->d_name, "cpu%u", &cpu) != 1) > + while ((nodedirent = readdir(nodedir))) { > + if (sscanf(nodedirent->d_name, "node%u", &node) != 1) > continue; > > - online = virNodeGetCpuValue(sysfs_cpudir, cpu, "online", true); > - if (online < 0) { > - closedir(cpudir); > + nodeinfo->nodes++; > + > + if (virAsprintf(&sysfs_cpudir, "%s/node/%s", > + sysfs_dir, nodedirent->d_name) < 0) { > + virReportOOMError(); > goto cleanup; > } > - if (!online) > - continue; > - nodeinfo->cpus++; > > - /* Parse core */ > -# if defined(__s390__) || \ > - defined(__s390x__) > - /* logical cpu is equivalent to a core on s390 */ > - core = cpu; > -# else > - core = virNodeGetCpuValue(sysfs_cpudir, cpu, "topology/core_id", false); > -# endif > - if (!CPU_ISSET(core, &core_mask)) { > - CPU_SET(core, &core_mask); > - nodeinfo->cores++; > - } > + if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, > + &cores, &threads)) < 0) > + goto cleanup; > > - /* Parse socket */ > - sock = virNodeParseSocket(sysfs_cpudir, cpu); > - if (!CPU_ISSET(sock, &socket_mask)) { > - CPU_SET(sock, &socket_mask); > - nodeinfo->sockets++; > - } > + VIR_FREE(sysfs_cpudir); > > - cur_threads = virNodeCountThreadSiblings(sysfs_cpudir, cpu); > - if (cur_threads == 0) { > - closedir(cpudir); > - goto cleanup; > - } > - if (cur_threads > nodeinfo->threads) > - nodeinfo->threads = cur_threads; > + nodeinfo->cpus += cpus; > + > + if (socks > nodeinfo->sockets) > + nodeinfo->sockets = socks; > + > + if (cores > nodeinfo->cores) > + nodeinfo->cores = cores; > + > + if (threads > nodeinfo->threads) > + nodeinfo->threads = threads; > } > + > if (errno) { > - virReportSystemError(errno, > - _("problem reading %s"), sysfs_cpudir); > - closedir(cpudir); > + virReportSystemError(errno, _("problem reading %s"), sysfs_nodedir); > goto cleanup; > } > - if (closedir(cpudir) < 0) { > - virReportSystemError(errno, > - _("problem closing %s"), sysfs_cpudir); > + > + if (nodeinfo->cpus && nodeinfo->nodes) > + goto done; > + > +fallback: > + VIR_FREE(sysfs_cpudir); > + > + if (virAsprintf(&sysfs_cpudir, "%s/cpu", sysfs_dir) < 0) { > + virReportOOMError(); > goto cleanup; > } > > - if ((nodeinfo->nodes = virNodeParseNode(sysfs_dir)) <= 0) > + if ((cpus = virNodeParseNode(sysfs_cpudir, &socks, &cores, &threads)) < 0) > goto cleanup; > > + nodeinfo->nodes = 1; > + nodeinfo->cpus = cpus; > + nodeinfo->sockets = socks; > + nodeinfo->cores = cores; > + nodeinfo->threads = threads; > + > +done: > /* There should always be at least one cpu, socket, node, and thread. */ > if (nodeinfo->cpus == 0) { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("no CPUs found")); > + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no CPUs found")); > goto cleanup; > } > + > if (nodeinfo->sockets == 0) { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("no sockets found")); > + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no sockets found")); > goto cleanup; > } > + > if (nodeinfo->threads == 0) { > - nodeReportError(VIR_ERR_INTERNAL_ERROR, > - "%s", _("no threads found")); > + nodeReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("no threads found")); > goto cleanup; > } > > - /* Platform like AMD Magny Cours has two NUMA nodes each package, and > - * the two nodes share the same core ID set, it results in the cores > - * number calculated from sysfs is not the actual cores number. Use > - * "cpu cores" in /proc/cpuinfo as the cores number instead in this case. > - * More details about the problem: > - * https://www.redhat.com/archives/libvir-list/2012-May/msg00607.html > - */ > - if (cpu_cores && (cpu_cores > nodeinfo->cores)) > - nodeinfo->cores = cpu_cores; > - > - /* nodeinfo->sockets is supposed to be a number of sockets per NUMA node, > - * however if NUMA nodes are not composed of whole sockets, we just lie > - * about the number of NUMA nodes and force apps to check capabilities XML > - * for the actual NUMA topology. > - */ > - if (nodeinfo->sockets % nodeinfo->nodes == 0) > - nodeinfo->sockets /= nodeinfo->nodes; > - else > - nodeinfo->nodes = 1; > - > ret = 0; > > cleanup: > + /* don't shadow a more serious error */ > + if (nodedir && closedir(nodedir) < 0 && ret >= 0) { > + virReportSystemError(errno, _("problem closing %s"), sysfs_nodedir); > + ret = -1; > + } > + > + VIR_FREE(sysfs_nodedir); > VIR_FREE(sysfs_cpudir); > return ret; > } > diff --git a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt > index 333187e..0306f86 100644 > --- a/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt > +++ b/tests/nodeinfodata/linux-nodeinfo-sysfs-test-3-cpu-x86-output.txt > @@ -1 +1 @@ > -CPUs: 48/48, MHz: 2100, Nodes: 1, Sockets: 4, Cores: 12, Threads: 1 > +CPUs: 48/48, MHz: 2100, Nodes: 8, Sockets: 1, Cores: 6, Threads: 1 > Otherwise looking good. ACK with switching to reentrant readdir, errno setting before first usage and comment indentation fixed. Michal -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list