Dario Faggioli wrote: > Starting from Xen 4.2, libxl has all the bits and pieces in place > for retrieving an adequate amount of information about the host > NUMA topology. It is therefore possible, after a bit of shuffling, > to arrange those information in the way libvirt wants to present > them to the outside world. > > Therefore, with this patch, the <topology> section of the host > capabilities is properly populated, when running on Xen, so that > we can figure out whether or not we're running on a NUMA host, > and what its characteristics are. > > [raistlin@Zhaman ~]$ sudo virsh --connect xen:/// capabilities > <capabilities> > <host> > <cpu> > .... > <topology> > <cells num='2'> > <cell id='0'> > <memory unit='KiB'>6291456</memory> > <cpus num='8'> > <cpu id='0' socket_id='1' core_id='0' siblings='0-1'/> > <cpu id='1' socket_id='1' core_id='0' siblings='0-1'/> > <cpu id='2' socket_id='1' core_id='1' siblings='2-3'/> > <cpu id='3' socket_id='1' core_id='1' siblings='2-3'/> > <cpu id='4' socket_id='1' core_id='9' siblings='4-5'/> > <cpu id='5' socket_id='1' core_id='9' siblings='4-5'/> > <cpu id='6' socket_id='1' core_id='10' siblings='6-7'/> > <cpu id='7' socket_id='1' core_id='10' siblings='6-7'/> > </cpus> > </cell> > <cell id='1'> > <memory unit='KiB'>6881280</memory> > <cpus num='8'> > <cpu id='8' socket_id='0' core_id='0' siblings='8-9'/> > <cpu id='9' socket_id='0' core_id='0' siblings='8-9'/> > <cpu id='10' socket_id='0' core_id='1' siblings='10-11'/> > <cpu id='11' socket_id='0' core_id='1' siblings='10-11'/> > <cpu id='12' socket_id='0' core_id='9' siblings='12-13'/> > <cpu id='13' socket_id='0' core_id='9' siblings='12-13'/> > <cpu id='14' socket_id='0' core_id='10' siblings='14-15'/> > <cpu id='15' socket_id='0' core_id='10' siblings='14-15'/> > </cpus> > </cell> > </cells> > </topology> > </host> > .... 
> > Signed-off-by: Dario Faggioli <dario.faggioli@xxxxxxxxxx> > --- > Changes from v1: > * fixed a typo in the commit message, as requested during review; > * fixed coding style (one function parameters per line and no spaces > between variable definitions), as requested during review; > * avoid zero-filling memory after VIR_ALLOC_N(), since it does that > already, as requested during review; > * improved out of memory error reporting, as requested during review; > * libxlMakeNumaCapabilities() created, accommodating all the NUMA > related additions, instead of having them within > libxlMakeCapabilitiesInternal(), as suggested during review. > --- > src/libxl/libxl_conf.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 140 insertions(+), 1 deletion(-) > > diff --git a/src/libxl/libxl_conf.c b/src/libxl/libxl_conf.c > index e170357..7515995 100644 > --- a/src/libxl/libxl_conf.c > +++ b/src/libxl/libxl_conf.c > @@ -161,6 +161,115 @@ libxlBuildCapabilities(virArch hostarch, > } > > static virCapsPtr > +libxlMakeNumaCapabilities(libxl_numainfo *numa_info, > + int nr_nodes, > + libxl_cputopology *cpu_topo, > + int nr_cpus, > + virCapsPtr caps) > I think this should return an int (0 success, -1 failure). 
> +{ > + virCapsHostNUMACellCPUPtr *cpus = NULL; > + int *nr_cpus_node = NULL; > + bool numa_failed = false; > + int i; > + > + if (VIR_ALLOC_N(cpus, nr_nodes)) { > + virReportOOMError(); > + return caps; > + } > + > + if (VIR_ALLOC_N(nr_cpus_node, nr_nodes)) { > + VIR_FREE(cpus); > + virReportOOMError(); > + return caps; > + } > + > + /* For each node, prepare a list of CPUs belonging to that node */ > + for (i = 0; i < nr_cpus; i++) { > + int node = cpu_topo[i].node; > + > + if (cpu_topo[i].core == LIBXL_CPUTOPOLOGY_INVALID_ENTRY) > + continue; > + > + nr_cpus_node[node]++; > + > + if (nr_cpus_node[node] == 1) { > + if (VIR_ALLOC(cpus[node]) < 0) { > + virReportOOMError(); > + numa_failed = true; > + goto cleanup; > + } > + } > + else { > + if (VIR_REALLOC_N(cpus[node], nr_cpus_node[node]) < 0) { > + virReportOOMError(); > + numa_failed = true; > + goto cleanup; > + } > + } > + > + /* Mapping between what libxl tells and what libvirt wants */ > + cpus[node][nr_cpus_node[node]-1].id = i; > + cpus[node][nr_cpus_node[node]-1].socket_id = cpu_topo[i].socket; > + cpus[node][nr_cpus_node[node]-1].core_id = cpu_topo[i].core; > + /* Allocate the siblings maps. 
We will be filling them later */ > + cpus[node][nr_cpus_node[node]-1].siblings = virBitmapNew(nr_cpus); > + if (!cpus[node][nr_cpus_node[node]-1].siblings) { > + virReportOOMError(); > + numa_failed = true; > + goto cleanup; > + } > + } > + > + /* Let's now populate the siblings bitmaps */ > + for (i = 0; i < nr_cpus; i++) { > + int j, node = cpu_topo[i].node; > + > + if (cpu_topo[i].core == LIBXL_CPUTOPOLOGY_INVALID_ENTRY) > + continue; > + > + for (j = 0; j < nr_cpus_node[node]; j++) { > + if (cpus[node][j].core_id == cpu_topo[i].core) > + ignore_value(virBitmapSetBit(cpus[node][j].siblings, i)); > Noticed the following topology on an old 2 socket, quad core Intel machine I'm using to test this patch <topology> <cells num='1'> <cell id='0'> <memory unit='KiB'>9175040</memory> <cpus num='8'> <cpu id='0' socket_id='0' core_id='0' siblings='0,4'/> <cpu id='1' socket_id='0' core_id='1' siblings='1,5'/> <cpu id='2' socket_id='0' core_id='2' siblings='2,6'/> <cpu id='3' socket_id='0' core_id='3' siblings='3,7'/> <cpu id='4' socket_id='1' core_id='0' siblings='0,4'/> <cpu id='5' socket_id='1' core_id='1' siblings='1,5'/> <cpu id='6' socket_id='1' core_id='2' siblings='2,6'/> <cpu id='7' socket_id='1' core_id='3' siblings='3,7'/> </cpus> </cell> </cells> </topology> I'm not sure how cpus 0 and 4 can be siblings when they are on different sockets, but seems xl reports similar info cpu_topology : cpu: core socket node 0: 0 0 0 1: 1 0 0 2: 2 0 0 3: 3 0 0 4: 0 1 0 5: 1 1 0 6: 2 1 0 7: 3 1 0 numa_info : node: memsize memfree distances 0: 8960 7116 10 BTW, the qemu driver reports the following on the same machine <topology> <cells num='1'> <cell id='0'> <memory unit='KiB'>8387124</memory> <cpus num='8'> <cpu id='0' socket_id='0' core_id='0' siblings='0'/> <cpu id='1' socket_id='1' core_id='0' siblings='1'/> <cpu id='2' socket_id='0' core_id='1' siblings='2'/> <cpu id='3' socket_id='1' core_id='1' siblings='3'/> <cpu id='4' socket_id='0' core_id='2' siblings='4'/> <cpu id='5' 
socket_id='1' core_id='2' siblings='5'/> <cpu id='6' socket_id='0' core_id='3' siblings='6'/> <cpu id='7' socket_id='1' core_id='3' siblings='7'/> </cpus> </cell> </cells> </topology> which seems correct since hyperthreading is not supported. > + } > + } > + > + for (i = 0; i < nr_nodes; i++) { > + if (numa_info[i].size == LIBXL_NUMAINFO_INVALID_ENTRY) > + continue; > + > + if (virCapabilitiesAddHostNUMACell(caps, i, nr_cpus_node[i], > + numa_info[i].size / 1024, > + cpus[i]) < 0) { > + virCapabilitiesClearHostNUMACellCPUTopology(cpus[i], > + nr_cpus_node[i]); > + numa_failed = true; > + goto cleanup; > + } > + > + /* This is safe, as the CPU list is now stored in the NUMA cell */ > + cpus[i] = NULL; > + } > + > + cleanup: > + > + if (numa_failed) { > + /* Looks like something went wrong. Well, that's bad, but probably > + * not enough to break the whole driver, so we log and carry on */ > + for (i = 0; i < nr_nodes; i++) { > + VIR_FREE(cpus[i]); > + } > + VIR_WARN("Failed to retrieve and build host NUMA topology properly,\n" > + "disabling NUMA capabilities"); > + virCapabilitiesFreeNUMAInfo(caps); > + } > + > + VIR_FREE(cpus); > + VIR_FREE(nr_cpus_node); > + > + return caps; > +} > + > +static virCapsPtr > libxlMakeCapabilitiesInternal(virArch hostarch, > libxl_physinfo *phy_info, > char *capabilities) > @@ -772,7 +881,11 @@ libxlMakeCapabilities(libxl_ctx *ctx) > { > int err; > libxl_physinfo phy_info; > + libxl_numainfo *numa_info = NULL; > + libxl_cputopology *cpu_topo = NULL; > const libxl_version_info *ver_info; > + int nr_nodes = 0, nr_cpus = 0; > + virCapsPtr caps; > > err = regcomp(&xen_cap_rec, xen_cap_re, REG_EXTENDED); > if (err != 0) { > @@ -796,9 +909,35 @@ libxlMakeCapabilities(libxl_ctx *ctx) > return NULL; > } > > - return libxlMakeCapabilitiesInternal(virArchFromHost(), > + /* Let's try to fetch NUMA info, but it is not critical if we fail */ > + numa_info = libxl_get_numainfo(ctx, &nr_nodes); > + if (numa_info == NULL) > + 
VIR_WARN("libxl_get_numainfo failed to retrieve NUMA data"); > + else { > + /* If the above failed, we'd have no NUMa caps anyway! */ > + cpu_topo = libxl_get_cpu_topology(ctx, &nr_cpus); > + if (cpu_topo == NULL) { > + VIR_WARN("libxl_get_cpu_topology failed to retrieve topology"); > + libxl_numainfo_list_free(numa_info, nr_nodes); > + } > + } > Move these after the call to libxlMakeCapabilitiesInternal, before calling libxlMakeNumaCapabilities. > + > + caps = libxlMakeCapabilitiesInternal(virArchFromHost(), > &phy_info, > ver_info->capabilities); > Check that caps != NULL ? > + > + /* Add topology information to caps, if available */ > + if (numa_info && cpu_topo) > + caps = libxlMakeNumaCapabilities(numa_info, > + nr_nodes, > + cpu_topo, > + nr_cpus, > + caps); > Hmm, I guess there is not much to check with regard to errors at this point, so libxlMakeNumaCapabilities could return void. I suppose returning success or failure via int is more libvirt style. Regards, Jim > + > + libxl_cputopology_list_free(cpu_topo, nr_cpus); > + libxl_numainfo_list_free(numa_info, nr_nodes); > + > + return caps; > } > > int > > > > -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list