This element specifies settings similar to those of the memory element, but memnode can be applied per guest NUMA node. Signed-off-by: Martin Kletzander <mkletzan@xxxxxxxxxx> --- docs/formatdomain.html.in | 18 +++++ docs/schemas/domaincommon.rng | 17 ++++ src/conf/domain_conf.c | 181 +++++++++++++++++++++++++++++++++--------- src/qemu/qemu_domain.c | 23 +++++- src/qemu/qemu_driver.c | 12 +++ src/util/virnuma.h | 14 +++- 6 files changed, 225 insertions(+), 40 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 041f70d..fd29ae3 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -709,6 +709,8 @@ ... <numatune> <memory mode="strict" nodeset="1-4,^3"/> + <memnode cellid="0" mode="strict" nodeset="1"/> + <memnode cellid="2" mode="preferred" nodeset="2"/> </numatune> ... </domain> @@ -745,6 +747,22 @@ <span class='since'>Since 0.9.3</span> </dd> + <dt><code>memnode</code></dt> + <dd> + Optional <code>memnode</code> elements can specify memory allocation + policies for each guest NUMA node. For those nodes having no + corresponding <code>memnode</code> element, the default from + element <code>memory</code> will be used. Attribute <code>cellid</code> + addresses the guest NUMA node to which the settings are applied. + Attributes <code>mode</code> and <code>nodeset</code> have the same + meaning and syntax as in the <code>memory</code> element. + + Because memory migration may cause issues depending on kernel settings, + using this <code>memnode</code> element effectively disables any live + changes of numatune settings in current versions of libvirt. 
+ + <span class='since'>QEMU Since 1.2.6</span> + </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 0787b5a..a8e3ba0 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -789,6 +789,23 @@ </choice> </element> </optional> + <zeroOrMore> + <element name="memnode"> + <attribute name="cellid"> + <ref name="unsignedInt"/> + </attribute> + <attribute name="mode"> + <choice> + <value>strict</value> + <value>preferred</value> + <value>interleave</value> + </choice> + </attribute> + <attribute name='nodeset'> + <ref name='cpuset'/> + </attribute> + </element> + </zeroOrMore> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index f1df092..4818cfb 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2085,6 +2085,9 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefFree(def->cputune.emulatorpin); virBitmapFree(def->numatune.memory.nodemask); + for (i = 0; i < def->numatune.nmem_nodes; i++) + virBitmapFree(def->numatune.mem_nodes[i].nodemask); + VIR_FREE(def->numatune.mem_nodes); virSysinfoDefFree(def->sysinfo); @@ -11233,6 +11236,7 @@ virDomainDefParseXML(xmlDocPtr xml, bool usb_master = false; bool primaryVideo = false; + if (VIR_ALLOC(def) < 0) return NULL; @@ -11666,6 +11670,33 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + + /* analysis of cpu handling */ + if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { + xmlNodePtr oldnode = ctxt->node; + ctxt->node = node; + def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST); + ctxt->node = oldnode; + + if (def->cpu == NULL) + goto error; + + if (def->cpu->sockets && + def->maxvcpus > + def->cpu->sockets * def->cpu->cores * def->cpu->threads) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("Maximum CPUs greater than topology limit")); + goto error; + } + + if (def->cpu->cells_cpus > def->maxvcpus) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Number of CPUs in 
<numa> exceeds the" + " <vcpu> count")); + goto error; + } + } + /* Extract numatune if exists. */ if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -11682,6 +11713,12 @@ virDomainDefParseXML(xmlDocPtr xml, if (n) { cur = nodes[0]->children; + if (def->cpu) { + if (VIR_ALLOC_N(def->numatune.mem_nodes, def->cpu->ncells) < 0) + goto error; + def->numatune.nmem_nodes = def->cpu->ncells; + } + while (cur != NULL) { if (cur->type == XML_ELEMENT_NODE) { if (xmlStrEqual(cur->name, BAD_CAST "memory")) { @@ -11764,6 +11801,78 @@ virDomainDefParseXML(xmlDocPtr xml, def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO; def->numatune.memory.placement_mode = placement_mode; + + } else if (xmlStrEqual(cur->name, BAD_CAST "memnode")) { + unsigned int cellid; + struct mem_node *mem_node = NULL; + + if (!def->numatune.nmem_nodes) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Element 'memnode' is invalid without " + "any guest NUMA cells")); + goto error; + } + tmp = virXMLPropString(cur, "cellid"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing required cellid attribute " + "in numatune memnode element")); + goto error; + } + if (virStrToLong_ui(tmp, NULL, 10, &cellid) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid cellid attribute " + "in numatune memnode element")); + goto error; + } + VIR_FREE(tmp); + + if (cellid >= def->numatune.nmem_nodes) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Argument 'cellid' in numatune " + "memnode element must correspond to " + "existing guest's NUMA cell")); + goto error; + } + + mem_node = &def->numatune.mem_nodes[cellid]; + + if (mem_node->specified) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Multiple numatune memnode elements " + "with duplicate 'cellid'")); + goto error; + } + + mem_node->specified = true; + + tmp = virXMLPropString(cur, "mode"); + if (tmp && + (mem_node->mode = + virDomainNumatuneMemModeTypeFromString(tmp)) < 0) { + 
virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid mode attribute " + "in numatune memnode element")); + goto error; + } else if (!tmp) { + mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + } + VIR_FREE(tmp); + + tmp = virXMLPropString(cur, "nodeset"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing required nodeset attribute " + "in numatune memnode element")); + goto error; + } + if (virBitmapParse(tmp, 0, + &mem_node->nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + goto error; + } + VIR_FREE(tmp); + } else { virReportError(VIR_ERR_XML_ERROR, _("unsupported XML element %s"), @@ -12863,32 +12972,6 @@ virDomainDefParseXML(xmlDocPtr xml, goto error; } - /* analysis of cpu handling */ - if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { - xmlNodePtr oldnode = ctxt->node; - ctxt->node = node; - def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST); - ctxt->node = oldnode; - - if (def->cpu == NULL) - goto error; - - if (def->cpu->sockets && - def->maxvcpus > - def->cpu->sockets * def->cpu->cores * def->cpu->threads) { - virReportError(VIR_ERR_XML_DETAIL, "%s", - _("Maximum CPUs greater than topology limit")); - goto error; - } - - if (def->cpu->cells_cpus > def->maxvcpus) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Number of CPUs in <numa> exceeds the" - " <vcpu> count")); - goto error; - } - } - if ((node = virXPathNode("./sysinfo[1]", ctxt)) != NULL) { xmlNodePtr oldnode = ctxt->node; ctxt->node = node; @@ -17395,31 +17478,57 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAddLit(buf, "</cputune>\n"); if (def->numatune.memory.nodemask || - def->numatune.memory.placement_mode) { + def->numatune.memory.placement_mode || + def->numatune.nmem_nodes) { const char *mode; char *nodemask = NULL; const char *placement; virBufferAddLit(buf, "<numatune>\n"); virBufferAdjustIndent(buf, 2); - mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode); - virBufferAsprintf(buf, "<memory mode='%s' ", mode); - if 
(def->numatune.memory.placement_mode == - VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) { - nodemask = virBitmapFormat(def->numatune.memory.nodemask); + if (def->numatune.memory.nodemask || + def->numatune.memory.placement_mode) { + + mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode); + virBufferAsprintf(buf, "<memory mode='%s' ", mode); + + if (def->numatune.memory.placement_mode == + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) { + nodemask = virBitmapFormat(def->numatune.memory.nodemask); + if (nodemask == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to format nodeset for " + "NUMA memory tuning")); + goto error; + } + virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask); + VIR_FREE(nodemask); + } else if (def->numatune.memory.placement_mode) { + placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode); + virBufferAsprintf(buf, "placement='%s'/>\n", placement); + } + } + + for (i = 0; i < def->numatune.nmem_nodes; i++) { + struct mem_node *mem_node = &def->numatune.mem_nodes[i]; + if (!mem_node->specified) + continue; + + nodemask = virBitmapFormat(mem_node->nodemask); + mode = virDomainNumatuneMemModeTypeToString(mem_node->mode); if (nodemask == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to format nodeset for " "NUMA memory tuning")); goto error; } - virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask); + virBufferAsprintf(buf, + "<memnode nodeid='%zu' mode='%s' nodeset='%s'/>\n", + i, mode, nodemask); VIR_FREE(nodemask); - } else if (def->numatune.memory.placement_mode) { - placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode); - virBufferAsprintf(buf, "placement='%s'/>\n", placement); } + virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</numatune>\n"); } diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 78cfdc6..89800ab 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -710,7 +710,28 @@ 
qemuDomainDefPostParse(virDomainDefPtr def, bool addDefaultMemballoon = true; bool addDefaultUSBKBD = false; bool addDefaultUSBMouse = false; - + size_t i = 0; + + if (def->numatune.memory.nodemask) { + for (i = 0; i < def->numatune.nmem_nodes; i++) { + struct mem_node *mem_node = &def->numatune.mem_nodes[i]; + ssize_t pos = -1; + bool bit = false; + + if (!mem_node->specified) + continue; + + while ((pos = virBitmapNextSetBit(mem_node->nodemask, pos)) >= 0) { + if (virBitmapGetBit(def->numatune.memory.nodemask, pos, &bit) < 0 || + !bit) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("memnode nodeset must be subset of the " + "global memory nodeset")); + return -1; + } + } + } + } /* check for emulator and create a default one if needed */ if (!def->emulator && !(def->emulator = virDomainDefGetDefaultEmulator(def, caps))) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 2b852eb..d838ad2 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8613,6 +8613,16 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm, goto cleanup; } + for (i = 0; i < def->numatune->nmem_nodes; i++) { + if (def->numatune->mem_nodes[i].specified) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("change of nodeset for running domain " + "with per guest NUMA node numatune settings " + "is not supported")); + goto cleanup; + } + } + /* Get existing nodeset values */ if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 || virBitmapParse(nodeset_str, 0, &temp_nodeset, @@ -8853,6 +8863,8 @@ qemuDomainGetNumaParameters(virDomainPtr dom, } } + /* TODO: <memnode> settings are not reflected here yet */ + for (i = 0; i < QEMU_NB_NUMA_PARAM && i < *nparams; i++) { virMemoryParameterPtr param = &params[i]; diff --git a/src/util/virnuma.h b/src/util/virnuma.h index 8464b19..5e7608d 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -1,7 +1,7 @@ /* * virnuma.h: helper APIs for managing numa * - * Copyright (C) 2011-2013 Red Hat, Inc. 
+ * Copyright (C) 2011-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -43,9 +43,17 @@ typedef virNumaTuneDef *virNumaTuneDefPtr; struct _virNumaTuneDef { struct { virBitmapPtr nodemask; - int mode; + int mode; /* enum virDomainNumatuneMemMode */ int placement_mode; /* enum virNumaTuneMemPlacementMode */ - } memory; + } memory; /* pinning for all the memory */ + + struct mem_node { + bool specified; + unsigned int nodeid; + virBitmapPtr nodemask; + int mode; + } *mem_nodes; /* pinning per guest's NUMA node */ + size_t nmem_nodes; /* Future NUMA tuning related stuff should go here. */ }; -- 1.9.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list