This element specifies settings similar to those of the memory element, but memnode can be specified separately for each guest NUMA node. Signed-off-by: Martin Kletzander <mkletzan@xxxxxxxxxx> --- docs/formatdomain.html.in | 15 +++ docs/schemas/domaincommon.rng | 17 ++++ src/conf/domain_conf.c | 220 +++++++++++++++++++++++++++++++++++------- src/qemu/qemu_domain.c | 23 ++++- src/qemu/qemu_driver.c | 11 +++ src/util/virnuma.h | 14 ++- 6 files changed, 260 insertions(+), 40 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 041f70d..2d855ea 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -709,6 +709,8 @@ ... <numatune> <memory mode="strict" nodeset="1-4,^3"/> + <memnode cellid="0" mode="strict" nodeset="1"/> + <memnode cellid="2" mode="preferred" nodeset="2"/> </numatune> ... </domain> @@ -745,6 +747,19 @@ <span class='since'>Since 0.9.3</span> </dd> + <dt><code>memnode</code></dt> + <dd> + Optional <code>memnode</code> elements can specify memory allocation + policies for each guest NUMA node. For nodes that have no + corresponding <code>memnode</code> element, the default from + element <code>memory</code> will be used. Attribute <code>cellid</code> + addresses the guest NUMA node to which the settings apply. + Attributes <code>mode</code> and <code>nodeset</code> have the same + meaning and syntax as in the <code>memory</code> element. + + This setting is not compatible with automatic placement. 
+ <span class='since'>QEMU Since 1.2.6</span> + </dd> </dl> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 0787b5a..a8e3ba0 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -789,6 +789,23 @@ </choice> </element> </optional> + <zeroOrMore> + <element name="memnode"> + <attribute name="cellid"> + <ref name="unsignedInt"/> + </attribute> + <attribute name="mode"> + <choice> + <value>strict</value> + <value>preferred</value> + <value>interleave</value> + </choice> + </attribute> + <attribute name='nodeset'> + <ref name='cpuset'/> + </attribute> + </element> + </zeroOrMore> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index fe06921..352ba92 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -2085,6 +2085,9 @@ void virDomainDefFree(virDomainDefPtr def) virDomainVcpuPinDefFree(def->cputune.emulatorpin); virBitmapFree(def->numatune.memory.nodemask); + for (i = 0; i < def->numatune.nmem_nodes; i++) + virBitmapFree(def->numatune.mem_nodes[i].nodemask); + VIR_FREE(def->numatune.mem_nodes); virSysinfoDefFree(def->sysinfo); @@ -11232,6 +11235,8 @@ virDomainDefParseXML(xmlDocPtr xml, bool usb_other = false; bool usb_master = false; bool primaryVideo = false; + bool mem_nodes = false; + if (VIR_ALLOC(def) < 0) return NULL; @@ -11666,6 +11671,33 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + + /* analysis of cpu handling */ + if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { + xmlNodePtr oldnode = ctxt->node; + ctxt->node = node; + def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST); + ctxt->node = oldnode; + + if (def->cpu == NULL) + goto error; + + if (def->cpu->sockets && + def->maxvcpus > + def->cpu->sockets * def->cpu->cores * def->cpu->threads) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("Maximum CPUs greater than topology limit")); + goto error; + } + + if (def->cpu->cells_cpus > def->maxvcpus) { + 
virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("Number of CPUs in <numa> exceeds the" + " <vcpu> count")); + goto error; + } + } + /* Extract numatune if exists. */ if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, @@ -11682,6 +11714,12 @@ virDomainDefParseXML(xmlDocPtr xml, if (n) { cur = nodes[0]->children; + if (def->cpu) { + if (VIR_ALLOC_N(def->numatune.mem_nodes, def->cpu->ncells) < 0) + goto error; + def->numatune.nmem_nodes = def->cpu->ncells; + } + while (cur != NULL) { if (cur->type == XML_ELEMENT_NODE) { if (xmlStrEqual(cur->name, BAD_CAST "memory")) { @@ -11764,6 +11802,80 @@ virDomainDefParseXML(xmlDocPtr xml, def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO; def->numatune.memory.placement_mode = placement_mode; + + } else if (xmlStrEqual(cur->name, BAD_CAST "memnode")) { + unsigned int cellid; + struct mem_node *mem_node = NULL; + + if (!def->numatune.nmem_nodes) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Element 'memnode' is invalid without " + "any guest NUMA cells")); + goto error; + } + tmp = virXMLPropString(cur, "cellid"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing required cellid attribute " + "in numatune memnode element")); + goto error; + } + if (virStrToLong_ui(tmp, NULL, 10, &cellid) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid cellid attribute " + "in numatune memnode element")); + goto error; + } + VIR_FREE(tmp); + + if (cellid >= def->numatune.nmem_nodes) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Argument 'cellid' in numatune " + "memnode element must correspond to " + "existing guest's NUMA cell")); + goto error; + } + + mem_node = &def->numatune.mem_nodes[cellid]; + + if (mem_node->specified) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Multiple numatune memnode elements " + "with duplicate 'cellid'")); + goto error; + } + + mem_node->specified = true; + mem_nodes = true; + + tmp = virXMLPropString(cur, "mode"); + if 
(!tmp) { + mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT; + } else { + if ((mem_node->mode = + virDomainNumatuneMemModeTypeFromString(tmp)) < 0) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Invalid mode attribute " + "in numatune memnode element")); + goto error; + } + VIR_FREE(tmp); + } + + tmp = virXMLPropString(cur, "nodeset"); + if (!tmp) { + virReportError(VIR_ERR_XML_ERROR, "%s", + _("Missing required nodeset attribute " + "in numatune memnode element")); + goto error; + } + if (virBitmapParse(tmp, 0, + &mem_node->nodemask, + VIR_DOMAIN_CPUMASK_LEN) < 0) { + goto error; + } + VIR_FREE(tmp); + } else { virReportError(VIR_ERR_XML_ERROR, _("unsupported XML element %s"), @@ -11784,6 +11896,42 @@ virDomainDefParseXML(xmlDocPtr xml, } VIR_FREE(nodes); + if (def->numatune.nmem_nodes && + def->numatune.memory.placement_mode == + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_AUTO) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("Per-node binding is not compatible with " + "automatic NUMA placement.")); + goto error; + } + + if (!mem_nodes) { + /* If there are no <memnode> settings, clear all these data. + * If any driver wants to use these in the future, this code + * can be cleared. Until then it's easier to keep it this + * way. */ + for (i = 0; i < def->numatune.nmem_nodes; i++) + virBitmapFree(def->numatune.mem_nodes[i].nodemask); + VIR_FREE(def->numatune.mem_nodes); + def->numatune.nmem_nodes = 0; + } else { + /* Copy numatune/memory information into each node, but leave + * specified == false. 
This eases the process of determination + * of each node's nodemask */ + for (i = 0; i < def->numatune.nmem_nodes; i++) { + struct mem_node *mem_node = &def->numatune.mem_nodes[i]; + + if (mem_node->specified) + continue; + + mem_node->mode = def->numatune.memory.mode; + mem_node->nodemask = virBitmapNewCopy(def->numatune.memory.nodemask); + + if (!mem_node->nodemask) + goto error; + } + } + if ((n = virXPathNodeSet("./resource", ctxt, &nodes)) < 0) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("cannot extract resource nodes")); @@ -12863,32 +13011,6 @@ virDomainDefParseXML(xmlDocPtr xml, goto error; } - /* analysis of cpu handling */ - if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) { - xmlNodePtr oldnode = ctxt->node; - ctxt->node = node; - def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST); - ctxt->node = oldnode; - - if (def->cpu == NULL) - goto error; - - if (def->cpu->sockets && - def->maxvcpus > - def->cpu->sockets * def->cpu->cores * def->cpu->threads) { - virReportError(VIR_ERR_XML_DETAIL, "%s", - _("Maximum CPUs greater than topology limit")); - goto error; - } - - if (def->cpu->cells_cpus > def->maxvcpus) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Number of CPUs in <numa> exceeds the" - " <vcpu> count")); - goto error; - } - } - if ((node = virXPathNode("./sysinfo[1]", ctxt)) != NULL) { xmlNodePtr oldnode = ctxt->node; ctxt->node = node; @@ -17395,31 +17517,57 @@ virDomainDefFormatInternal(virDomainDefPtr def, virBufferAddLit(buf, "</cputune>\n"); if (def->numatune.memory.nodemask || - def->numatune.memory.placement_mode) { + def->numatune.memory.placement_mode || + def->numatune.nmem_nodes) { const char *mode; char *nodemask = NULL; const char *placement; virBufferAddLit(buf, "<numatune>\n"); virBufferAdjustIndent(buf, 2); - mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode); - virBufferAsprintf(buf, "<memory mode='%s' ", mode); - if (def->numatune.memory.placement_mode == - 
VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) { - nodemask = virBitmapFormat(def->numatune.memory.nodemask); + if (def->numatune.memory.nodemask || + def->numatune.memory.placement_mode) { + + mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode); + virBufferAsprintf(buf, "<memory mode='%s' ", mode); + + if (def->numatune.memory.placement_mode == + VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) { + nodemask = virBitmapFormat(def->numatune.memory.nodemask); + if (nodemask == NULL) { + virReportError(VIR_ERR_INTERNAL_ERROR, "%s", + _("failed to format nodeset for " + "NUMA memory tuning")); + goto error; + } + virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask); + VIR_FREE(nodemask); + } else if (def->numatune.memory.placement_mode) { + placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode); + virBufferAsprintf(buf, "placement='%s'/>\n", placement); + } + } + + for (i = 0; i < def->numatune.nmem_nodes; i++) { + struct mem_node *mem_node = &def->numatune.mem_nodes[i]; + if (!mem_node->specified) + continue; + + nodemask = virBitmapFormat(mem_node->nodemask); + mode = virDomainNumatuneMemModeTypeToString(mem_node->mode); if (nodemask == NULL) { virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("failed to format nodeset for " "NUMA memory tuning")); goto error; } - virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask); + virBufferAsprintf(buf, + "<memnode cellid='%zu' mode='%s' nodeset='%s'/>\n", + i, mode, nodemask); VIR_FREE(nodemask); - } else if (def->numatune.memory.placement_mode) { - placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode); - virBufferAsprintf(buf, "placement='%s'/>\n", placement); } + virBufferAdjustIndent(buf, -2); virBufferAddLit(buf, "</numatune>\n"); } diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index bbe32a0..99f9c48 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -712,7 +712,28 @@ qemuDomainDefPostParse(virDomainDefPtr def, bool 
addDefaultMemballoon = true; bool addDefaultUSBKBD = false; bool addDefaultUSBMouse = false; - + size_t i = 0; + + if (def->numatune.memory.nodemask) { + for (i = 0; i < def->numatune.nmem_nodes; i++) { + struct mem_node *mem_node = &def->numatune.mem_nodes[i]; + ssize_t pos = -1; + bool bit = false; + + if (!mem_node->specified) + continue; + + while ((pos = virBitmapNextSetBit(mem_node->nodemask, pos)) >= 0) { + if (virBitmapGetBit(def->numatune.memory.nodemask, pos, &bit) < 0 || + !bit) { + virReportError(VIR_ERR_XML_DETAIL, "%s", + _("memnode nodeset must be subset of the " + "global memory nodeset")); + return -1; + } + } + } + } /* check for emulator and create a default one if needed */ if (!def->emulator && !(def->emulator = virDomainDefGetDefaultEmulator(def, caps))) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 3a7622a..545516e 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -8646,6 +8646,7 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm, virCgroupPtr cgroup_temp = NULL; virBitmapPtr temp_nodeset = NULL; qemuDomainObjPrivatePtr priv = vm->privateData; + virDomainDefPtr def = vm->def; char *nodeset_str = NULL; size_t i = 0; int ret = -1; @@ -8657,6 +8658,16 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm, goto cleanup; } + for (i = 0; i < def->numatune.nmem_nodes; i++) { + if (def->numatune.mem_nodes[i].specified) { + virReportError(VIR_ERR_OPERATION_INVALID, "%s", + _("change of nodeset for running domain " + "with per guest NUMA node numatune settings " + "is not supported")); + goto cleanup; + } + } + /* Get existing nodeset values */ if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 || virBitmapParse(nodeset_str, 0, &temp_nodeset, diff --git a/src/util/virnuma.h b/src/util/virnuma.h index fe1e966..50fa3f8 100644 --- a/src/util/virnuma.h +++ b/src/util/virnuma.h @@ -1,7 +1,7 @@ /* * virnuma.h: helper APIs for managing numa * - * Copyright (C) 2011-2013 Red Hat, Inc. 
+ * Copyright (C) 2011-2014 Red Hat, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -43,9 +43,17 @@ typedef virNumaTuneDef *virNumaTuneDefPtr; struct _virNumaTuneDef { struct { virBitmapPtr nodemask; - int mode; + int mode; /* enum virDomainNumatuneMemMode */ int placement_mode; /* enum virNumaTuneMemPlacementMode */ - } memory; + } memory; /* pinning for all the memory */ + + struct mem_node { + bool specified; + unsigned int nodeid; + virBitmapPtr nodemask; + int mode; + } *mem_nodes; /* pinning per guest's NUMA node */ + size_t nmem_nodes; /* Future NUMA tuning related stuff should go here. */ }; -- 2.0.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list