[PATCH 2/5] conf, schema: add support for numatune memnode element

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This element specifies settings similar to those of the memory element,
but memnode can be applied per guest NUMA node.

Signed-off-by: Martin Kletzander <mkletzan@xxxxxxxxxx>
---
 docs/formatdomain.html.in     |  18 +++++
 docs/schemas/domaincommon.rng |  17 ++++
 src/conf/domain_conf.c        | 181 +++++++++++++++++++++++++++++++++---------
 src/qemu/qemu_domain.c        |  23 +++++-
 src/qemu/qemu_driver.c        |  12 +++
 src/util/virnuma.h            |  14 +++-
 6 files changed, 225 insertions(+), 40 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 041f70d..fd29ae3 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -709,6 +709,8 @@
   ...
   &lt;numatune&gt;
     &lt;memory mode="strict" nodeset="1-4,^3"/&gt;
+    &lt;memnode cellid="0" mode="strict" nodeset="1"/&gt;
+    &lt;memnode cellid="2" mode="preferred" nodeset="2"/&gt;
   &lt;/numatune&gt;
   ...
 &lt;/domain&gt;
@@ -745,6 +747,22 @@

         <span class='since'>Since 0.9.3</span>
       </dd>
+      <dt><code>memnode</code></dt>
+      <dd>
+        Optional <code>memnode</code> elements can specify memory allocation
+        policies for each guest NUMA node.  For those nodes having no
+        corresponding <code>memnode</code> element, the default from the
+        <code>memory</code> element will be used.  Attribute <code>cellid</code>
+        identifies the guest NUMA node to which the settings are applied.
+        Attributes <code>mode</code> and <code>nodeset</code> have the same
+        meaning and syntax as in the <code>memory</code> element.
+
+        Because of possible memory migration issues that depend on kernel
+        settings, using the <code>memnode</code> element effectively disables
+        any live changes of numatune settings in current versions of libvirt.
+
+        <span class='since'>QEMU Since 1.2.6</span>
+      </dd>
     </dl>


diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 0787b5a..a8e3ba0 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -789,6 +789,23 @@
               </choice>
         </element>
       </optional>
+      <zeroOrMore>
+        <element name="memnode">
+          <attribute name="cellid">
+            <ref name="unsignedInt"/>
+          </attribute>
+          <attribute name="mode">
+            <choice>
+              <value>strict</value>
+              <value>preferred</value>
+              <value>interleave</value>
+            </choice>
+          </attribute>
+          <attribute name='nodeset'>
+            <ref name='cpuset'/>
+          </attribute>
+        </element>
+      </zeroOrMore>
     </element>
   </define>

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index f1df092..4818cfb 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -2085,6 +2085,9 @@ void virDomainDefFree(virDomainDefPtr def)
     virDomainVcpuPinDefFree(def->cputune.emulatorpin);

     virBitmapFree(def->numatune.memory.nodemask);
+    for (i = 0; i < def->numatune.nmem_nodes; i++)
+        virBitmapFree(def->numatune.mem_nodes[i].nodemask);
+    VIR_FREE(def->numatune.mem_nodes);

     virSysinfoDefFree(def->sysinfo);

@@ -11233,6 +11236,7 @@ virDomainDefParseXML(xmlDocPtr xml,
     bool usb_master = false;
     bool primaryVideo = false;

+
     if (VIR_ALLOC(def) < 0)
         return NULL;

@@ -11666,6 +11670,33 @@ virDomainDefParseXML(xmlDocPtr xml,
     }
     VIR_FREE(nodes);

+
+    /* analysis of cpu handling */
+    if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
+        xmlNodePtr oldnode = ctxt->node;
+        ctxt->node = node;
+        def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST);
+        ctxt->node = oldnode;
+
+        if (def->cpu == NULL)
+            goto error;
+
+        if (def->cpu->sockets &&
+            def->maxvcpus >
+            def->cpu->sockets * def->cpu->cores * def->cpu->threads) {
+            virReportError(VIR_ERR_XML_DETAIL, "%s",
+                           _("Maximum CPUs greater than topology limit"));
+            goto error;
+        }
+
+        if (def->cpu->cells_cpus > def->maxvcpus) {
+            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                           _("Number of CPUs in <numa> exceeds the"
+                             " <vcpu> count"));
+            goto error;
+        }
+    }
+
     /* Extract numatune if exists. */
     if ((n = virXPathNodeSet("./numatune", ctxt, &nodes)) < 0) {
         virReportError(VIR_ERR_INTERNAL_ERROR,
@@ -11682,6 +11713,12 @@ virDomainDefParseXML(xmlDocPtr xml,

     if (n) {
         cur = nodes[0]->children;
+        if (def->cpu) {
+            if (VIR_ALLOC_N(def->numatune.mem_nodes, def->cpu->ncells) < 0)
+                goto error;
+            def->numatune.nmem_nodes = def->cpu->ncells;
+        }
+
         while (cur != NULL) {
             if (cur->type == XML_ELEMENT_NODE) {
                 if (xmlStrEqual(cur->name, BAD_CAST "memory")) {
@@ -11764,6 +11801,78 @@ virDomainDefParseXML(xmlDocPtr xml,
                         def->placement_mode = VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO;

                     def->numatune.memory.placement_mode = placement_mode;
+
+                } else if (xmlStrEqual(cur->name, BAD_CAST "memnode")) {
+                    unsigned int cellid;
+                    struct mem_node *mem_node = NULL;
+
+                    if (!def->numatune.nmem_nodes) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Element 'memnode' is invalid without "
+                                         "any guest NUMA cells"));
+                        goto error;
+                    }
+                    tmp = virXMLPropString(cur, "cellid");
+                    if (!tmp) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Missing required cellid attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    }
+                    if (virStrToLong_ui(tmp, NULL, 10, &cellid) < 0) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Invalid cellid attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    }
+                    VIR_FREE(tmp);
+
+                    if (cellid >= def->numatune.nmem_nodes) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Argument 'cellid' in numatune "
+                                         "memnode element must correspond to "
+                                         "existing guest's NUMA cell"));
+                        goto error;
+                    }
+
+                    mem_node = &def->numatune.mem_nodes[cellid];
+
+                    if (mem_node->specified) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Multiple numatune memnode elements "
+                                         "with duplicate 'cellid'"));
+                        goto error;
+                    }
+
+                    mem_node->specified = true;
+
+                    tmp = virXMLPropString(cur, "mode");
+                    if (tmp &&
+                        (mem_node->mode =
+                         virDomainNumatuneMemModeTypeFromString(tmp)) < 0) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Invalid mode attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    } else if (!tmp) {
+                        mem_node->mode = VIR_DOMAIN_NUMATUNE_MEM_STRICT;
+                    }
+                    VIR_FREE(tmp);
+
+                    tmp = virXMLPropString(cur, "nodeset");
+                    if (!tmp) {
+                        virReportError(VIR_ERR_XML_ERROR, "%s",
+                                       _("Missing required nodeset attribute "
+                                         "in numatune memnode element"));
+                        goto error;
+                    }
+                    if (virBitmapParse(tmp, 0,
+                                       &mem_node->nodemask,
+                                       VIR_DOMAIN_CPUMASK_LEN) < 0) {
+                        goto error;
+                    }
+                    VIR_FREE(tmp);
+
                 } else {
                     virReportError(VIR_ERR_XML_ERROR,
                                    _("unsupported XML element %s"),
@@ -12863,32 +12972,6 @@ virDomainDefParseXML(xmlDocPtr xml,
             goto error;
     }

-    /* analysis of cpu handling */
-    if ((node = virXPathNode("./cpu[1]", ctxt)) != NULL) {
-        xmlNodePtr oldnode = ctxt->node;
-        ctxt->node = node;
-        def->cpu = virCPUDefParseXML(node, ctxt, VIR_CPU_TYPE_GUEST);
-        ctxt->node = oldnode;
-
-        if (def->cpu == NULL)
-            goto error;
-
-        if (def->cpu->sockets &&
-            def->maxvcpus >
-            def->cpu->sockets * def->cpu->cores * def->cpu->threads) {
-            virReportError(VIR_ERR_XML_DETAIL, "%s",
-                           _("Maximum CPUs greater than topology limit"));
-            goto error;
-        }
-
-        if (def->cpu->cells_cpus > def->maxvcpus) {
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                           _("Number of CPUs in <numa> exceeds the"
-                             " <vcpu> count"));
-            goto error;
-        }
-    }
-
     if ((node = virXPathNode("./sysinfo[1]", ctxt)) != NULL) {
         xmlNodePtr oldnode = ctxt->node;
         ctxt->node = node;
@@ -17395,31 +17478,57 @@ virDomainDefFormatInternal(virDomainDefPtr def,
         virBufferAddLit(buf, "</cputune>\n");

     if (def->numatune.memory.nodemask ||
-        def->numatune.memory.placement_mode) {
+        def->numatune.memory.placement_mode ||
+        def->numatune.nmem_nodes) {
         const char *mode;
         char *nodemask = NULL;
         const char *placement;

         virBufferAddLit(buf, "<numatune>\n");
         virBufferAdjustIndent(buf, 2);
-        mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
-        virBufferAsprintf(buf, "<memory mode='%s' ", mode);

-        if (def->numatune.memory.placement_mode ==
-            VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
-            nodemask = virBitmapFormat(def->numatune.memory.nodemask);
+        if (def->numatune.memory.nodemask ||
+            def->numatune.memory.placement_mode) {
+
+            mode = virDomainNumatuneMemModeTypeToString(def->numatune.memory.mode);
+            virBufferAsprintf(buf, "<memory mode='%s' ", mode);
+
+            if (def->numatune.memory.placement_mode ==
+                VIR_NUMA_TUNE_MEM_PLACEMENT_MODE_STATIC) {
+                nodemask = virBitmapFormat(def->numatune.memory.nodemask);
+                if (nodemask == NULL) {
+                    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                                   _("failed to format nodeset for "
+                                     "NUMA memory tuning"));
+                    goto error;
+                }
+                virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask);
+                VIR_FREE(nodemask);
+            } else if (def->numatune.memory.placement_mode) {
+                placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode);
+                virBufferAsprintf(buf, "placement='%s'/>\n", placement);
+            }
+        }
+
+        for (i = 0; i < def->numatune.nmem_nodes; i++) {
+            struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+            if (!mem_node->specified)
+                continue;
+
+            nodemask = virBitmapFormat(mem_node->nodemask);
+            mode = virDomainNumatuneMemModeTypeToString(mem_node->mode);
             if (nodemask == NULL) {
                 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                _("failed to format nodeset for "
                                  "NUMA memory tuning"));
                 goto error;
             }
-            virBufferAsprintf(buf, "nodeset='%s'/>\n", nodemask);
+            virBufferAsprintf(buf,
+                              "<memnode nodeid='%zu' mode='%s' nodeset='%s'/>\n",
+                              i, mode, nodemask);
             VIR_FREE(nodemask);
-        } else if (def->numatune.memory.placement_mode) {
-            placement = virNumaTuneMemPlacementModeTypeToString(def->numatune.memory.placement_mode);
-            virBufferAsprintf(buf, "placement='%s'/>\n", placement);
         }
+
         virBufferAdjustIndent(buf, -2);
         virBufferAddLit(buf, "</numatune>\n");
     }
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 78cfdc6..89800ab 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -710,7 +710,28 @@ qemuDomainDefPostParse(virDomainDefPtr def,
     bool addDefaultMemballoon = true;
     bool addDefaultUSBKBD = false;
     bool addDefaultUSBMouse = false;
-
+    size_t i = 0;
+
+    if (def->numatune.memory.nodemask) {
+        for (i = 0; i < def->numatune.nmem_nodes; i++) {
+            struct mem_node *mem_node = &def->numatune.mem_nodes[i];
+            ssize_t pos = -1;
+            bool bit = false;
+
+            if (!mem_node->specified)
+                continue;
+
+            while ((pos = virBitmapNextSetBit(mem_node->nodemask, pos)) >= 0) {
+                if (virBitmapGetBit(def->numatune.memory.nodemask, pos, &bit) < 0 ||
+                    !bit) {
+                    virReportError(VIR_ERR_XML_DETAIL, "%s",
+                                   _("memnode nodeset must be subset of the "
+                                     "global memory nodeset"));
+                    return -1;
+                }
+            }
+        }
+    }
     /* check for emulator and create a default one if needed */
     if (!def->emulator &&
         !(def->emulator = virDomainDefGetDefaultEmulator(def, caps)))
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 2b852eb..d838ad2 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -8613,6 +8613,16 @@ qemuDomainSetNumaParamsLive(virDomainObjPtr vm,
         goto cleanup;
     }

+    for (i = 0; i < def->numatune->nmem_nodes; i++) {
+        if (def->numatune->mem_nodes[i].specified) {
+            virReportError(VIR_ERR_OPERATION_INVALID, "%s",
+                           _("change of nodeset for running domain "
+                             "with per guest NUMA node numatune settings "
+                             "is not supported"));
+            goto cleanup;
+        }
+    }
+
     /* Get existing nodeset values */
     if (virCgroupGetCpusetMems(priv->cgroup, &nodeset_str) < 0 ||
         virBitmapParse(nodeset_str, 0, &temp_nodeset,
@@ -8853,6 +8863,8 @@ qemuDomainGetNumaParameters(virDomainPtr dom,
         }
     }

+    /* TODO: <memnode> settings are not reflected here yet */
+
     for (i = 0; i < QEMU_NB_NUMA_PARAM && i < *nparams; i++) {
         virMemoryParameterPtr param = &params[i];

diff --git a/src/util/virnuma.h b/src/util/virnuma.h
index 8464b19..5e7608d 100644
--- a/src/util/virnuma.h
+++ b/src/util/virnuma.h
@@ -1,7 +1,7 @@
 /*
  * virnuma.h: helper APIs for managing numa
  *
- * Copyright (C) 2011-2013 Red Hat, Inc.
+ * Copyright (C) 2011-2014 Red Hat, Inc.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -43,9 +43,17 @@ typedef virNumaTuneDef *virNumaTuneDefPtr;
 struct _virNumaTuneDef {
     struct {
         virBitmapPtr nodemask;
-        int mode;
+        int mode;           /* enum virDomainNumatuneMemMode */
         int placement_mode; /* enum virNumaTuneMemPlacementMode */
-    } memory;
+    } memory;               /* pinning for all the memory */
+
+    struct mem_node {
+        bool specified;
+        unsigned int nodeid;
+        virBitmapPtr nodemask;
+        int mode;
+    } *mem_nodes;          /* pinning per guest's NUMA node */
+    size_t nmem_nodes;

     /* Future NUMA tuning related stuff should go here. */
 };
-- 
1.9.3

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]