[PATCH v2 2/2] qemu: Don't use -mem-prealloc along with .prealloc=yes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



https://bugzilla.redhat.com/show_bug.cgi?id=1624223

There are two ways to request memory preallocation on cmd line:
-mem-prealloc and .prealloc attribute for a memory-backend-file.
However, as it turns out it's not safe to use both at the same
time. If -mem-prealloc is used then qemu will fully allocate the
memory (this is done by actually touching every page that has
been allocated). Then, if .prealloc=yes is specified,
mbind(flags = MPOL_MF_STRICT | MPOL_MF_MOVE) is called which:

a) has to (possibly) move the memory to a different NUMA node,
b) can have no effect when hugepages are in play (thus ignoring user
request to place memory on desired NUMA nodes).

Prefer -mem-prealloc as it is more backward compatible
compared to switching to "-numa node,memdev=" plus
"-object memory-backend-file".

Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx>
---
 src/qemu/qemu_command.c                       | 26 ++++++++++++-------
 src/qemu/qemu_domain.c                        |  7 +++++
 src/qemu/qemu_domain.h                        |  3 +++
 .../hugepages-numa-default-dimm.args          |  2 +-
 4 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 01a3141134..1a1cb9cbbd 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3357,11 +3357,13 @@ qemuBuildMemoryBackendProps(virJSONValuePtr *backendProps,
         if (mem->nvdimmPath) {
             if (VIR_STRDUP(memPath, mem->nvdimmPath) < 0)
                 goto cleanup;
-            prealloc = true;
+            if (!priv->memPrealloc)
+                prealloc = true;
         } else if (useHugepage) {
             if (qemuGetDomainHupageMemPath(def, cfg, pagesize, &memPath) < 0)
                 goto cleanup;
-            prealloc = true;
+            if (!priv->memPrealloc)
+                prealloc = true;
         } else {
             /* We can have both pagesize and mem source. If that's the case,
              * prefer hugepages as those are more specific. */
@@ -7576,7 +7578,8 @@ qemuBuildSmpCommandLine(virCommandPtr cmd,
 static int
 qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
                     const virDomainDef *def,
-                    virCommandPtr cmd)
+                    virCommandPtr cmd,
+                    qemuDomainObjPrivatePtr priv)
 {
     const long system_page_size = virGetSystemPageSizeKB();
     char *mem_path = NULL;
@@ -7598,8 +7601,10 @@ qemuBuildMemPathStr(virQEMUDriverConfigPtr cfg,
         return 0;
     }
 
-    if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE)
+    if (def->mem.allocation != VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) {
         virCommandAddArgList(cmd, "-mem-prealloc", NULL);
+        priv->memPrealloc = true;
+    }
 
     virCommandAddArgList(cmd, "-mem-path", mem_path, NULL);
     VIR_FREE(mem_path);
@@ -7612,7 +7617,8 @@ static int
 qemuBuildMemCommandLine(virCommandPtr cmd,
                         virQEMUDriverConfigPtr cfg,
                         const virDomainDef *def,
-                        virQEMUCapsPtr qemuCaps)
+                        virQEMUCapsPtr qemuCaps,
+                        qemuDomainObjPrivatePtr priv)
 {
     if (qemuDomainDefValidateMemoryHotplug(def, qemuCaps, NULL) < 0)
         return -1;
@@ -7631,15 +7637,17 @@ qemuBuildMemCommandLine(virCommandPtr cmd,
                               virDomainDefGetMemoryInitial(def) / 1024);
     }
 
-    if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE)
+    if (def->mem.allocation == VIR_DOMAIN_MEMORY_ALLOCATION_IMMEDIATE) {
         virCommandAddArgList(cmd, "-mem-prealloc", NULL);
+        priv->memPrealloc = true;
+    }
 
     /*
      * Add '-mem-path' (and '-mem-prealloc') parameter here if
      * the hugepages and no numa node is specified.
      */
     if (!virDomainNumaGetNodeCount(def->numa) &&
-        qemuBuildMemPathStr(cfg, def, cmd) < 0)
+        qemuBuildMemPathStr(cfg, def, cmd, priv) < 0)
         return -1;
 
     if (def->mem.locked && !virQEMUCapsGet(qemuCaps, QEMU_CAPS_REALTIME_MLOCK)) {
@@ -7748,7 +7756,7 @@ qemuBuildNumaArgStr(virQEMUDriverConfigPtr cfg,
     }
 
     if (!needBackend &&
-        qemuBuildMemPathStr(cfg, def, cmd) < 0)
+        qemuBuildMemPathStr(cfg, def, cmd, priv) < 0)
         goto cleanup;
 
     for (i = 0; i < ncells; i++) {
@@ -10441,7 +10449,7 @@ qemuBuildCommandLine(virQEMUDriverPtr driver,
     if (!migrateURI && !snapshot && qemuDomainAlignMemorySizes(def) < 0)
         goto error;
 
-    if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps) < 0)
+    if (qemuBuildMemCommandLine(cmd, cfg, def, qemuCaps, priv) < 0)
         goto error;
 
     if (qemuBuildSmpCommandLine(cmd, def) < 0)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index 509da6bfea..039b887d8e 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1944,6 +1944,8 @@ qemuDomainObjPrivateDataClear(qemuDomainObjPrivatePtr priv)
     VIR_FREE(priv->libDir);
     VIR_FREE(priv->channelTargetDir);
 
+    priv->memPrealloc = false;
+
     /* remove automatic pinning data */
     virBitmapFree(priv->autoNodeset);
     priv->autoNodeset = NULL;
@@ -2489,6 +2491,9 @@ qemuDomainObjPrivateXMLFormat(virBufferPtr buf,
     if (virQEMUCapsGet(priv->qemuCaps, QEMU_CAPS_BLOCKDEV))
         virBufferAsprintf(buf, "<nodename index='%llu'/>\n", priv->nodenameindex);
 
+    if (priv->memPrealloc)
+        virBufferAddLit(buf, "<memPrealloc/>\n");
+
     if (qemuDomainObjPrivateXMLFormatBlockjobs(buf, vm) < 0)
         return -1;
 
@@ -2993,6 +2998,8 @@ qemuDomainObjPrivateXMLParse(xmlXPathContextPtr ctxt,
         goto error;
     }
 
+    priv->memPrealloc = virXPathBoolean("boolean(./memPrealloc)", ctxt) == 1;
+
     return 0;
 
  error:
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 53b5ea1678..b7347c72ce 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -372,6 +372,9 @@ struct _qemuDomainObjPrivate {
 
     /* true if libvirt remembers the original owner for files */
     bool rememberOwner;
+
+    /* true if global -mem-prealloc appears on cmd line */
+    bool memPrealloc;
 };
 
 # define QEMU_DOMAIN_PRIVATE(vm) \
diff --git a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args
index 143d8b041f..df90f7aad9 100644
--- a/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args
+++ b/tests/qemuxml2argvdata/hugepages-numa-default-dimm.args
@@ -13,7 +13,7 @@ QEMU_AUDIO_DRV=none \
 -mem-prealloc \
 -mem-path /dev/hugepages2M/libvirt/qemu/-1-fedora \
 -numa node,nodeid=0,cpus=0-1,mem=1024 \
--object memory-backend-file,id=memdimm0,prealloc=yes,\
+-object memory-backend-file,id=memdimm0,\
 mem-path=/dev/hugepages1G/libvirt/qemu/-1-fedora,size=1073741824,\
 host-nodes=1-3,policy=bind \
 -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \
-- 
2.19.2

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list



[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux