[PATCH 3/5] Implement multiple memory backing paths

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Michael Galaxy <mgalaxy@xxxxxxxxxx>

We have different use cases:

1. The domain has multiple NUMA nodes, but they have only specified
   a single directory path in qemu.conf (Original default behavior)
2. Domain has multiple NUMA nodes, but we have asked for multiple
   directory paths as well (new behavior).
3. Domain has single NUMA node, but we have asked for multiple directory
   paths (new behavior).

Each one is elaborated more inline below in the comments.

Signed-off-by: Michael Galaxy <mgalaxy@xxxxxxxxxx>
---
 src/qemu/qemu_command.c |   8 +++-
 src/qemu/qemu_conf.c    | 101 ++++++++++++++++++++++++++++++++++++----
 src/qemu/qemu_conf.h    |  11 +++--
 3 files changed, 106 insertions(+), 14 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index b8f071ff2a..818e409d20 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -3448,7 +3448,9 @@ qemuBuildMemoryBackendProps(virJSONValue **backendProps,
         } else {
             /* We can have both pagesize and mem source. If that's the case,
              * prefer hugepages as those are more specific. */
-            if (qemuGetMemoryBackingPath(priv->driver, def, mem->info.alias, &memPath) < 0)
+
+            virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv;
+            if (qemuGetMemoryBackingPath(def, privateData, mem->targetNode, mem->info.alias, &memPath) < 0)
                 return -1;
         }
 
@@ -7291,7 +7293,9 @@ qemuBuildMemPathStr(const virDomainDef *def,
             return -1;
         prealloc = true;
     } else if (def->mem.source == VIR_DOMAIN_MEMORY_SOURCE_FILE) {
-        if (qemuGetMemoryBackingPath(priv->driver, def, "ram", &mem_path) < 0)
+        // This path should not be reached if NUMA is requested
+        virDomainXMLPrivateDataCallbacks *privateData = (virDomainXMLPrivateDataCallbacks *) priv;
+        if (qemuGetMemoryBackingPath(def, privateData, 0, "ram", &mem_path) < 0)
             return -1;
     }
 
diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index aae9f316d8..e327a906a3 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -1622,22 +1622,106 @@ qemuGetDomainHupageMemPath(virQEMUDriver *driver,
 
 
 int
-qemuGetMemoryBackingDomainPath(virQEMUDriver *driver,
-                               const virDomainDef *def,
+qemuGetMemoryBackingDomainPath(const virDomainDef *def,
+			       virDomainXMLPrivateDataCallbacks *priv,
+			       const size_t targetNode,
                                char **path)
 {
+    qemuDomainObjPrivate *privateData = (qemuDomainObjPrivate *) priv;
+    virQEMUDriver *driver = privateData->driver;
     g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver);
     const char *root = driver->embeddedRoot;
     g_autofree char *shortName = NULL;
+    size_t path_index = 0; // original behavior, described below
 
     if (!(shortName = virDomainDefGetShortName(def)))
         return -1;
 
-    if (root && !STRPREFIX(cfg->memoryBackingDir, root)) {
+    /*
+     * We have three use cases:
+     *
+     * 1. Domain has multiple NUMA nodes, but they have only specified
+     *    a single directory path in qemu.conf. (Original default behavior).
+     *
+     *    In this case, we already placed the memory backing path for each NUMA node
+     *    into the same path location. Preserve the established default behavior.
+     *
+     * 2. Domain has multiple NUMA nodes, but we have asked for multiple directory
+     *    paths as well.
+     *
+     *    In this case, we will have a one-to-one relationship between the number
+     *    of NUMA nodes and the order in which the paths are provided.
+     *    If the user does not specify enough paths, then we need to throw an error.
+     *    NOTE: This is open to comment. The "ordering" of the paths here is not initially
+     *    configurable to preserve backwards compatibility with the original qemu.conf syntax.
+     *    If controlling the ordering is desired, we would need to revise the syntax in
+     *    qemu.conf to make that possible. That hasn't been needed so far.
+     *
+     *    NOTE A): We must check with numatune here, if requested. The number of NUMA nodes
+     *    	may be less than or equal to the number of provided paths. If it is less,
+     *    	we have to respect the choices made by numatune. In this case, we will map the
+     *          physical NUMA nodes (0, 1, 2...) in the order in which they appear in qemu.conf
+     *
+     * 3. Domain has a single NUMA node, but we have asked for multiple directory paths.
+     *
+     * 	  In this case we also need to check if numatune is requested. If so,
+     * 	  we want to pick the path indicated by numatune.
+     *
+     * NOTE B): In both cases 2 and 3, if numatune is requested, the path obviously cannot
+     *         be changed on the fly, like it normally would be in "restrictive" mode
+     *         during runtime. So, we will only do this if the mode requested is "strict".
+     *
+     * NOTE C): Furthermore, in both cases 2 and 3, if the number of directory paths provided
+     * 		is more than one, and one of either: a) no numatune is provided at all or
+     *          b) numatune is in fact provided, but the mode is not strict,
+     *          then we must throw an error. This is because we cannot know which backing
+     *          directory path to choose without the user's input.
+     *
+     * NOTE D): If one or more directory paths is requested in any of the cases 1, 2, or 3,
+     *          the numatune cannot specify more than one NUMA node, because the only mode
+     *          possible with directory paths is "strict" (e.g. automatic numa balancing of
+     *          memory will not work). Only one numa node can be requested by numatune, else
+     *          we must throw an error.
+     */
+
+    if (cfg->nb_memoryBackingDirs > 1) {
+	    virDomainNuma *numatune = def->numa;
+	    virBitmap *numaBitmap = virDomainNumatuneGetNodeset(numatune, privateData->autoNodeset, targetNode);
+	    size_t numa_node_count = virDomainNumaGetNodeCount(def->numa);
+	    virDomainNumatuneMemMode mode;
+
+	    if ((numatune && numaBitmap && virNumaNodesetIsAvailable(numaBitmap)) &&
+		virDomainNumatuneGetMode(def->numa, -1, &mode) == 0 &&
+	        mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
+	        virBitmapCountBits(numaBitmap) == 1) {
+		    // Is numatune provided?
+		    // Is it strict?
+		    // Does it only specify a single pinning for this target?
+		    // Yes to all 3? then good to go.
+
+		    if (cfg->nb_memoryBackingDirs < numa_node_count) {
+			virReportError(VIR_ERR_INTERNAL_ERROR,
+			       _("Domain requesting configuration for %lu NUMA nodes, but memory backing directory only has (%lu) directory paths available. Either reduce this to one directory or provide more paths to use."), numa_node_count, cfg->nb_memoryBackingDirs);
+			return -1;
+		    }
+
+		    path_index = virBitmapNextSetBit(numaBitmap, -1);
+	    } else if (numa_node_count > 1 && numa_node_count == cfg->nb_memoryBackingDirs) {
+		// Be nice. A valid numatune and pinning has not been specified, but the number
+		// of paths matches up exactly, so just assign them one-to-one.
+	        path_index = targetNode;
+	    } else {
+		virReportError(VIR_ERR_INTERNAL_ERROR,
+			       _("There are (%lu) memory directory directories configured. Domain must use a 'strict' numatune as well as an associated pinning configuration for each NUMA node before proceeding. An individual NUMA node can only be pinned to a single backing directory. Please correct the domain configuration or remove the memory backing directories and try again."), cfg->nb_memoryBackingDirs);
+		return -1;
+	    }
+    }
+
+    if (root && !STRPREFIX(cfg->memoryBackingDirs[path_index], root)) {
         g_autofree char * hash = virDomainDriverGenerateRootHash("qemu", root);
-        *path = g_strdup_printf("%s/%s-%s", cfg->memoryBackingDir, hash, shortName);
+        *path = g_strdup_printf("%s/%s-%s", cfg->memoryBackingDirs[path_index], hash, shortName);
     } else {
-        *path = g_strdup_printf("%s/%s", cfg->memoryBackingDir, shortName);
+        *path = g_strdup_printf("%s/%s", cfg->memoryBackingDirs[path_index], shortName);
     }
 
     return 0;
@@ -1657,8 +1741,9 @@ qemuGetMemoryBackingDomainPath(virQEMUDriver *driver,
  *          -1 otherwise (with error reported).
  */
 int
-qemuGetMemoryBackingPath(virQEMUDriver *driver,
-                         const virDomainDef *def,
+qemuGetMemoryBackingPath(const virDomainDef *def,
+			 virDomainXMLPrivateDataCallbacks *priv,
+			 const size_t targetNode,
                          const char *alias,
                          char **memPath)
 {
@@ -1671,7 +1756,7 @@ qemuGetMemoryBackingPath(virQEMUDriver *driver,
         return -1;
     }
 
-    if (qemuGetMemoryBackingDomainPath(driver, def, &domainPath) < 0)
+    if (qemuGetMemoryBackingDomainPath(def, priv, targetNode, &domainPath) < 0)
         return -1;
 
     *memPath = g_strdup_printf("%s/%s", domainPath, alias);
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index 2b8d540df0..4ae21524f7 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -370,11 +370,14 @@ int qemuGetDomainHupageMemPath(virQEMUDriver *driver,
                                unsigned long long pagesize,
                                char **memPath);
 
-int qemuGetMemoryBackingDomainPath(virQEMUDriver *driver,
-                                   const virDomainDef *def,
+int qemuGetMemoryBackingDomainPath(const virDomainDef *def,
+				   virDomainXMLPrivateDataCallbacks *priv,
+				   const size_t targetNode,
                                    char **path);
-int qemuGetMemoryBackingPath(virQEMUDriver *driver,
-                             const virDomainDef *def,
+
+int qemuGetMemoryBackingPath(const virDomainDef *def,
+			     virDomainXMLPrivateDataCallbacks *priv,
+			     const size_t targetNode,
                              const char *alias,
                              char **memPath);
 
-- 
2.25.1
_______________________________________________
Devel mailing list -- devel@xxxxxxxxxxxxxxxxx
To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxx




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux