[libvirt PATCH 1/4] conf: allow different resource registration modes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Daniel P. Berrange" <berrange@xxxxxxxxxx>

Currently the QEMU driver has three ways of setting up cgroups. It either
skips them entirely (if non-root), or uses systemd-machined, or uses
cgroups directly.

This change adds the ability to explicitly configure which of these
mechanisms is used for registering resources, via

  <resource backend="none|cgroupfs|machined"/>

It is further possible to register directly with systemd and bypass
machined. We don't support this but systemd-nspawn does and we ought
to consider this at some point. This would involve a new "systemd"
backend type alongside "machined".

Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx>
---
 docs/formatdomain.html.in                 | 24 +++++++++++-
 docs/schemas/domaincommon.rng             | 17 +++++++--
 src/conf/domain_conf.c                    | 46 ++++++++++++++++++-----
 src/conf/domain_conf.h                    | 13 +++++++
 src/libvirt_private.syms                  |  2 +
 src/lxc/lxc_cgroup.c                      |  8 ++++
 src/lxc/lxc_process.c                     |  1 +
 src/qemu/qemu_cgroup.c                    | 14 +++++++
 tests/lxcxml2xmldata/lxc-capabilities.xml |  2 +-
 tests/lxcxml2xmldata/lxc-idmap.xml        |  2 +-
 10 files changed, 113 insertions(+), 16 deletions(-)

diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in
index 0d229386eb..a016e789f1 100644
--- a/docs/formatdomain.html.in
+++ b/docs/formatdomain.html.in
@@ -1445,7 +1445,7 @@
     </p>
 <pre>
 ...
-&lt;resource&gt;
+&lt;resource backend='none|cgroupfs|machined'&gt;
   &lt;partition&gt;/virtualmachines/production&lt;/partition&gt;
 &lt;/resource&gt;
 ...
@@ -1455,8 +1455,30 @@
       Resource partitions are currently supported by the QEMU and
       LXC drivers, which map partition paths to cgroups directories,
       in all mounted controllers. <span class="since">Since 1.0.5</span>
+      There is a choice of implementations to use for resource partitions
+      controlled via the optional <code>backend</code> attribute.
+      <span class="since">Since 6.2.0</span>. It accepts the following values:
     </p>
 
+    <dl>
+      <dt>none</dt>
+      <dd>Resource management in libvirt is disabled, with the APIs
+        returning an error indicating the functionality is not available.
+        The QEMU process will remain in whatever cgroup the libvirt daemon
+        was in. On systemd hosts, this will result in QEMU being
+        terminated at the same time as the privileged libvirt management
+        daemon which launched it.</dd>
+      <dt>cgroupfs</dt>
+      <dd>Cgroups will be directly created via the cgroups virtual filesystem.
+        This is not recommended for use in scenarios where systemd is in
+        charge of the cgroup hierarchy, unless the resource partition points
+        to a subtree that systemd has delegated administrative control of.</dd>
+      <dt>machined</dt>
+      <dd>Systemd machined will be called to indirectly create cgroups.
+        This is recommended for any host where systemd is managing the
+        cgroup hierarchy.</dd>
+    </dl>
+
     <h3><a id="elementsCPU">CPU model and topology</a></h3>
 
     <p>
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index 6805420451..29ffc3a3cf 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -1126,9 +1126,20 @@
 
   <define name="respartition">
     <element name="resource">
-      <element name="partition">
-        <ref name="absFilePath"/>
-      </element>
+      <optional>
+        <attribute name="backend">
+          <choice>
+            <value>none</value>
+            <value>cgroupfs</value>
+            <value>machined</value>
+          </choice>
+        </attribute>
+      </optional>
+      <optional>
+        <element name="partition">
+          <ref name="absFilePath"/>
+        </element>
+      </optional>
     </element>
   </define>
 
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index e0432fc47d..ae512283d0 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -1244,6 +1244,13 @@ VIR_ENUM_IMPL(virDomainOsDefFirmware,
               "efi",
 );
 
+VIR_ENUM_IMPL(virDomainResourceBackend,
+              VIR_DOMAIN_RESOURCE_BACKEND_LAST,
+              "default",
+              "none",
+              "cgroupfs",
+              "machined");
+
 /* Internal mapping: subset of block job types that can be present in
  * <mirror> XML (remaining types are not two-phase). */
 VIR_ENUM_DECL(virDomainBlockJob);
@@ -19100,17 +19107,24 @@ virDomainResourceDefParse(xmlNodePtr node,
 {
     VIR_XPATH_NODE_AUTORESTORE(ctxt);
     virDomainResourceDefPtr def = NULL;
+    g_autofree char *reg = NULL;
 
     ctxt->node = node;
 
     if (VIR_ALLOC(def) < 0)
         goto error;
 
-    /* Find out what type of virtualization to use */
-    if (!(def->partition = virXPathString("string(./partition)", ctxt))) {
-        virReportError(VIR_ERR_INTERNAL_ERROR,
-                       "%s", _("missing resource partition attribute"));
-        goto error;
+    def->partition = virXPathString("string(./partition)", ctxt);
+
+    reg = virXMLPropString(node, "backend");
+    if (reg != NULL) {
+        if ((def->backend = virDomainResourceBackendTypeFromString(reg)) <= 0) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           "%s", _("Invalid backend attribute"));
+            goto error;
+        }
+    } else {
+        def->backend = VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT;
     }
 
     return def;
@@ -27983,11 +27997,23 @@ static void
 virDomainResourceDefFormat(virBufferPtr buf,
                            virDomainResourceDefPtr def)
 {
-    virBufferAddLit(buf, "<resource>\n");
-    virBufferAdjustIndent(buf, 2);
-    virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition);
-    virBufferAdjustIndent(buf, -2);
-    virBufferAddLit(buf, "</resource>\n");
+    if (def->backend == VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT &&
+        def->partition == NULL)
+        return;
+
+    virBufferAddLit(buf, "<resource");
+    if (def->backend != VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT)
+        virBufferAsprintf(buf, " backend='%s'", virDomainResourceBackendTypeToString(def->backend));
+
+    if (def->partition) {
+        virBufferAddLit(buf, ">\n");
+        virBufferAdjustIndent(buf, 2);
+        virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition);
+        virBufferAdjustIndent(buf, -2);
+        virBufferAddLit(buf, "</resource>\n");
+    } else {
+        virBufferAddLit(buf, "/>\n");
+    }
 }
 
 
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 73bd097cf8..4bfda29dee 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -2228,7 +2228,19 @@ struct _virDomainPanicDef {
 void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights,
                               int ndevices);
 
+typedef enum {
+    VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT,
+    VIR_DOMAIN_RESOURCE_BACKEND_NONE,
+    VIR_DOMAIN_RESOURCE_BACKEND_CGROUPFS,
+    VIR_DOMAIN_RESOURCE_BACKEND_MACHINED,
+
+    VIR_DOMAIN_RESOURCE_BACKEND_LAST,
+} virDomainResourceBackend;
+
+typedef struct _virDomainResourceDef virDomainResourceDef;
+typedef virDomainResourceDef *virDomainResourceDefPtr;
 struct _virDomainResourceDef {
+    int backend;    /* enum virDomainResourceBackend */
     char *partition;
 };
 
@@ -3525,6 +3537,7 @@ VIR_ENUM_DECL(virDomainIOMMUModel);
 VIR_ENUM_DECL(virDomainVsockModel);
 VIR_ENUM_DECL(virDomainShmemModel);
 VIR_ENUM_DECL(virDomainLaunchSecurity);
+VIR_ENUM_DECL(virDomainResourceBackend);
 /* from libvirt.h */
 VIR_ENUM_DECL(virDomainState);
 VIR_ENUM_DECL(virDomainNostateReason);
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 6b305bdd0e..6e5cc201ff 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -575,6 +575,8 @@ virDomainRedirdevBusTypeToString;
 virDomainRedirdevDefFind;
 virDomainRedirdevDefFree;
 virDomainRedirdevDefRemove;
+virDomainResourceBackendTypeFromString;
+virDomainResourceBackendTypeToString;
 virDomainRNGBackendTypeToString;
 virDomainRNGDefFree;
 virDomainRNGFind;
diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c
index 7df723a4da..326d33981c 100644
--- a/src/lxc/lxc_cgroup.c
+++ b/src/lxc/lxc_cgroup.c
@@ -392,6 +392,14 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def,
     if (!machineName)
         goto cleanup;
 
+    if (def->resource->backend != VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Resource backend '%s' not available"),
+                       virDomainResourceBackendTypeToString(
+                           def->resource->backend));
+        goto cleanup;
+    }
+
     if (def->resource->partition[0] != '/') {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                        _("Resource partition '%s' must start with '/'"),
diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
index 5199f3806e..4ec3cc5619 100644
--- a/src/lxc/lxc_process.c
+++ b/src/lxc/lxc_process.c
@@ -1260,6 +1260,7 @@ int virLXCProcessStart(virConnectPtr conn,
         if (VIR_ALLOC(res) < 0)
             goto cleanup;
 
+        res->backend = VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT;
         res->partition = g_strdup("/machine");
 
         vm->def->resource = res;
diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c
index c0e30f6152..c407431f6b 100644
--- a/src/qemu/qemu_cgroup.c
+++ b/src/qemu/qemu_cgroup.c
@@ -940,11 +940,20 @@ qemuInitCgroup(virDomainObjPtr vm,
         if (VIR_ALLOC(res) < 0)
             goto cleanup;
 
+        res->backend = VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT;
         res->partition = g_strdup("/machine");
 
         vm->def->resource = res;
     }
 
+    if (vm->def->resource->backend != VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT) {
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                       _("Resource backend '%s' not available"),
+                       virDomainResourceBackendTypeToString(
+                           vm->def->resource->backend));
+        goto cleanup;
+    }
+
     if (vm->def->resource->partition[0] != '/') {
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                        _("Resource partition '%s' must start with '/'"),
@@ -1061,6 +1070,11 @@ qemuConnectCgroup(virDomainObjPtr vm)
     virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver);
     int ret = -1;
 
+    if (vm->def->resource &&
+        vm->def->resource->backend == VIR_DOMAIN_RESOURCE_BACKEND_NONE) {
+        goto done;
+    }
+
     if (!virQEMUDriverIsPrivileged(priv->driver))
         goto done;
 
diff --git a/tests/lxcxml2xmldata/lxc-capabilities.xml b/tests/lxcxml2xmldata/lxc-capabilities.xml
index 04d64e3e41..335fdf8b91 100644
--- a/tests/lxcxml2xmldata/lxc-capabilities.xml
+++ b/tests/lxcxml2xmldata/lxc-capabilities.xml
@@ -4,7 +4,7 @@
   <memory unit='KiB'>1048576</memory>
   <currentMemory unit='KiB'>1048576</currentMemory>
   <vcpu placement='static'>1</vcpu>
-  <resource>
+  <resource backend='cgroupfs'>
     <partition>/machine</partition>
   </resource>
   <os>
diff --git a/tests/lxcxml2xmldata/lxc-idmap.xml b/tests/lxcxml2xmldata/lxc-idmap.xml
index b477636c30..d618d69706 100644
--- a/tests/lxcxml2xmldata/lxc-idmap.xml
+++ b/tests/lxcxml2xmldata/lxc-idmap.xml
@@ -4,7 +4,7 @@
   <memory unit='KiB'>1048576</memory>
   <currentMemory unit='KiB'>1048576</currentMemory>
   <vcpu placement='static'>1</vcpu>
-  <resource>
+  <resource backend='machined'>
     <partition>/machine</partition>
   </resource>
   <os>
-- 
2.24.1





[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux