From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> Currently the QEMU driver has three ways of setting up cgroups. It either skips them entirely (if non-root), or uses systemd-machined, or uses cgroups directly. This change adds the ability to configure the mechanism for registering resources between all these options explicitly, via <resource backend="none|cgroupfs|machined"/>. It is further possible to register directly with systemd and bypass machined. We don't support this but systemd-nspawn does and we ought to consider this at some point. This would involve a new "systemd" backend type alongside "machined". Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx> --- docs/formatdomain.html.in | 24 +++++++++++- docs/schemas/domaincommon.rng | 17 +++++++-- src/conf/domain_conf.c | 46 ++++++++++++++++++----- src/conf/domain_conf.h | 13 +++++++ src/libvirt_private.syms | 2 + src/lxc/lxc_cgroup.c | 8 ++++ src/lxc/lxc_process.c | 1 + src/qemu/qemu_cgroup.c | 14 +++++++ tests/lxcxml2xmldata/lxc-capabilities.xml | 2 +- tests/lxcxml2xmldata/lxc-idmap.xml | 2 +- 10 files changed, 113 insertions(+), 16 deletions(-) diff --git a/docs/formatdomain.html.in b/docs/formatdomain.html.in index 0d229386eb..a016e789f1 100644 --- a/docs/formatdomain.html.in +++ b/docs/formatdomain.html.in @@ -1445,7 +1445,7 @@ </p> <pre> ... -<resource> +<resource backend='none|cgroupfs|machined'> <partition>/virtualmachines/production</partition> </resource> ... @@ -1455,8 +1455,30 @@ Resource partitions are currently supported by the QEMU and LXC drivers, which map partition paths to cgroups directories, in all mounted controllers. <span class="since">Since 1.0.5</span> + There is a choice of implementations to use for resource partitions + controlled via the optional <code>backend</code> attribute. + <span class="since">Since 6.2.0</span>. 
It accepts the following values: </p> + <dl> + <dt>none</dt> + <dd>Resource management in libvirt is disabled, with the APIs + returning an error indicating the functionality is not available. + The QEMU will remain in whatever cgroup the libvirt daemon + was in. On systemd hosts, this will result in QEMU being + terminated at the same time as the privileged libvirt management + daemon which launched it.</dd> + <dt>cgroupfs</dt> + <dd>Cgroups will be directly created via the cgroups virtual filesystem. + This is not recommended for use in scenarios where systemd is in + charge of the cgroup hierarchy, unless the resource partition points + to a subtree that systemd has delegated administrative control for.</dd> + <dt>machined</dt> + <dd>Systemd machined will be called to indirectly create cgroups. + This is recommended for any host where systemd is managing the + cgroup hierarchy.</dd> + </dl> + <h3><a id="elementsCPU">CPU model and topology</a></h3> <p> diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng index 6805420451..29ffc3a3cf 100644 --- a/docs/schemas/domaincommon.rng +++ b/docs/schemas/domaincommon.rng @@ -1126,9 +1126,20 @@ <define name="respartition"> <element name="resource"> - <element name="partition"> - <ref name="absFilePath"/> - </element> + <optional> + <attribute name="backend"> + <choice> + <value>none</value> + <value>cgroupfs</value> + <value>machined</value> + </choice> + </attribute> + </optional> + <optional> + <element name="partition"> + <ref name="absFilePath"/> + </element> + </optional> </element> </define> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index e0432fc47d..ae512283d0 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -1244,6 +1244,13 @@ VIR_ENUM_IMPL(virDomainOsDefFirmware, "efi", ); +VIR_ENUM_IMPL(virDomainResourceBackend, + VIR_DOMAIN_RESOURCE_BACKEND_LAST, + "default", + "none", + "cgroupfs", + "machined"); + /* Internal mapping: subset of block job types that can be present 
in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob); @@ -19100,17 +19107,24 @@ virDomainResourceDefParse(xmlNodePtr node, { VIR_XPATH_NODE_AUTORESTORE(ctxt); virDomainResourceDefPtr def = NULL; + g_autofree char *reg = NULL; ctxt->node = node; if (VIR_ALLOC(def) < 0) goto error; - /* Find out what type of virtualization to use */ - if (!(def->partition = virXPathString("string(./partition)", ctxt))) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("missing resource partition attribute")); - goto error; + def->partition = virXPathString("string(./partition)", ctxt); + + reg = virXMLPropString(node, "backend"); + if (reg != NULL) { + if ((def->backend = virDomainResourceBackendTypeFromString(reg)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Invalid backend attribute")); + goto error; + } + } else { + def->backend = VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT; } return def; @@ -27983,11 +27997,23 @@ static void virDomainResourceDefFormat(virBufferPtr buf, virDomainResourceDefPtr def) { - virBufferAddLit(buf, "<resource>\n"); - virBufferAdjustIndent(buf, 2); - virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); - virBufferAdjustIndent(buf, -2); - virBufferAddLit(buf, "</resource>\n"); + if (def->backend == VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT && + def->partition == NULL) + return; + + virBufferAddLit(buf, "<resource"); + if (def->backend != VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT) + virBufferAsprintf(buf, " backend='%s'", virDomainResourceBackendTypeToString(def->backend)); + + if (def->partition) { + virBufferAddLit(buf, ">\n"); + virBufferAdjustIndent(buf, 2); + virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</resource>\n"); + } else { + virBufferAddLit(buf, "/>\n"); + } } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 73bd097cf8..4bfda29dee 100644 --- a/src/conf/domain_conf.h +++ 
b/src/conf/domain_conf.h @@ -2228,7 +2228,19 @@ struct _virDomainPanicDef { void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights, int ndevices); +typedef enum { + VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT, + VIR_DOMAIN_RESOURCE_BACKEND_NONE, + VIR_DOMAIN_RESOURCE_BACKEND_CGROUPFS, + VIR_DOMAIN_RESOURCE_BACKEND_MACHINED, + + VIR_DOMAIN_RESOURCE_BACKEND_LAST, +} virDomainResourceBackend; + +typedef struct _virDomainResourceDef virDomainResourceDef; +typedef virDomainResourceDef *virDomainResourceDefPtr; struct _virDomainResourceDef { + int backend; /* enum virDomainResourceBackend */ char *partition; }; @@ -3525,6 +3537,7 @@ VIR_ENUM_DECL(virDomainIOMMUModel); VIR_ENUM_DECL(virDomainVsockModel); VIR_ENUM_DECL(virDomainShmemModel); VIR_ENUM_DECL(virDomainLaunchSecurity); +VIR_ENUM_DECL(virDomainResourceBackend); /* from libvirt.h */ VIR_ENUM_DECL(virDomainState); VIR_ENUM_DECL(virDomainNostateReason); diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 6b305bdd0e..6e5cc201ff 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -575,6 +575,8 @@ virDomainRedirdevBusTypeToString; virDomainRedirdevDefFind; virDomainRedirdevDefFree; virDomainRedirdevDefRemove; +virDomainResourceBackendTypeFromString; +virDomainResourceBackendTypeToString; virDomainRNGBackendTypeToString; virDomainRNGDefFree; virDomainRNGFind; diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 7df723a4da..326d33981c 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -392,6 +392,14 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, if (!machineName) goto cleanup; + if (def->resource->backend != VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource backend '%s' not available"), + virDomainResourceBackendTypeToString( + def->resource->backend)); + goto cleanup; + } + if (def->resource->partition[0] != '/') { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Resource partition '%s' must start with 
'/'"), diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 5199f3806e..4ec3cc5619 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1260,6 +1260,7 @@ int virLXCProcessStart(virConnectPtr conn, if (VIR_ALLOC(res) < 0) goto cleanup; + res->backend = VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT; res->partition = g_strdup("/machine"); vm->def->resource = res; diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index c0e30f6152..c407431f6b 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -940,11 +940,20 @@ qemuInitCgroup(virDomainObjPtr vm, if (VIR_ALLOC(res) < 0) goto cleanup; + res->backend = VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT; res->partition = g_strdup("/machine"); vm->def->resource = res; } + if (vm->def->resource->backend != VIR_DOMAIN_RESOURCE_BACKEND_DEFAULT) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource backend '%s' not available"), + virDomainResourceBackendTypeToString( + vm->def->resource->backend)); + goto cleanup; + } + if (vm->def->resource->partition[0] != '/') { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Resource partition '%s' must start with '/'"), @@ -1061,6 +1070,11 @@ qemuConnectCgroup(virDomainObjPtr vm) virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); int ret = -1; + if (vm->def->resource && + vm->def->resource->backend == VIR_DOMAIN_RESOURCE_BACKEND_NONE) { + goto done; + } + if (!virQEMUDriverIsPrivileged(priv->driver)) goto done; diff --git a/tests/lxcxml2xmldata/lxc-capabilities.xml b/tests/lxcxml2xmldata/lxc-capabilities.xml index 04d64e3e41..335fdf8b91 100644 --- a/tests/lxcxml2xmldata/lxc-capabilities.xml +++ b/tests/lxcxml2xmldata/lxc-capabilities.xml @@ -4,7 +4,7 @@ <memory unit='KiB'>1048576</memory> <currentMemory unit='KiB'>1048576</currentMemory> <vcpu placement='static'>1</vcpu> - <resource> + <resource backend='cgroupfs'> <partition>/machine</partition> </resource> <os> diff --git a/tests/lxcxml2xmldata/lxc-idmap.xml 
b/tests/lxcxml2xmldata/lxc-idmap.xml index b477636c30..d618d69706 100644 --- a/tests/lxcxml2xmldata/lxc-idmap.xml +++ b/tests/lxcxml2xmldata/lxc-idmap.xml @@ -4,7 +4,7 @@ <memory unit='KiB'>1048576</memory> <currentMemory unit='KiB'>1048576</currentMemory> <vcpu placement='static'>1</vcpu> - <resource> + <resource backend='machined'> <partition>/machine</partition> </resource> <os> -- 2.24.1