Currently the QEMU driver has three ways of setting up cgroups. It either skips them entirely (if non-root), or uses systemd-machined, or uses cgroups directly. It is further possible to register directly with systemd and bypass machined. We don't support this by systemd-nsspawn does and we ought to. This change adds ability to configure the mechanism for registering resources between all these options explicitly. via <resource register="none|direct|machined|systemd"/> Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx> --- src/conf/domain_conf.c | 42 ++++++++++++++++++++++------- src/conf/domain_conf.h | 12 +++++++++ src/libvirt_private.syms | 2 ++ src/lxc/lxc_cgroup.c | 34 ++++++++++++++++++++++++ src/lxc/lxc_cgroup.h | 3 +++ src/lxc/lxc_process.c | 11 ++++---- src/qemu/qemu_cgroup.c | 69 +++++++++++++++++++++++++++++++++++++++++------- src/util/vircgroup.c | 55 ++++++++++++++++++++++++-------------- src/util/vircgroup.h | 10 ++++++- 9 files changed, 194 insertions(+), 44 deletions(-) diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c index 9a62bc472c..fb8e7a0ec7 100644 --- a/src/conf/domain_conf.c +++ b/src/conf/domain_conf.c @@ -910,6 +910,14 @@ VIR_ENUM_IMPL(virDomainHPTResizing, "required", ); +VIR_ENUM_IMPL(virDomainResourceRegister, + VIR_DOMAIN_RESOURCE_REGISTER_LAST, + "default", + "none", + "cgroup", + "machined", + "systemd"); + /* Internal mapping: subset of block job types that can be present in * <mirror> XML (remaining types are not two-phase). */ VIR_ENUM_DECL(virDomainBlockJob) @@ -17698,16 +17706,20 @@ virDomainResourceDefParse(xmlNodePtr node, { virDomainResourceDefPtr def = NULL; xmlNodePtr tmp = ctxt->node; + char *reg; ctxt->node = node; if (VIR_ALLOC(def) < 0) goto error; - /* Find out what type of virtualization to use */ - if (!(def->partition = virXPathString("string(./partition)", ctxt))) { - virReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("missing resource partition attribute")); + def->partition = virXPathString("string(./partition)", ctxt); + + reg = virXMLPropString(node, "register"); + if (reg != NULL && + (def->reg = virDomainResourceRegisterTypeFromString(reg)) <= 0) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Invalid register attribute")); goto error; } @@ -25568,11 +25580,23 @@ static void virDomainResourceDefFormat(virBufferPtr buf, virDomainResourceDefPtr def) { - virBufferAddLit(buf, "<resource>\n"); - virBufferAdjustIndent(buf, 2); - virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); - virBufferAdjustIndent(buf, -2); - virBufferAddLit(buf, "</resource>\n"); + if (def->reg == VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT && + def->partition == NULL) + return; + + virBufferAddLit(buf, "<resource"); + if (def->reg != VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT) + virBufferAsprintf(buf, " register='%s'", virDomainResourceRegisterTypeToString(def->reg)); + + if (def->partition) { + virBufferAddLit(buf, ">\n"); + virBufferAdjustIndent(buf, 2); + virBufferEscapeString(buf, "<partition>%s</partition>\n", def->partition); + virBufferAdjustIndent(buf, -2); + virBufferAddLit(buf, "</resource>\n"); + } else { + virBufferAddLit(buf, "/>\n"); + } } diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h index 6f7f96b3dd..a7a6628a36 100644 --- a/src/conf/domain_conf.h +++ b/src/conf/domain_conf.h @@ -2145,9 +2145,20 @@ struct _virDomainPanicDef { void virBlkioDeviceArrayClear(virBlkioDevicePtr deviceWeights, int ndevices); +typedef enum { + VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT, + VIR_DOMAIN_RESOURCE_REGISTER_NONE, + VIR_DOMAIN_RESOURCE_REGISTER_CGROUP, + VIR_DOMAIN_RESOURCE_REGISTER_MACHINED, + VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD, + + VIR_DOMAIN_RESOURCE_REGISTER_LAST, +} virDomainResourceRegister; + typedef struct _virDomainResourceDef virDomainResourceDef; typedef virDomainResourceDef *virDomainResourceDefPtr; struct _virDomainResourceDef { + int reg; /* enum virDomainResourceRegister */ char *partition; }; @@ -3325,6 +3336,7 @@ VIR_ENUM_DECL(virDomainMemorySource) VIR_ENUM_DECL(virDomainMemoryAllocation) VIR_ENUM_DECL(virDomainIOMMUModel) VIR_ENUM_DECL(virDomainShmemModel) +VIR_ENUM_DECL(virDomainResourceRegister) /* from libvirt.h */ VIR_ENUM_DECL(virDomainState) VIR_ENUM_DECL(virDomainNostateReason) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index d5c3b9abb5..a0fde65dba 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -489,6 +489,8 @@ virDomainRedirdevBusTypeToString; virDomainRedirdevDefFind; virDomainRedirdevDefFree; virDomainRedirdevDefRemove; +virDomainResourceRegisterTypeFromString; +virDomainResourceRegisterTypeToString; virDomainRNGBackendTypeToString; virDomainRNGDefFree; virDomainRNGFind; diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 3369801870..7bd479df1b 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -478,6 +478,35 @@ static int virLXCCgroupSetupDeviceACL(virDomainDefPtr def, return ret; } +int virLXCCgroupMode(virDomainResourceRegister reg, + virCgroupRegister *cgreg) +{ + switch (reg) { + case VIR_DOMAIN_RESOURCE_REGISTER_NONE: + goto unsupported; + case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT: + *cgreg = VIR_CGROUP_REGISTER_DEFAULT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED: + *cgreg = VIR_CGROUP_REGISTER_MACHINED; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP: + *cgreg = VIR_CGROUP_REGISTER_DIRECT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD: + default: + goto unsupported; + } + + return 0; + + unsupported: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource register '%s' not available"), + virDomainResourceRegisterTypeToString(reg)); + return -1; +} + virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, pid_t initpid, @@ -485,11 +514,15 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, int *nicindexes) { virCgroupPtr cgroup = NULL; + virCgroupRegister reg; char *machineName = virLXCDomainGetMachineName(def, 0); if (!machineName) goto cleanup; + if (virLXCCgroupMode(def->resource->reg, ®) < 0) + goto cleanup; + if (def->resource->partition[0] != '/') { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, _("Resource partition '%s' must start with '/'"), @@ -504,6 +537,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, initpid, true, nnicindexes, nicindexes, + ®, def->resource->partition, -1, &cgroup) < 0) diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index e85f21c47d..979d6a154b 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -27,6 +27,9 @@ # include "lxc_fuse.h" # include "virusb.h" +int virLXCCgroupMode(virDomainResourceRegister reg, + virCgroupRegister *cgreg); + virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, pid_t initpid, size_t nnicindexes, diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index efd8a69000..24aa0cb0bf 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1166,6 +1166,7 @@ virLXCProcessEnsureRootFS(virDomainObjPtr vm) return -1; } + /** * virLXCProcessStart: * @conn: pointer to connection @@ -1260,13 +1261,13 @@ int virLXCProcessStart(virConnectPtr conn, if (VIR_ALLOC(res) < 0) goto cleanup; - if (VIR_STRDUP(res->partition, "/machine") < 0) { - VIR_FREE(res); - goto cleanup; - } - vm->def->resource = res; } + if (vm->def->resource->reg != VIR_DOMAIN_RESOURCE_REGISTER_NONE && + !vm->def->resource->partition) { + if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0) + goto cleanup; + } if (virAsprintf(&logfile, "%s/%s.log", cfg->logDir, vm->def->name) < 0) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 19252ea239..5167d7fee1 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -834,6 +834,46 @@ qemuSetupCpuCgroup(virDomainObjPtr vm) } +static int qemuGetCgroupMode(virDomainObjPtr vm, + virDomainResourceRegister reg, + virCgroupRegister *cgreg) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + bool avail = virQEMUDriverIsPrivileged(priv->driver) && + virCgroupAvailable(); + + switch (reg) { + case VIR_DOMAIN_RESOURCE_REGISTER_NONE: + return 0; + case VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT: + if (!avail) + return 0; + *cgreg = VIR_CGROUP_REGISTER_DEFAULT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_MACHINED: + if (!avail) + goto unsupported; + *cgreg = VIR_CGROUP_REGISTER_MACHINED; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_CGROUP: + if (!avail) + goto unsupported; + *cgreg = VIR_CGROUP_REGISTER_DIRECT; + break; + case VIR_DOMAIN_RESOURCE_REGISTER_SYSTEMD: + default: + goto unsupported; + } + + return 1; + + unsupported: + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource register '%s' not available"), + virDomainResourceRegisterTypeToString(reg)); + return -1; +} + static int qemuInitCgroup(virDomainObjPtr vm, size_t nnicindexes, @@ -842,11 +882,17 @@ qemuInitCgroup(virDomainObjPtr vm, int ret = -1; qemuDomainObjPrivatePtr priv = vm->privateData; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); + virCgroupRegister reg; + int rv; - if (!virQEMUDriverIsPrivileged(priv->driver)) - goto done; - - if (!virCgroupAvailable()) + rv = qemuGetCgroupMode(vm, + vm->def->resource ? + vm->def->resource->reg : + VIR_DOMAIN_RESOURCE_REGISTER_DEFAULT, + ®); + if (rv < 0) + goto cleanup; + if (rv == 0) goto done; virCgroupFree(&priv->cgroup); @@ -857,13 +903,12 @@ qemuInitCgroup(virDomainObjPtr vm, if (VIR_ALLOC(res) < 0) goto cleanup; - if (VIR_STRDUP(res->partition, "/machine") < 0) { - VIR_FREE(res); - goto cleanup; - } - vm->def->resource = res; } + if (!vm->def->resource->partition) { + if (VIR_STRDUP(vm->def->resource->partition, "/machine") < 0) + goto cleanup; + } if (vm->def->resource->partition[0] != '/') { virReportError(VIR_ERR_CONFIG_UNSUPPORTED, @@ -879,6 +924,7 @@ qemuInitCgroup(virDomainObjPtr vm, vm->pid, false, nnicindexes, nicindexes, + ®, vm->def->resource->partition, cfg->cgroupControllers, &priv->cgroup) < 0) { @@ -980,6 +1026,11 @@ qemuConnectCgroup(virDomainObjPtr vm) virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(priv->driver); int ret = -1; + if (vm->def->resource && + vm->def->resource->reg == VIR_DOMAIN_RESOURCE_REGISTER_NONE) { + goto done; + } + if (!virQEMUDriverIsPrivileged(priv->driver)) goto done; diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 0a31947b0d..07ffb78c78 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1733,6 +1733,7 @@ virCgroupNewMachine(const char *name, bool isContainer, size_t nnicindexes, int *nicindexes, + virCgroupRegister *reg, const char *partition, int controllers, virCgroupPtr *group) @@ -1741,28 +1742,42 @@ virCgroupNewMachine(const char *name, *group = NULL; - if ((rv = virCgroupNewMachineSystemd(name, - drivername, - uuid, - rootdir, - pidleader, - isContainer, - nnicindexes, - nicindexes, - partition, - controllers, - group)) == 0) - return 0; + if (*reg == VIR_CGROUP_REGISTER_DEFAULT || + *reg == VIR_CGROUP_REGISTER_MACHINED) { + if ((rv = virCgroupNewMachineSystemd(name, + drivername, + uuid, + rootdir, + pidleader, + isContainer, + nnicindexes, + nicindexes, + partition, + controllers, + group)) == 0) { + *reg = VIR_CGROUP_REGISTER_MACHINED; + return 0; + } - if (rv == -1) - return -1; + if (rv == -1) + return -1; + + if (*reg == VIR_CGROUP_REGISTER_MACHINED) { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + "%s", _("Systemd machined requested, but not available")); + return -1; + } + } - return virCgroupNewMachineManual(name, - drivername, - pidleader, - partition, - controllers, - group); + rv = virCgroupNewMachineManual(name, + drivername, + pidleader, + partition, + controllers, + group); + if (rv == 0) + *reg = VIR_CGROUP_REGISTER_DIRECT; + return rv; } diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index d833927678..63ee1aba5c 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -46,7 +46,8 @@ enum { VIR_CGROUP_CONTROLLER_LAST }; -VIR_ENUM_DECL(virCgroupController); +VIR_ENUM_DECL(virCgroupController) + /* Items of this enum are used later in virCgroupNew to create * bit array stored in int. Like this: * 1 << VIR_CGROUP_CONTROLLER_CPU @@ -103,6 +104,12 @@ virCgroupNewDetectMachine(const char *name, virCgroupPtr *group) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); +typedef enum { + VIR_CGROUP_REGISTER_DEFAULT, + VIR_CGROUP_REGISTER_DIRECT, + VIR_CGROUP_REGISTER_MACHINED, +} virCgroupRegister; + int virCgroupNewMachine(const char *name, const char *drivername, const unsigned char *uuid, @@ -111,6 +118,7 @@ int virCgroupNewMachine(const char *name, bool isContainer, size_t nnicindexes, int *nicindexes, + virCgroupRegister *reg, const char *partition, int controllers, virCgroupPtr *group) -- 2.14.3 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list