From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> Historically QEMU/LXC guests have been placed in a cgroup layout that is: $LOCATION-OF-LIBVIRTD/libvirt/{qemu,lxc}/$VMNAME This is bad for a number of reasons: - The cgroup hierarchy gets very deep, which seriously impacts kernel performance due to cgroups scalability limitations. - It is hard to set up cgroup policies which apply across services and virtual machines, since all VMs are underneath the libvirtd service. To address this, the default cgroup location is changed to be: /system/$VMNAME.{lxc,qemu}.libvirt This puts virtual machines at the same level in the hierarchy as system services, allowing consistent policy to be set up across all of them. This also honours the new resource partition location from the XML configuration, for example: <resource> <partition>/virtualmachines/production</partition> </resource> will result in the VM being placed at: /virtualmachines/production/$VMNAME.{lxc,qemu}.libvirt NB, with the exception of the default path, /system, which is intended to always exist, libvirt will not attempt to auto-create the partitions in the XML. It is the responsibility of the admin/app to configure the partitions. Later libvirt APIs will provide a way to do this. Signed-off-by: Daniel P. 
Berrange <berrange@xxxxxxxxxx> --- src/lxc/lxc_cgroup.c | 91 +++++++++++++++++++++++++++++++------- src/lxc/lxc_cgroup.h | 2 +- src/lxc/lxc_process.c | 4 +- src/qemu/qemu_cgroup.c | 114 +++++++++++++++++++++++++++++++++++++----------- src/qemu/qemu_cgroup.h | 3 +- src/qemu/qemu_process.c | 2 +- 6 files changed, 169 insertions(+), 47 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 72940bd..8f19057 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -523,29 +523,88 @@ cleanup: } -virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def) +virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup) { - virCgroupPtr driver = NULL; - virCgroupPtr cgroup = NULL; int rc; + virCgroupPtr parent = NULL; + virCgroupPtr cgroup = NULL; - rc = virCgroupNewDriver("lxc", true, false, -1, &driver); - if (rc != 0) { - virReportSystemError(-rc, "%s", - _("Unable to get cgroup for driver")); - goto cleanup; + if (!def->resource && startup) { + virDomainResourceDefPtr res; + + if (VIR_ALLOC(res) < 0) { + virReportOOMError(); + goto cleanup; + } + + if (!(res->partition = strdup("/system"))) { + virReportOOMError(); + VIR_FREE(res); + goto cleanup; + } + + def->resource = res; } - rc = virCgroupNewDomainDriver(driver, def->name, true, &cgroup); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for domain %s"), - def->name); - goto cleanup; + if (def->resource && + def->resource->partition) { + if (def->resource->partition[0] != '/') { + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + def->resource->partition); + goto cleanup; + } + /* We only auto-create the default partition. 
In other + * cases we expec the sysadmin/app to have done so */ + rc = virCgroupNewPartition(def->resource->partition, + STREQ(def->resource->partition, "/system"), + -1, + &parent); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to initialize %s cgroup"), + def->resource->partition); + goto cleanup; + } + + rc = virCgroupNewDomainPartition(parent, + "lxc", + def->name, + true, + &cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + def->name); + goto cleanup; + } + } else { + rc = virCgroupNewDriver("lxc", + true, + true, + -1, + &parent); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + def->name); + goto cleanup; + } + + rc = virCgroupNewDomainDriver(parent, + def->name, + true, + &cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + def->name); + goto cleanup; + } } cleanup: - virCgroupFree(&driver); + virCgroupFree(&parent); return cgroup; } @@ -556,7 +615,7 @@ virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def) int ret = -1; int rc; - if (!(cgroup = virLXCCgroupCreate(def))) + if (!(cgroup = virLXCCgroupCreate(def, true))) return NULL; rc = virCgroupAddTask(cgroup, getpid()); diff --git a/src/lxc/lxc_cgroup.h b/src/lxc/lxc_cgroup.h index 25a427c..f040de2 100644 --- a/src/lxc/lxc_cgroup.h +++ b/src/lxc/lxc_cgroup.h @@ -27,7 +27,7 @@ # include "lxc_fuse.h" # include "virusb.h" -virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def); +virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def, bool startup); virCgroupPtr virLXCCgroupJoin(virDomainDefPtr def); int virLXCCgroupSetup(virDomainDefPtr def, virCgroupPtr cgroup, diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c index 1bbffa3..ab07a1e 100644 --- a/src/lxc/lxc_process.c +++ b/src/lxc/lxc_process.c @@ -1049,7 +1049,7 @@ int virLXCProcessStart(virConnectPtr conn, virCgroupFree(&priv->cgroup); - if (!(priv->cgroup = virLXCCgroupCreate(vm->def))) + if (!(priv->cgroup = 
virLXCCgroupCreate(vm->def, true))) return -1; if (!virCgroupHasController(priv->cgroup, @@ -1464,7 +1464,7 @@ virLXCProcessReconnectDomain(virDomainObjPtr vm, if (!(priv->monitor = virLXCProcessConnectMonitor(driver, vm))) goto error; - if (!(priv->cgroup = virLXCCgroupCreate(vm->def))) + if (!(priv->cgroup = virLXCCgroupCreate(vm->def, false))) goto error; if (virLXCUpdateActiveUsbHostdevs(driver, vm->def) < 0) diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index cb0faa1..db9aafe 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -188,46 +188,108 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev ATTRIBUTE_UNUSED, int qemuInitCgroup(virQEMUDriverPtr driver, - virDomainObjPtr vm) + virDomainObjPtr vm, + bool startup) { - int rc; + int rc = -1; qemuDomainObjPrivatePtr priv = vm->privateData; - virCgroupPtr driverGroup = NULL; + virCgroupPtr parent = NULL; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); virCgroupFree(&priv->cgroup); - rc = virCgroupNewDriver("qemu", - cfg->privileged, - true, - cfg->cgroupControllers, - &driverGroup); - if (rc != 0) { - if (rc == -ENXIO || - rc == -EPERM || - rc == -EACCES) { /* No cgroups mounts == success */ - VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); - goto done; + if (!vm->def->resource && startup) { + virDomainResourceDefPtr res; + + if (VIR_ALLOC(res) < 0) { + virReportOOMError(); + goto cleanup; } - virReportSystemError(-rc, - _("Unable to create cgroup for %s"), - vm->def->name); - goto cleanup; + if (!(res->partition = strdup("/system"))) { + virReportOOMError(); + VIR_FREE(res); + goto cleanup; + } + + vm->def->resource = res; } - rc = virCgroupNewDomainDriver(driverGroup, vm->def->name, true, &priv->cgroup); - if (rc != 0) { - virReportSystemError(-rc, - _("Unable to create cgroup for %s"), - vm->def->name); - goto cleanup; + if (vm->def->resource && + vm->def->resource->partition) { + if (vm->def->resource->partition[0] != '/') { + 
virReportError(VIR_ERR_CONFIG_UNSUPPORTED, + _("Resource partition '%s' must start with '/'"), + vm->def->resource->partition); + goto cleanup; + } + /* We only auto-create the default partition. In other + * cases we expec the sysadmin/app to have done so */ + rc = virCgroupNewPartition(vm->def->resource->partition, + STREQ(vm->def->resource->partition, "/system"), + cfg->cgroupControllers, + &parent); + if (rc != 0) { + if (rc == -ENXIO || + rc == -EPERM || + rc == -EACCES) { /* No cgroups mounts == success */ + VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); + goto done; + } + + virReportSystemError(-rc, + _("Unable to initialize %s cgroup"), + vm->def->resource->partition); + goto cleanup; + } + + rc = virCgroupNewDomainPartition(parent, + "qemu", + vm->def->name, + true, + &priv->cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } + } else { + rc = virCgroupNewDriver("qemu", + cfg->privileged, + true, + cfg->cgroupControllers, + &parent); + if (rc != 0) { + if (rc == -ENXIO || + rc == -EPERM || + rc == -EACCES) { /* No cgroups mounts == success */ + VIR_DEBUG("No cgroups present/configured/accessible, ignoring error"); + goto done; + } + + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } + + rc = virCgroupNewDomainDriver(parent, + vm->def->name, + true, + &priv->cgroup); + if (rc != 0) { + virReportSystemError(-rc, + _("Unable to create cgroup for %s"), + vm->def->name); + goto cleanup; + } } done: rc = 0; cleanup: - virCgroupFree(&driverGroup); + virCgroupFree(&parent); virObjectUnref(cfg); return rc; } @@ -246,7 +308,7 @@ int qemuSetupCgroup(virQEMUDriverPtr driver, (const char *const *)cfg->cgroupDeviceACL : defaultDeviceACL; - if (qemuInitCgroup(driver, vm) < 0) + if (qemuInitCgroup(driver, vm, true) < 0) return -1; if (!priv->cgroup) diff --git a/src/qemu/qemu_cgroup.h b/src/qemu/qemu_cgroup.h index 
6cbfebc..e63f443 100644 --- a/src/qemu/qemu_cgroup.h +++ b/src/qemu/qemu_cgroup.h @@ -37,7 +37,8 @@ int qemuSetupHostUsbDeviceCgroup(virUSBDevicePtr dev, const char *path, void *opaque); int qemuInitCgroup(virQEMUDriverPtr driver, - virDomainObjPtr vm); + virDomainObjPtr vm, + bool startup); int qemuSetupCgroup(virQEMUDriverPtr driver, virDomainObjPtr vm, virBitmapPtr nodemask); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index da47b43..ce9f501 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3005,7 +3005,7 @@ qemuProcessReconnect(void *opaque) if (qemuUpdateActiveUsbHostdevs(driver, obj->def) < 0) goto error; - if (qemuInitCgroup(driver, obj) < 0) + if (qemuInitCgroup(driver, obj, false) < 0) goto error; /* XXX: Need to change as long as lock is introduced for -- 1.8.1.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list