From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> Currently when getting an instance of virCgroupPtr we will create the path in all cgroup controllers. Only at the virt driver layer are we attempting to filter controllers. This is bad because the mere act of creating the dirs in the controllers can have a functional impact on the kernel, particularly for performance. Update the virCgroupForDriver() method to accept a bitmask of controllers to use. Only create dirs in the controllers that are requested. When creating cgroups for domains, respect the active controller list from the parent cgroup Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx> --- src/lxc/lxc_cgroup.c | 2 +- src/lxc/lxc_driver.c | 2 +- src/qemu/qemu_cgroup.c | 27 +++------ src/qemu/qemu_conf.c | 16 +---- src/qemu/qemu_driver.c | 3 +- src/util/vircgroup.c | 162 +++++++++++++++++++++++++++++-------------------- src/util/vircgroup.h | 6 +- 7 files changed, 113 insertions(+), 105 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 33c305a..33641f8 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -530,7 +530,7 @@ virCgroupPtr virLXCCgroupCreate(virDomainDefPtr def) int ret = -1; int rc; - rc = virCgroupForDriver("lxc", &driver, 1, 0); + rc = virCgroupForDriver("lxc", &driver, 1, 0, -1); if (rc != 0) { virReportSystemError(-rc, "%s", _("Unable to get cgroup for driver")); diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c index 654ab99..9c6f858 100644 --- a/src/lxc/lxc_driver.c +++ b/src/lxc/lxc_driver.c @@ -1460,7 +1460,7 @@ static int lxcStartup(bool privileged, lxc_driver->log_libvirtd = 0; /* by default log to container logfile */ lxc_driver->have_netns = lxcCheckNetNsSupport(); - rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1); + rc = virCgroupForDriver("lxc", &lxc_driver->cgroup, privileged, 1, -1); if (rc < 0) { char buf[1024] ATTRIBUTE_UNUSED; VIR_DEBUG("Unable to create cgroup for LXC driver: %s", diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index c9b4ca2..2cdc2b7 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -57,8 +57,6 @@ bool qemuCgroupControllerActive(virQEMUDriverPtr driver, goto cleanup; if (!virCgroupMounted(driver->cgroup, controller)) goto cleanup; - if (cfg->cgroupControllers & (1 << controller)) - ret = true; cleanup: virObjectUnref(cfg); @@ -668,7 +666,7 @@ int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, virDomainDefPtr def = vm->def; unsigned long long period = vm->def->cputune.emulator_period; long long quota = vm->def->cputune.emulator_quota; - int rc, i; + int rc; if ((period || quota) && (!driver->cgroup || @@ -697,22 +695,13 @@ int qemuSetupCgroupForEmulator(virQEMUDriverPtr driver, goto cleanup; } - for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { - if (i != VIR_CGROUP_CONTROLLER_CPU && - i != VIR_CGROUP_CONTROLLER_CPUACCT && - i != VIR_CGROUP_CONTROLLER_CPUSET) - continue; - - if (!qemuCgroupControllerActive(driver, i)) - continue; - rc = virCgroupMoveTask(cgroup, cgroup_emulator, i); - if (rc < 0) { - virReportSystemError(-rc, - _("Unable to move tasks from domain cgroup to " - "emulator cgroup in controller %d for %s"), - i, vm->def->name); - goto cleanup; - } + rc = virCgroupMoveTask(cgroup, cgroup_emulator); + if (rc < 0) { + virReportSystemError(-rc, + _("Unable to move tasks from domain cgroup to " + "emulator cgroup for %s"), + vm->def->name); + goto cleanup; } if (def->placement_mode == VIR_DOMAIN_CPU_PLACEMENT_MODE_AUTO) { diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c index c2e2e10..5a3dde0 100644 --- a/src/qemu/qemu_conf.c +++ b/src/qemu/qemu_conf.c @@ -134,14 +134,7 @@ virQEMUDriverConfigPtr virQEMUDriverConfigNew(bool privileged) } cfg->dynamicOwnership = privileged; - cfg->cgroupControllers = - (1 << VIR_CGROUP_CONTROLLER_CPU) | - (1 << VIR_CGROUP_CONTROLLER_DEVICES) | - (1 << VIR_CGROUP_CONTROLLER_MEMORY) | - (1 << VIR_CGROUP_CONTROLLER_BLKIO) | - (1 << VIR_CGROUP_CONTROLLER_CPUSET) | - (1 << VIR_CGROUP_CONTROLLER_CPUACCT); - + cfg->cgroupControllers = -1; /* -1 == auto-detect */ if (privileged) { if (virAsprintf(&cfg->logDir, @@ -454,6 +447,7 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, p = virConfGetValue(conf, "cgroup_controllers"); CHECK_TYPE("cgroup_controllers", VIR_CONF_LIST); if (p) { + cfg->cgroupControllers = 0; virConfValuePtr pp; for (i = 0, pp = p->list; pp; ++i, pp = pp->next) { int ctl; @@ -472,12 +466,6 @@ int virQEMUDriverConfigLoadFile(virQEMUDriverConfigPtr cfg, cfg->cgroupControllers |= (1 << ctl); } } - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (cfg->cgroupControllers & (1 << i)) { - VIR_INFO("Configured cgroup controller '%s'", - virCgroupControllerTypeToString(i)); - } - } p = virConfGetValue(conf, "cgroup_device_acl"); CHECK_TYPE("cgroup_device_acl", VIR_CONF_LIST); diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 552a81b..2809a77 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -628,7 +628,8 @@ qemuStartup(bool privileged, goto error; } - rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1); + rc = virCgroupForDriver("qemu", &qemu_driver->cgroup, privileged, 1, + cfg->cgroupControllers); if (rc < 0) { VIR_INFO("Unable to create cgroup for driver: %s", virStrerror(-rc, ebuf, sizeof(ebuf))); diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 266cecb..085421e 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -70,8 +70,6 @@ typedef enum { * before creating subcgroups and * attaching tasks */ - VIR_CGROUP_VCPU = 1 << 1, /* create subdir only under the cgroup cpu, - * cpuacct and cpuset if possible. */ } virCgroupFlags; /** @@ -230,11 +228,12 @@ no_memory: } -static int virCgroupDetect(virCgroupPtr group) +static int virCgroupDetect(virCgroupPtr group, + int controllers) { - int any = 0; int rc; int i; + int j; rc = virCgroupDetectMounts(group); if (rc < 0) { @@ -242,14 +241,55 @@ static int virCgroupDetect(virCgroupPtr group) return rc; } - /* Check that at least 1 controller is available */ - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (group->controllers[i].mountPoint != NULL) - any = 1; + if (controllers >= 0) { + VIR_DEBUG("Validating controllers %d", controllers); + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + VIR_DEBUG("Controller '%s' wanted=%s", + virCgroupControllerTypeToString(i), + (1 << i) & controllers ? "yes" : "no"); + if (((1 << i) & controllers)) { + /* Ensure requested controller is present */ + if (!group->controllers[i].mountPoint) { + VIR_DEBUG("Requested controlled '%s' not mounted", + virCgroupControllerTypeToString(i)); + return -ENOENT; + } + } else { + /* Check whether a request to disable a controller + * clashes with co-mounting of controllers */ + for (j = 0 ; j < VIR_CGROUP_CONTROLLER_LAST ; j++) { + if (j == i) + continue; + if (!((1 << j) & controllers)) + continue; + + if (STREQ_NULLABLE(group->controllers[i].mountPoint, + group->controllers[j].mountPoint)) { + VIR_DEBUG("Controller '%s' is not wanted, but '%s' is co-mounted", + virCgroupControllerTypeToString(i), + virCgroupControllerTypeToString(j)); + return -EINVAL; + } + } + VIR_FREE(group->controllers[i].mountPoint); + } + } + } else { + VIR_DEBUG("Auto-detecting controllers"); + controllers = 0; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + VIR_DEBUG("Controller '%s' present=%s", + virCgroupControllerTypeToString(i), + group->controllers[i].mountPoint ? "yes" : "no"); + if (group->controllers[i].mountPoint == NULL) + continue; + controllers |= (1 << i); + } } - if (!any) - return -ENXIO; + /* Check that at least 1 controller is available */ + if (!controllers) + return -ENXIO; rc = virCgroupDetectPlacement(group); @@ -542,16 +582,6 @@ static int virCgroupMakeGroup(virCgroupPtr parent, if (!group->controllers[i].mountPoint) continue; - /* We need to control cpu bandwidth for each vcpu now */ - if ((flags & VIR_CGROUP_VCPU) && - (i != VIR_CGROUP_CONTROLLER_CPU && - i != VIR_CGROUP_CONTROLLER_CPUACCT && - i != VIR_CGROUP_CONTROLLER_CPUSET)) { - /* treat it as unmounted and we can use virCgroupAddTask */ - VIR_FREE(group->controllers[i].mountPoint); - continue; - } - rc = virCgroupPathOfController(group, i, "", &path); if (rc < 0) return rc; @@ -611,12 +641,13 @@ static int virCgroupMakeGroup(virCgroupPtr parent, static int virCgroupNew(const char *path, + int controllers, virCgroupPtr *group) { int rc = 0; char *typpath = NULL; - VIR_DEBUG("New group %s", path); + VIR_DEBUG("path=%s controllers=%d", path, controllers); *group = NULL; if (VIR_ALLOC((*group)) != 0) { @@ -629,7 +660,7 @@ static int virCgroupNew(const char *path, goto err; } - rc = virCgroupDetect(*group); + rc = virCgroupDetect(*group, controllers); if (rc < 0) goto err; @@ -645,17 +676,18 @@ err: static int virCgroupAppRoot(bool privileged, virCgroupPtr *group, - bool create) + bool create, + int controllers) { virCgroupPtr rootgrp = NULL; int rc; - rc = virCgroupNew("/", &rootgrp); + rc = virCgroupNew("/", controllers, &rootgrp); if (rc != 0) return rc; if (privileged) { - rc = virCgroupNew("/libvirt", group); + rc = virCgroupNew("/libvirt", controllers, group); } else { char *rootname; char *username; @@ -671,7 +703,7 @@ static int virCgroupAppRoot(bool privileged, goto cleanup; } - rc = virCgroupNew(rootname, group); + rc = virCgroupNew(rootname, controllers, group); VIR_FREE(rootname); } if (rc != 0) @@ -779,6 +811,7 @@ int virCgroupRemove(virCgroupPtr group) return rc; } + /** * virCgroupAddTask: * @@ -872,45 +905,30 @@ cleanup: * * Returns: 0 on success or -errno on failure */ -int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group, - int controller) +int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group) { - int rc = 0, err = 0; + int rc = 0; char *content = NULL; + int i; - if (controller < VIR_CGROUP_CONTROLLER_CPU || - controller > VIR_CGROUP_CONTROLLER_BLKIO) - return -EINVAL; - - if (!src_group->controllers[controller].mountPoint || - !dest_group->controllers[controller].mountPoint) { - return -EINVAL; - } - - rc = virCgroupGetValueStr(src_group, controller, "tasks", &content); - if (rc != 0) - return rc; + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (!src_group->controllers[i].mountPoint || + !dest_group->controllers[i].mountPoint) + continue; - rc = virCgroupAddTaskStrController(dest_group, content, controller); - if (rc != 0) - goto cleanup; + rc = virCgroupGetValueStr(src_group, i, "tasks", &content); + if (rc != 0) + return rc; - VIR_FREE(content); + rc = virCgroupAddTaskStrController(dest_group, content, i); + if (rc != 0) + goto cleanup; - return 0; + VIR_FREE(content); + } cleanup: - /* - * We don't need to recover dest_cgroup because cgroup will make sure - * that one task only resides in one cgroup of the same controller. - */ - err = virCgroupAddTaskStrController(src_group, content, controller); - if (err != 0) - VIR_ERROR(_("Cannot recover cgroup %s from %s"), - src_group->controllers[controller].mountPoint, - dest_group->controllers[controller].mountPoint); VIR_FREE(content); - return rc; } @@ -926,13 +944,15 @@ cleanup: int virCgroupForDriver(const char *name, virCgroupPtr *group, bool privileged, - bool create) + bool create, + int controllers) { int rc; char *path = NULL; virCgroupPtr rootgrp = NULL; - rc = virCgroupAppRoot(privileged, &rootgrp, create); + rc = virCgroupAppRoot(privileged, &rootgrp, + create, controllers); if (rc != 0) goto out; @@ -941,7 +961,7 @@ int virCgroupForDriver(const char *name, goto out; } - rc = virCgroupNew(path, group); + rc = virCgroupNew(path, controllers, group); VIR_FREE(path); if (rc == 0) { @@ -979,7 +999,7 @@ int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED, #if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R int virCgroupForSelf(virCgroupPtr *group) { - return virCgroupNew("/", group); + return virCgroupNew("/", -1, group); } #else int virCgroupForSelf(virCgroupPtr *group ATTRIBUTE_UNUSED) @@ -1012,7 +1032,7 @@ int virCgroupForDomain(virCgroupPtr driver, if (virAsprintf(&path, "%s/%s", driver->path, name) < 0) return -ENOMEM; - rc = virCgroupNew(path, group); + rc = virCgroupNew(path, -1, group); VIR_FREE(path); if (rc == 0) { @@ -1060,6 +1080,7 @@ int virCgroupForVcpu(virCgroupPtr driver, { int rc; char *path; + int controllers; if (driver == NULL) return -EINVAL; @@ -1067,11 +1088,15 @@ int virCgroupForVcpu(virCgroupPtr driver, if (virAsprintf(&path, "%s/vcpu%d", driver->path, vcpuid) < 0) return -ENOMEM; - rc = virCgroupNew(path, group); + controllers = ((1 << VIR_CGROUP_CONTROLLER_CPU) | + (1 << VIR_CGROUP_CONTROLLER_CPUACCT) | + (1 << VIR_CGROUP_CONTROLLER_CPUSET)); + + rc = virCgroupNew(path, controllers, group); VIR_FREE(path); if (rc == 0) { - rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU); + rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_NONE); if (rc != 0) virCgroupFree(group); } @@ -1103,6 +1128,7 @@ int virCgroupForEmulator(virCgroupPtr driver, { int rc; char *path; + int controllers; if (driver == NULL) return -EINVAL; @@ -1110,11 +1136,15 @@ int virCgroupForEmulator(virCgroupPtr driver, if (virAsprintf(&path, "%s/emulator", driver->path) < 0) return -ENOMEM; - rc = virCgroupNew(path, group); + controllers = ((1 << VIR_CGROUP_CONTROLLER_CPU) | + (1 << VIR_CGROUP_CONTROLLER_CPUACCT) | + (1 << VIR_CGROUP_CONTROLLER_CPUSET)); + + rc = virCgroupNew(path, controllers, group); VIR_FREE(path); if (rc == 0) { - rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU); + rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_NONE); if (rc != 0) virCgroupFree(group); } @@ -2014,7 +2044,7 @@ static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHas goto cleanup; } - if ((rc = virCgroupNew(subpath, &subgroup)) != 0) + if ((rc = virCgroupNew(subpath, -1, &subgroup)) != 0) goto cleanup; if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, true)) < 0) diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 45a2006..725d2d0 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -47,7 +47,8 @@ VIR_ENUM_DECL(virCgroupController); int virCgroupForDriver(const char *name, virCgroupPtr *group, bool privileged, - bool create); + bool create, + int controllers); int virCgroupForSelf(virCgroupPtr *group); @@ -77,8 +78,7 @@ int virCgroupAddTaskController(virCgroupPtr group, int controller); int virCgroupMoveTask(virCgroupPtr src_group, - virCgroupPtr dest_group, - int controller); + virCgroupPtr dest_group); int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight); int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight); -- 1.8.1.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list