From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> Systemd uses a named cgroup mount for tracking processes. Add it as another type of controller, albeit one which we have to special case in a number of places. In particular we must never create/delete directories there, nor add tasks. Essentially the systemd mount is to be considered read-only for libvirt. Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx> --- src/util/vircgroup.c | 68 +++++++++++++++++++++++++++++++++++++++------------ src/util/vircgroup.h | 1 + tests/vircgrouptest.c | 9 +++++++ 3 files changed, 63 insertions(+), 15 deletions(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 68ec953..5ff8850 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -57,7 +57,8 @@ VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, "cpu", "cpuacct", "cpuset", "memory", "devices", - "freezer", "blkio", "net_cls", "perf_event"); + "freezer", "blkio", "net_cls", "perf_event", + "name=systemd"); typedef enum { VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */ @@ -115,6 +116,9 @@ virCgroupValidateMachineGroup(virCgroupPtr group, for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { char *tmp; + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (!group->controllers[i].placement) continue; @@ -320,6 +324,9 @@ static int virCgroupCopyPlacement(virCgroupPtr group, if (!group->controllers[i].mountPoint) continue; + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (path[0] == '/') { if (VIR_STRDUP(group->controllers[i].placement, path) < 0) return -1; @@ -375,6 +382,8 @@ static int virCgroupDetectPlacement(virCgroupPtr group, int ret = -1; char *procfile; + VIR_DEBUG("Detecting placement for pid %lld path %s", + (unsigned long long)pid, path); if (pid == -1) { if (VIR_STRDUP(procfile, "/proc/self/cgroup") < 0) goto cleanup; @@ -411,6 +420,7 @@ static int virCgroupDetectPlacement(virCgroupPtr group, const char *typestr = virCgroupControllerTypeToString(i); int typelen = strlen(typestr); char *tmp = controllers; + while (tmp) { char *next = strchr(tmp, ','); int len; @@ -427,13 +437,20 @@ static int virCgroupDetectPlacement(virCgroupPtr group, * selfpath=="/libvirt.service" + path="foo" -> "/libvirt.service/foo" */ if (typelen == len && STREQLEN(typestr, tmp, len) && - group->controllers[i].mountPoint != NULL) { - if (virAsprintf(&group->controllers[i].placement, - "%s%s%s", selfpath, - (STREQ(selfpath, "/") || - STREQ(path, "") ? "" : "/"), - path) < 0) - goto cleanup; + group->controllers[i].mountPoint != NULL && + group->controllers[i].placement == NULL) { + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) { + if (VIR_STRDUP(group->controllers[i].placement, + selfpath) < 0) + goto cleanup; + } else { + if (virAsprintf(&group->controllers[i].placement, + "%s%s%s", selfpath, + (STREQ(selfpath, "/") || + STREQ(path, "") ? "" : "/"), + path) < 0) + goto cleanup; + } } tmp = next; @@ -524,13 +541,16 @@ static int virCgroupDetect(virCgroupPtr group, return -1; } - if (parent || path[0] == '/') { - if (virCgroupCopyPlacement(group, path, parent) < 0) - return -1; - } else { - if (virCgroupDetectPlacement(group, pid, path) < 0) - return -1; - } + /* In some cases we can copy part of the placement info + * based on the parent cgroup... + */ + if ((parent || path[0] == '/') && + virCgroupCopyPlacement(group, path, parent) < 0) + return -1; + + /* ... but use /proc/cgroups to fill in the rest */ + if (virCgroupDetectPlacement(group, pid, path) < 0) + return -1; /* Check that for every mounted controller, we found our placement */ for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { @@ -822,6 +842,12 @@ static int virCgroupMakeGroup(virCgroupPtr parent, for (i = 0; i < VIR_CGROUP_CONTROLLER_LAST; i++) { char *path = NULL; + /* We must never mkdir() in systemd's hierachy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) { + VIR_DEBUG("Not creating systemd controller group"); + continue; + } + /* Skip over controllers that aren't mounted */ if (!group->controllers[i].mountPoint) { VIR_DEBUG("Skipping unmounted controller %s", @@ -1026,6 +1052,10 @@ int virCgroupRemove(virCgroupPtr group) if (!group->controllers[i].mountPoint) continue; + /* We must never rmdir() in systemd's hiearchy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + /* Don't delete the root group, if we accidentally ended up in it for some reason */ if (STREQ(group->controllers[i].placement, "/")) @@ -1065,6 +1095,10 @@ int virCgroupAddTask(virCgroupPtr group, pid_t pid) if (!group->controllers[i].mountPoint) continue; + /* We must never add tasks in systemd's hiearchy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + if (virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid) < 0) goto cleanup; } @@ -1166,6 +1200,10 @@ int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group) !dest_group->controllers[i].mountPoint) continue; + /* We must never move tasks in systemd's hiearchy */ + if (i == VIR_CGROUP_CONTROLLER_SYSTEMD) + continue; + /* New threads are created in the same group as their parent; * but if a thread is created after we first read we aren't * aware that it needs to move. Therefore, we must iterate diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h index 3aaf081..e579f41 100644 --- a/src/util/vircgroup.h +++ b/src/util/vircgroup.h @@ -40,6 +40,7 @@ enum { VIR_CGROUP_CONTROLLER_BLKIO, VIR_CGROUP_CONTROLLER_NET_CLS, VIR_CGROUP_CONTROLLER_PERF_EVENT, + VIR_CGROUP_CONTROLLER_SYSTEMD, VIR_CGROUP_CONTROLLER_LAST }; diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c index 20ac494..4bdd4c9 100644 --- a/tests/vircgrouptest.c +++ b/tests/vircgrouptest.c @@ -87,6 +87,7 @@ const char *mountsSmall[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_CPU] = "/not/really/sys/fs/cgroup/cpu,cpuacct", @@ -96,6 +97,7 @@ const char *mountsFull[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/not/really/sys/fs/cgroup/freezer", [VIR_CGROUP_CONTROLLER_BLKIO] = "/not/really/sys/fs/cgroup/blkio", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/not/really/sys/fs/cgroup/systemd", }; const char *links[VIR_CGROUP_CONTROLLER_LAST] = { @@ -121,6 +123,7 @@ static int testCgroupNewForSelf(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/", [VIR_CGROUP_CONTROLLER_BLKIO] = "/", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if (virCgroupNewSelf(&cgroup) < 0) { @@ -161,6 +164,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = NULL, [VIR_CGROUP_CONTROLLER_BLKIO] = NULL, + [VIR_CGROUP_CONTROLLER_SYSTEMD] = NULL, }; const char *placementFull[VIR_CGROUP_CONTROLLER_LAST] = { [VIR_CGROUP_CONTROLLER_CPU] = "/virtualmachines.partition", @@ -170,6 +174,7 @@ static int testCgroupNewForPartition(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/virtualmachines.partition", [VIR_CGROUP_CONTROLLER_BLKIO] = "/virtualmachines.partition", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/virtualmachines", false, -1, &cgroup)) != -1) { @@ -233,6 +238,7 @@ static int testCgroupNewForPartitionNested(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/deployment.partition/production.partition", [VIR_CGROUP_CONTROLLER_BLKIO] = "/deployment.partition/production.partition", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/deployment/production", false, -1, &cgroup)) != -1) { @@ -281,6 +287,7 @@ static int testCgroupNewForPartitionNestedDeep(const void *args ATTRIBUTE_UNUSED [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/user/berrange.user/production.partition", [VIR_CGROUP_CONTROLLER_BLKIO] = "/user/berrange.user/production.partition", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/user/berrange.user/production", false, -1, &cgroup)) != -1) { @@ -336,6 +343,7 @@ static int testCgroupNewForPartitionDomain(const void *args ATTRIBUTE_UNUSED) [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/production.partition/foo.libvirt-lxc", [VIR_CGROUP_CONTROLLER_BLKIO] = "/production.partition/foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/production", true, -1, &partitioncgroup)) != 0) { @@ -372,6 +380,7 @@ static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNU [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, [VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", [VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_SYSTEMD] = "/user/berrange/123", }; if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) { -- 1.8.1.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list