From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> If a user cgroup name begins with "cgroup.", "_" or with any of the controllers from /proc/cgroups followed by a dot, then they need to be prefixed with a single underscore. eg if there is an object "cpu.service", then this would end up as "_cpu.service" in the cgroup filesystem tree, however, "waldo.service" would stay "waldo.service", at least as long as nobody comes up with a cgroup controller called "waldo". Since we require a '.XXXX' suffix on all partitions, there is no scope for clashing with the kernel 'tasks' and 'release_agent' files. Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx> --- src/util/vircgroup.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ tests/vircgroupmock.c | 27 +++++++++++++--- tests/vircgrouptest.c | 54 ++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 4 deletions(-) diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c index 297408d..f132e60 100644 --- a/src/util/vircgroup.c +++ b/src/util/vircgroup.c @@ -1092,6 +1092,84 @@ cleanup: #if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R +static int virCgroupNeedEscape(const char *path) +{ + FILE *fp = NULL; + int ret = 0; + char *line = NULL; + size_t len; + + /* If it starts with 'cgroup.' or a '_' of any + * of the controller names from /proc/cgroups, + * then we must prefix a '_' + */ + if (STRPREFIX(path, "cgroup.")) + return 1; + + if (path[0] == '_') + return 1; + + if (!(fp = fopen("/proc/cgroups", "r"))) + return -errno; + + /* + * Data looks like this: + * #subsys_name hierarchy num_cgroups enabled + * cpuset 2 4 1 + * cpu 3 48 1 + * cpuacct 3 48 1 + * memory 4 4 1 + * devices 5 4 1 + * freezer 6 4 1 + * net_cls 7 1 1 + */ + while (getline(&line, &len, fp) > 0) { + if (STRPREFIX(line, "#subsys_name")) { + VIR_FREE(line); + continue; + } + char *tmp = strchr(line, ' '); + if (tmp) + *tmp = '\0'; + len = tmp - line; + + if (STRPREFIX(path, line) && + path[len] == '.') { + ret = 1; + VIR_FREE(line); + goto cleanup; + } + VIR_FREE(line); + } + + if (ferror(fp)) { + ret = -EIO; + goto cleanup; + } + +cleanup: + VIR_FORCE_FCLOSE(fp); + return ret; +} + +static int virCgroupEscape(char **path) +{ + size_t len = strlen(*path); + int rc; + + if ((rc = virCgroupNeedEscape(*path)) <= 0) + return rc; + + if (VIR_REALLOC_N(*path, len + 2) < 0) + return -ENOMEM; + + memmove((*path) + 1, + *path, + len + 1); + (*path)[0] = '_'; + return 0; +} + static char *virCgroupSetPartitionSuffix(const char *path) { char **tokens = virStringSplit(path, "/", 0); @@ -1123,6 +1201,11 @@ static char *virCgroupSetPartitionSuffix(const char *path) } strcat(tokens[i], ".partition"); } + + if (virCgroupEscape(&(tokens[i])) < 0) { + virReportOOMError(); + goto cleanup; + } } if (!(ret = virStringJoin((const char **)tokens, "/"))) @@ -1352,6 +1435,9 @@ int virCgroupNewDomainPartition(virCgroupPtr partition, name, driver) < 0) return -ENOMEM; + if ((rc = virCgroupEscape(&grpname)) < 0) + return rc; + rc = virCgroupNew(grpname, partition, -1, group); if (rc == 0) { diff --git a/tests/vircgroupmock.c b/tests/vircgroupmock.c index d6b0938..17ea75f 100644 --- a/tests/vircgroupmock.c +++ b/tests/vircgroupmock.c @@ -66,7 +66,7 @@ static char *fakesysfsdir; * Co-mounting cpu & cpuacct controllers * An anonymous controller for systemd */ -const char *mounts = +const char *procmounts = "rootfs / rootfs rw 0 0\n" "tmpfs /run tmpfs rw,seclabel,nosuid,nodev,mode=755 0 0\n" "tmpfs /not/really/sys/fs/cgroup tmpfs rw,seclabel,nosuid,nodev,noexec,mode=755 0 0\n" @@ -79,7 +79,7 @@ const char *mounts = "/dev/sda1 /boot ext4 rw,seclabel,relatime,data=ordered 0 0\n" "tmpfs /tmp tmpfs rw,seclabel,relatime,size=1024000k 0 0\n"; -const char *cgroups = +const char *procselfcgroups = "115:memory:/\n" "8:blkio:/\n" "6:freezer:/\n" @@ -87,6 +87,17 @@ const char *cgroups = "2:cpuset:/\n" "1:name=systemd:/user/berrange/123\n"; +const char *proccgroups = + "#subsys_name hierarchy num_cgroups enabled\n" + "cpuset 2 4 1\n" + "cpu 3 48 1\n" + "cpuacct 3 48 1\n" + "memory 4 4 1\n" + "devices 5 4 1\n" + "freezer 6 4 1\n" + "blkio 8 4 1\n"; + + static int make_file(const char *path, const char *name, const char *value) @@ -366,7 +377,15 @@ FILE *fopen(const char *path, const char *mode) if (STREQ(path, "/proc/mounts")) { if (STREQ(mode, "r")) { - return fmemopen((void *)mounts, strlen(mounts), mode); + return fmemopen((void *)procmounts, strlen(procmounts), mode); + } else { + errno = EACCES; + return NULL; + } + } + if (STREQ(path, "/proc/cgroups")) { + if (STREQ(mode, "r")) { + return fmemopen((void *)proccgroups, strlen(proccgroups), mode); } else { errno = EACCES; return NULL; @@ -374,7 +393,7 @@ FILE *fopen(const char *path, const char *mode) } if (STREQ(path, "/proc/self/cgroup")) { if (STREQ(mode, "r")) { - return fmemopen((void *)cgroups, strlen(cgroups), mode); + return fmemopen((void *)procselfcgroups, strlen(procselfcgroups), mode); } else { errno = EACCES; return NULL; diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c index b51106a..e2adef8 100644 --- a/tests/vircgrouptest.c +++ b/tests/vircgrouptest.c @@ -441,6 +441,58 @@ cleanup: return ret; } +static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNUSED) +{ + virCgroupPtr partitioncgroup1 = NULL; + virCgroupPtr partitioncgroup2 = NULL; + virCgroupPtr partitioncgroup3 = NULL; + virCgroupPtr domaincgroup = NULL; + int ret = -1; + int rv; + const char *placement[VIR_CGROUP_CONTROLLER_LAST] = { + [VIR_CGROUP_CONTROLLER_CPU] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_CPUACCT] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_CPUSET] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_MEMORY] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_DEVICES] = NULL, + [VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + [VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", + }; + + if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) { + fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv); + goto cleanup; + } + + if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil", true, -1, &partitioncgroup2)) != 0) { + fprintf(stderr, "Failed to create /cgroup.evil/cpu.evil cgroup: %d\n", -rv); + goto cleanup; + } + + if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil/_evil.evil", true, -1, &partitioncgroup3)) != 0) { + fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv); + goto cleanup; + } + + if ((rv = virCgroupNewDomainPartition(partitioncgroup3, "lxc", "cpu.foo", true, &domaincgroup)) != 0) { + fprintf(stderr, "Cannot create LXC cgroup: %d\n", -rv); + goto cleanup; + } + + /* NB we're not expecting 'net_cls.evil' to be escaped, + * since our fake /proc/cgroups pretends this controller + * isn't compiled into the kernel + */ + ret = validateCgroup(domaincgroup, "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", mountsFull, links, placement); + +cleanup: + virCgroupFree(&partitioncgroup3); + virCgroupFree(&partitioncgroup2); + virCgroupFree(&partitioncgroup1); + virCgroupFree(&domaincgroup); + return ret; +} + # define FAKESYSFSDIRTEMPLATE abs_builddir "/fakesysfsdir-XXXXXX" static int @@ -482,6 +534,8 @@ mymain(void) if (virtTestRun("New cgroup for domain partition", 1, testCgroupNewForPartitionDomain, NULL) < 0) ret = -1; + if (virtTestRun("New cgroup for domain partition escaped", 1, testCgroupNewForPartitionDomainEscaped, NULL) < 0) + ret = -1; if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL) virFileDeleteTree(fakesysfsdir); -- 1.8.1.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list