[PATCH 3/3] Do proper escaping of cgroup resource partitions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: "Daniel P. Berrange" <berrange@xxxxxxxxxx>

If a user cgroup name begins with "cgroup.", "_" or with any of
the controllers from /proc/cgroups followed by a dot, then they
need to be prefixed with a single underscore. eg if there is
an object "cpu.service", then this would end up as "_cpu.service"
in the cgroup filesystem tree, however, "waldo.service" would
stay "waldo.service", at least as long as nobody comes up with
a cgroup controller called "waldo".

Since we require a '.XXXX' suffix on all partitions, there is
no scope for clashing with the kernel 'tasks' and 'release_agent'
files.

Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx>
---
 src/util/vircgroup.c  | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/vircgroupmock.c | 27 +++++++++++++---
 tests/vircgrouptest.c | 54 ++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+), 4 deletions(-)

diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c
index 297408d..f132e60 100644
--- a/src/util/vircgroup.c
+++ b/src/util/vircgroup.c
@@ -1092,6 +1092,84 @@ cleanup:
 
 
 #if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+static int virCgroupNeedEscape(const char *path)
+{
+    FILE *fp = NULL;
+    int ret = 0;
+    char *line = NULL;
+    size_t len;
+
+    /* If it starts with 'cgroup.' or a '_' of any
+     * of the controller names from /proc/cgroups,
+     * then we must prefix a '_'
+     */
+    if (STRPREFIX(path, "cgroup."))
+        return 1;
+
+    if (path[0] == '_')
+        return 1;
+
+    if (!(fp = fopen("/proc/cgroups", "r")))
+        return -errno;
+
+    /*
+     * Data looks like this:
+     * #subsys_name hierarchy num_cgroups enabled
+     * cpuset  2 4  1
+     * cpu     3 48 1
+     * cpuacct 3 48 1
+     * memory  4 4  1
+     * devices 5 4  1
+     * freezer 6 4  1
+     * net_cls 7 1  1
+     */
+    while (getline(&line, &len, fp) > 0) {
+        if (STRPREFIX(line, "#subsys_name")) {
+            VIR_FREE(line);
+            continue;
+        }
+        char *tmp = strchr(line, ' ');
+        if (tmp)
+            *tmp = '\0';
+        len = tmp - line;
+
+        if (STRPREFIX(path, line) &&
+            path[len] == '.') {
+            ret = 1;
+            VIR_FREE(line);
+            goto cleanup;
+        }
+        VIR_FREE(line);
+    }
+
+    if (ferror(fp)) {
+        ret = -EIO;
+        goto cleanup;
+    }
+
+cleanup:
+    VIR_FORCE_FCLOSE(fp);
+    return ret;
+}
+
+static int virCgroupEscape(char **path)
+{
+    size_t len = strlen(*path);
+    int rc;
+
+    if ((rc = virCgroupNeedEscape(*path)) <= 0)
+        return rc;
+
+    if (VIR_REALLOC_N(*path, len + 2) < 0)
+        return -ENOMEM;
+
+    memmove((*path) + 1,
+            *path,
+            len + 1);
+    (*path)[0] = '_';
+    return 0;
+}
+
 static char *virCgroupSetPartitionSuffix(const char *path)
 {
     char **tokens = virStringSplit(path, "/", 0);
@@ -1123,6 +1201,11 @@ static char *virCgroupSetPartitionSuffix(const char *path)
             }
             strcat(tokens[i], ".partition");
         }
+
+        if (virCgroupEscape(&(tokens[i])) < 0) {
+            virReportOOMError();
+            goto cleanup;
+        }
     }
 
     if (!(ret = virStringJoin((const char **)tokens, "/")))
@@ -1352,6 +1435,9 @@ int virCgroupNewDomainPartition(virCgroupPtr partition,
                     name, driver) < 0)
         return -ENOMEM;
 
+    if ((rc = virCgroupEscape(&grpname)) < 0)
+        return rc;
+
     rc = virCgroupNew(grpname, partition, -1, group);
 
     if (rc == 0) {
diff --git a/tests/vircgroupmock.c b/tests/vircgroupmock.c
index d6b0938..17ea75f 100644
--- a/tests/vircgroupmock.c
+++ b/tests/vircgroupmock.c
@@ -66,7 +66,7 @@ static char *fakesysfsdir;
  * Co-mounting cpu & cpuacct controllers
  * An anonymous controller for systemd
  */
-const char *mounts =
+const char *procmounts =
     "rootfs / rootfs rw 0 0\n"
     "tmpfs /run tmpfs rw,seclabel,nosuid,nodev,mode=755 0 0\n"
     "tmpfs /not/really/sys/fs/cgroup tmpfs rw,seclabel,nosuid,nodev,noexec,mode=755 0 0\n"
@@ -79,7 +79,7 @@ const char *mounts =
     "/dev/sda1 /boot ext4 rw,seclabel,relatime,data=ordered 0 0\n"
     "tmpfs /tmp tmpfs rw,seclabel,relatime,size=1024000k 0 0\n";
 
-const char *cgroups =
+const char *procselfcgroups =
     "115:memory:/\n"
     "8:blkio:/\n"
     "6:freezer:/\n"
@@ -87,6 +87,17 @@ const char *cgroups =
     "2:cpuset:/\n"
     "1:name=systemd:/user/berrange/123\n";
 
+const char *proccgroups =
+    "#subsys_name    hierarchy       num_cgroups     enabled\n"
+    "cpuset  2       4       1\n"
+    "cpu     3       48      1\n"
+    "cpuacct 3       48      1\n"
+    "memory  4       4       1\n"
+    "devices 5       4       1\n"
+    "freezer 6       4       1\n"
+    "blkio   8       4       1\n";
+
+
 static int make_file(const char *path,
                      const char *name,
                      const char *value)
@@ -366,7 +377,15 @@ FILE *fopen(const char *path, const char *mode)
 
     if (STREQ(path, "/proc/mounts")) {
         if (STREQ(mode, "r")) {
-            return fmemopen((void *)mounts, strlen(mounts), mode);
+            return fmemopen((void *)procmounts, strlen(procmounts), mode);
+        } else {
+            errno = EACCES;
+            return NULL;
+        }
+    }
+    if (STREQ(path, "/proc/cgroups")) {
+        if (STREQ(mode, "r")) {
+            return fmemopen((void *)proccgroups, strlen(proccgroups), mode);
         } else {
             errno = EACCES;
             return NULL;
@@ -374,7 +393,7 @@ FILE *fopen(const char *path, const char *mode)
     }
     if (STREQ(path, "/proc/self/cgroup")) {
         if (STREQ(mode, "r")) {
-            return fmemopen((void *)cgroups, strlen(cgroups), mode);
+            return fmemopen((void *)procselfcgroups, strlen(procselfcgroups), mode);
         } else {
             errno = EACCES;
             return NULL;
diff --git a/tests/vircgrouptest.c b/tests/vircgrouptest.c
index b51106a..e2adef8 100644
--- a/tests/vircgrouptest.c
+++ b/tests/vircgrouptest.c
@@ -441,6 +441,58 @@ cleanup:
     return ret;
 }
 
+static int testCgroupNewForPartitionDomainEscaped(const void *args ATTRIBUTE_UNUSED)
+{
+    virCgroupPtr partitioncgroup1 = NULL;
+    virCgroupPtr partitioncgroup2 = NULL;
+    virCgroupPtr partitioncgroup3 = NULL;
+    virCgroupPtr domaincgroup = NULL;
+    int ret = -1;
+    int rv;
+    const char *placement[VIR_CGROUP_CONTROLLER_LAST] = {
+        [VIR_CGROUP_CONTROLLER_CPU] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_CPUACCT] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_CPUSET] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_MEMORY] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_DEVICES] = NULL,
+        [VIR_CGROUP_CONTROLLER_FREEZER] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+        [VIR_CGROUP_CONTROLLER_BLKIO] = "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc",
+    };
+
+    if ((rv = virCgroupNewPartition("/cgroup.evil", true, -1, &partitioncgroup1)) != 0) {
+        fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv);
+        goto cleanup;
+    }
+
+    if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil", true, -1, &partitioncgroup2)) != 0) {
+        fprintf(stderr, "Failed to create /cgroup.evil/cpu.evil cgroup: %d\n", -rv);
+        goto cleanup;
+    }
+
+    if ((rv = virCgroupNewPartition("/cgroup.evil/net_cls.evil/_evil.evil", true, -1, &partitioncgroup3)) != 0) {
+        fprintf(stderr, "Failed to create /cgroup.evil cgroup: %d\n", -rv);
+        goto cleanup;
+    }
+
+    if ((rv = virCgroupNewDomainPartition(partitioncgroup3, "lxc", "cpu.foo", true, &domaincgroup)) != 0) {
+        fprintf(stderr, "Cannot create LXC cgroup: %d\n", -rv);
+        goto cleanup;
+    }
+
+    /* NB we're not expecting 'net_cls.evil' to be escaped,
+     * since our fake /proc/cgroups pretends this controller
+     * isn't compiled into the kernel
+     */
+    ret = validateCgroup(domaincgroup, "/_cgroup.evil/net_cls.evil/__evil.evil/_cpu.foo.libvirt-lxc", mountsFull, links, placement);
+
+cleanup:
+    virCgroupFree(&partitioncgroup3);
+    virCgroupFree(&partitioncgroup2);
+    virCgroupFree(&partitioncgroup1);
+    virCgroupFree(&domaincgroup);
+    return ret;
+}
+
 # define FAKESYSFSDIRTEMPLATE abs_builddir "/fakesysfsdir-XXXXXX"
 
 static int
@@ -482,6 +534,8 @@ mymain(void)
     if (virtTestRun("New cgroup for domain partition", 1, testCgroupNewForPartitionDomain, NULL) < 0)
         ret = -1;
 
+    if (virtTestRun("New cgroup for domain partition escaped", 1, testCgroupNewForPartitionDomainEscaped, NULL) < 0)
+        ret = -1;
 
     if (getenv("LIBVIRT_SKIP_CLEANUP") == NULL)
         virFileDeleteTree(fakesysfsdir);
-- 
1.8.1.4

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]