Dan Smith wrote: > This patch adds src/cgroup.{c,h} with support for creating and manipulating > cgroups. It's quite naive at the moment, but should provide something to > work with to move forward with resource controls. > > All groups created with the internal API are forced under $mount/libvirt/ > to keep everything together. The first time a group is created, the libvirt > directory is also created, and the settings from the root are inherited. > > The code scans the mount table to look for the first mount of type cgroup, > and assumes that all controllers are mounted there. I think this > could/should be updated to prefer a mount with just the controller(s) we > want, if there are multiple ones. > > If you have the cpuset controller enabled, and cpuset.cpus_exclusive is 1, > then all cgroups to be created will fail. Since we probably shouldn't > blindly set the root to be non-exclusive, we may also want to consider this > condition to be "no cgroup support". > > diff -r 444e2614d0a2 -r 8e948eb88328 src/Makefile.am > --- a/src/Makefile.am Wed Sep 17 16:07:03 2008 +0000 > +++ b/src/Makefile.am Mon Sep 29 09:37:42 2008 -0700 > @@ -96,7 +96,8 @@ > lxc_conf.c lxc_conf.h \ > lxc_container.c lxc_container.h \ > lxc_controller.c \ > - veth.c veth.h > + veth.c veth.h \ > + cgroup.c cgroup.h > > OPENVZ_DRIVER_SOURCES = \ > openvz_conf.c openvz_conf.h \ > diff -r 444e2614d0a2 -r 8e948eb88328 src/cgroup.c > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/src/cgroup.c Mon Sep 29 09:37:42 2008 -0700 > @@ -0,0 +1,526 @@ > +/* > + * cgroup.c: Tools for managing cgroups > + * > + * Copyright IBM Corp. 2008 > + * > + * See COPYING.LIB for the License of this software > + * > + * Authors: > + * Dan Smith <danms@xxxxxxxxxx> > + */ > +#include <config.h> > + > +#include <stdio.h> > +#include <stdint.h> > +#include <inttypes.h> > +#include <mntent.h> > +#include <fcntl.h> > +#include <string.h> > +#include <errno.h> > +#include <stdlib.h> > +#include <stdbool.h> > +#include <sys/stat.h> > +#include <sys/types.h> > +#include <libgen.h> > + > +#include "internal.h" > +#include "util.h" > +#include "cgroup.h" > + > +#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__) > +#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg) > + > +struct virCgroup { > + char *path; > +}; > + There is no support for permissions, is everything run as root? > +void virCgroupFree(virCgroupPtr *group) > +{ > + if (*group != NULL) { > + free((*group)->path); > + free(*group); > + *group = NULL; > + } > +} > + > +static virCgroupPtr cgroup_get_mount(void) > +{ > + FILE *mounts; > + struct mntent entry; > + char buf[512]; Is 512 arbitrary? How do we know it is going to be sufficient? > + virCgroupPtr root = NULL; > + > + root = calloc(1, sizeof(*root)); > + if (root == NULL) > + return NULL; > + > + mounts = fopen("/proc/mounts", "r"); > + if (mounts == NULL) { > + DEBUG0("Unable to open /proc/mounts: %m"); > + goto err; > + } > + > + while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { > + if (STREQ(entry.mnt_type, "cgroup")) { > + root->path = strdup(entry.mnt_dir); > + break; > + } > + } > + > + if (root->path == NULL) { > + DEBUG0("Did not find cgroup mount"); Or strdup failed due to ENOMEM > + goto err; > + } > + > + fclose(mounts); > + > + return root; > +err: > + virCgroupFree(&root); > + > + return NULL; > +} > + > +int virCgroupHaveSupport(void) > +{ > + virCgroupPtr root; > + > + root = cgroup_get_mount(); > + if (root == NULL) > + return -1; > + > + virCgroupFree(&root); > + This is quite a horrible way of wasting computation. > + return 0; > +} > + > +static int cgroup_path_of(const char *grppath, > + const char *key, > + char **path) > +{ > + virCgroupPtr root; > + int rc = 0; > + > + root = cgroup_get_mount(); So every routine calls cgroup_path_of(), reads the mounts entry and find a entry for cgroup and returns it, why not do it just once and use it. > + if (root == NULL) { > + rc = -ENOTDIR; > + goto out; > + } > + > + if (asprintf(path, "%s/%s/%s", root->path, grppath, key) == -1) > + rc = -ENOMEM; > +out: > + virCgroupFree(&root); > + > + return rc; > +} > + > +int virCgroupSetValueStr(virCgroupPtr group, > + const char *key, > + const char *value) > +{ > + int fd = -1; > + int rc = 0; > + char *keypath = NULL; > + > + rc = cgroup_path_of(group->path, key, &keypath); > + if (rc != 0) > + return rc; > + > + fd = open(keypath, O_WRONLY); I see a mix of open and fopen calls.I would prefer to stick to just one, helps with readability. > + if (fd < 0) { > + DEBUG("Unable to open %s: %m", keypath); > + rc = -ENOENT; > + goto out; > + } > + > + DEBUG("Writing '%s' to '%s'", value, keypath); > + > + rc = safewrite(fd, value, strlen(value)); > + if (rc < 0) { > + DEBUG("Failed to write value '%s': %m", value); > + rc = -errno; > + goto out; > + } else if (rc != strlen(value)) { > + DEBUG("Short write of value '%s'", value); > + rc = -ENOSPC; > + goto out; > + } > + > + rc = 0; > +out: > + free(keypath); > + close(fd); > + > + return rc; > +} > + > +int virCgroupSetValueU64(virCgroupPtr group, > + const char *key, > + uint64_t value) > +{ > + char *strval = NULL; > + int rc; > + > + if (asprintf(&strval, "%" PRIu64, value) == -1) > + return -ENOMEM; > + > + rc = virCgroupSetValueStr(group, key, strval); > + > + free(strval); > + > + return rc; > +} > + > +int virCgroupSetValueI64(virCgroupPtr group, > + const char *key, > + int64_t value) > +{ > + char *strval = NULL; > + int rc; > + > + if (asprintf(&strval, "%" PRIi64, value) == -1) > + return -ENOMEM; > + > + rc = virCgroupSetValueStr(group, key, strval); > + > + free(strval); > + > + return rc; > +} > + > +int virCgroupGetValueStr(virCgroupPtr group, > + const char *key, > + char **value) > +{ > + int fd = -1; > + int rc; > + char *keypath = NULL; > + char buf[512]; > + I don't think 512 bytes will be sufficient. THere are multi-line files like memory.stat. > + memset(buf, 0, sizeof(buf)); > + > + rc = cgroup_path_of(group->path, key, &keypath); > + if (rc != 0) { > + DEBUG("No path of %s, %s", group->path, key); > + return rc; > + } > + > + fd = open(keypath, O_RDONLY); > + if (fd < 0) { > + DEBUG("Unable to open %s: %m", keypath); > + rc = -ENOENT; > + goto out; > + } > + > + rc = saferead(fd, buf, sizeof(buf)); > + if (rc < 0) { > + DEBUG("Failed to read %s: %m\n", keypath); > + rc = -errno; > + goto out; > + } else if (rc == 0) { > + DEBUG("Short read of %s\n", keypath); > + rc = -EIO; > + goto out; > + } > + > + *value = strdup(buf); > + if (*value == NULL) { > + rc = -ENOMEM; > + goto out; > + } > + > + rc = 0; > +out: > + free(keypath); > + close(fd); > + > + return rc; > +} > + > +int virCgroupGetValueU64(virCgroupPtr group, > + const char *key, > + uint64_t *value) > +{ > + char *strval = NULL; > + int rc = 0; > + > + rc = virCgroupGetValueStr(group, key, &strval); > + if (rc != 0) > + goto out; > + > + if (sscanf(strval, "%" SCNu64, value) != 1) > + rc = -EINVAL; > +out: > + free(strval); > + > + return rc; > +} > + > +int virCgroupGetValueI64(virCgroupPtr group, > + const char *key, > + int64_t *value) > +{ > + char *strval = NULL; > + int rc = 0; > + > + rc = virCgroupGetValueStr(group, key, &strval); > + if (rc != 0) > + goto out; > + > + if (sscanf(strval, "%" SCNi64, value) != 1) > + rc = -EINVAL; > +out: > + free(strval); > + > + return rc; > +} > + > +int virCgroupHasValue(virCgroupPtr group, > + const char *key) > +{ > + int rc; > + char *keypath = NULL; > + > + rc = cgroup_path_of(group->path, key, &keypath); > + if (rc < 0) > + goto out; > + > + if (access(keypath, F_OK) != 0) > + rc = -ENOENT; > +out: > + free(keypath); > + > + return rc; > +} > + > +static int _cgroup_inherit(virCgroupPtr parent, > + virCgroupPtr group, > + const char *key) > +{ > + int rc; > + char *keyval = NULL; > + > + rc = virCgroupGetValueStr(parent, key, &keyval); > + if (rc != 0) > + goto out; > + > + rc = virCgroupSetValueStr(group, key, keyval); > + > +out: > + free(keyval); > + > + return rc; > +} > + > +static int cgroup_inherit(virCgroupPtr parent, > + virCgroupPtr group) > +{ > + int i; > + int rc = 0; > + const char *inherit_values[] = { > + "cpuset.cpus", > + "cpuset.mems", > + NULL > + }; > + > + for (i = 0; inherit_values[i] != NULL; i++) { > + const char *key = inherit_values[i]; > + if (virCgroupHasValue(group, key) == 0) > + rc = _cgroup_inherit(parent, group, key); > + > + if (rc != 0) { > + DEBUG("inherit of %s failed\n", key); > + break; > + } > + } > + > + return rc; > +} > + > +static int cgroup_root(virCgroupPtr *root) > +{ > + int rc = 0; > + char *grppath = NULL; > + struct virCgroup _root = { (char *)"." }; > + > + *root = calloc(1, sizeof(**root)); > + if (*root == NULL) { > + rc = -ENOMEM; > + goto out; > + } > + > + (*root)->path = strdup("libvirt"); > + if ((*root)->path == NULL) { > + rc = -ENOMEM; > + goto out; > + } > + > + rc = cgroup_path_of(".", (*root)->path, &grppath); > + if (rc != 0) > + goto out; > + > + if (access(grppath, F_OK) != 0) { > + if (mkdir(grppath, 0655) != 0) > + rc = -errno; > + else > + rc = cgroup_inherit(&_root, *root); > + } Yikes... The whole thing looks messy and magical, what is the code trying to do with dots and libvirt and 0655's? > +out: > + if (rc != 0) > + virCgroupFree(root); > + free(grppath); > + > + return rc; > +} > + > +static int cgroup_new(virCgroupPtr *parent, > + const char *group, > + virCgroupPtr *newgroup) > +{ > + int rc = 0; > + char *typpath = NULL; > + > + if (*parent == NULL) { > + rc = cgroup_root(parent); > + if (rc != 0) > + goto err; > + } > + > + *newgroup = calloc(1, sizeof(**newgroup)); > + if (*newgroup == NULL) { > + rc = -ENOMEM; > + goto err; > + } > + > + rc = asprintf(&((*newgroup)->path), > + "%s/%s", > + (*parent)->path, > + group); > + if (rc == -1) { > + rc = -ENOMEM; > + goto err; > + } > + > + rc = 0; > + > + return rc; > +err: > + virCgroupFree(newgroup); > + *newgroup = NULL; > + > + free(typpath); > + > + return rc; > +} > + > +int virCgroupOpen(virCgroupPtr parent, > + const char *group, > + virCgroupPtr *newgroup) > +{ > + int rc = 0; > + char *grppath = NULL; > + > + rc = cgroup_new(&parent, group, newgroup); > + if (rc != 0) > + goto err; > + virCgroupFree(&parent); > + > + rc = cgroup_path_of(".", (*newgroup)->path, &grppath); > + if (rc != 0) > + goto err; > + > + if (access(grppath, F_OK) != 0) { > + rc = -ENOENT; > + goto err; > + } > + > + return rc; > +err: > + virCgroupFree(newgroup); > + *newgroup = NULL; > + > + return rc; > +} > + > +int virCgroupCreate(virCgroupPtr parent, > + const char *group, > + virCgroupPtr *newgroup) > +{ > + int rc = 0; > + char *grppath = NULL; > + bool free_parent = (parent == NULL); > + > + rc = cgroup_new(&parent, group, newgroup); > + if (rc != 0) { > + DEBUG0("Unable to allocate new virCgroup structure"); > + goto err; > + } > + > + rc = cgroup_path_of(".", (*newgroup)->path, &grppath); > + if (rc != 0) > + goto err; > + > + if (access(grppath, F_OK) == 0) { > + rc = -EEXIST; > + goto err; > + } > + Why do you need access? mkdir will do that check anyway. > + if (mkdir(grppath, 0655) != 0) { Why 0655? Why not use meaningful names see sys/stat.h > + DEBUG("mkdir of %s failed", grppath); > + rc = -errno; > + goto err; > + } > + > + rc = cgroup_inherit(parent, *newgroup); > + if (rc != 0) > + goto err; > + > + free(grppath); > + > + if (free_parent) > + virCgroupFree(&parent); > + > + return rc; > +err: > + free(grppath); > + virCgroupFree(newgroup); > + *newgroup = NULL; > + > + if (free_parent) > + virCgroupFree(&parent); > + > + return rc; > +} > + > +int virCgroupRemove(virCgroupPtr group) > +{ > + int rc = 0; > + char *grppath = NULL; > + > + rc = cgroup_path_of(".", group->path, &grppath); > + if (rc != 0) > + goto out; > + > + if (rmdir(grppath) != 0) > + rc = -errno; > +out: > + free(grppath); > + > + return rc; > +} > + > +int virCgroupAddTask(virCgroupPtr group, pid_t pid) > +{ > + int rc = 0; > + char *pidstr = NULL; > + > + if (asprintf(&pidstr, "%lu", (unsigned long)pid) == -1) > + return -ENOMEM; > + > + rc = virCgroupSetValueStr(group, "tasks", pidstr); > + > + free(pidstr); > + > + return rc; > +} > diff -r 444e2614d0a2 -r 8e948eb88328 src/cgroup.h > --- /dev/null Thu Jan 01 00:00:00 1970 +0000 > +++ b/src/cgroup.h Mon Sep 29 09:37:42 2008 -0700 > @@ -0,0 +1,120 @@ > +/* > + * cgroup.h: Interface to tools for managing cgroups > + * > + * Copyright IBM Corp. 2008 > + * > + * See COPYING.LIB for the License of this software > + * > + * Authors: > + * Dan Smith <danms@xxxxxxxxxx> > + */ > + > +#ifndef CGROUP_H > +#define CGROUP_H > + > +#include <stdint.h> > + > +struct virCgroup; > +typedef struct virCgroup *virCgroupPtr; > + > +/** > + * virCgroupHaveSupport: > + * > + * Returns 0 if support is present, negative if not > + */ > +int virCgroupHaveSupport(void); > + > +/** > + * virCgroupCreate: > + * @parent: The path to the parent of the desired new group, or NULL if root > + * @group: The name of the new group > + * @newgroup: The newly-created group. Must be passed to cgroup_free() > + * > + * Returns 0 on success, negative on failure > + */ > +int virCgroupCreate(virCgroupPtr parent, > + const char *group, > + virCgroupPtr *newgroup); > + > +/** > + * virCgroupOpen: > + * @parent: The path to the parent of the desired new group, or NULL if root > + * @group: The name of the group > + * @newgroup: The group. Must be passed to cgroup_free() > + * > + * Returns 0 on success, negative on failure > + */ > +int virCgroupOpen(virCgroupPtr parent, > + const char *group, > + virCgroupPtr *newgroup); > + > +/** > + * virCgroupFree: > + * @group: A pointer to the virCgroupPtr to be freed > + */ > +void virCgroupFree(virCgroupPtr *group); > + > +/** > + * virCgroupRemove > + * @parent: The path to the parent of the group to be deleted > + * @group: The name of the group to be removed > + * > + * Returns 0 on success, negative on failure > + */ > +int virCgroupRemove(virCgroupPtr group); > + > +/** > + * virCgroupHasValue: > + * @group: The scoping cgroup > + * @key: The cgrop node to test for > + * > + * Returns 0 if present, negative if not > + */ > +int virCgroupHasValue(virCgroupPtr group, const char *key); > + > +/** > + * virCgroupAddTask: > + * @group: The scoping cgroup > + * @pid: The pid of the task to add > + * > + * Returns 0 on success, negative on failure > + */ > +int virCgroupAddTask(virCgroupPtr group, pid_t pid); > + > +/** > + * virCgroupSetValueXXX: > + * @group: The scoping cgroup > + * @key: The cgroup node to set > + * @value: The value to write into the @key node > + * > + * Returns 0 on success, negative on failure > + */ > +int virCgroupSetValueStr(virCgroupPtr group, > + const char *key, > + const char *value); > +int virCgroupSetValueU64(virCgroupPtr group, > + const char *key, > + uint64_t value); > +int virCgroupSetValueI64(virCgroupPtr group, > + const char *key, > + int64_t value); > + > +/** > + * virCgroupGetValueXXX: > + * @group: The scoping cgroup > + * @key: The cgroup node to read > + * @value: A pointer to the storage area > + * > + * Returns 0 on success, negative on failure > + */ > +int virCgroupGetValueStr(virCgroupPtr group, > + const char *key, > + char **value); > +int virCgroupGetValueU64(virCgroupPtr group, > + const char *key, > + uint64_t *value); > +int virCgroupGetValueI64(virCgroupPtr group, > + const char *key, > + int64_t *value); > + > +#endif /* CGROUP_H */ > > -- > Libvir-list mailing list > Libvir-list@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/libvir-list > -- Balbir -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list