From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> To bring in line with new naming practice, rename the src/util/cgroup.{h,c} files to vircgroup.{h,c} Signed-off-by: Daniel P. Berrange <berrange@xxxxxxxxxx> --- po/POTFILES.in | 2 +- src/Makefile.am | 2 +- src/conf/domain_audit.h | 2 +- src/lxc/lxc_cgroup.c | 2 +- src/lxc/lxc_conf.h | 2 +- src/qemu/qemu_cgroup.c | 2 +- src/qemu/qemu_conf.h | 2 +- src/util/cgroup.c | 2099 ----------------------------------------------- src/util/cgroup.h | 167 ---- src/util/vircgroup.c | 2099 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/vircgroup.h | 167 ++++ 11 files changed, 2273 insertions(+), 2273 deletions(-) delete mode 100644 src/util/cgroup.c delete mode 100644 src/util/cgroup.h create mode 100644 src/util/vircgroup.c create mode 100644 src/util/vircgroup.h diff --git a/po/POTFILES.in b/po/POTFILES.in index f0cfd7f..19ed187 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -137,7 +137,6 @@ src/storage/storage_driver.c src/test/test_driver.c src/uml/uml_conf.c src/uml/uml_driver.c -src/util/cgroup.c src/util/command.c src/util/conf.c src/util/dnsmasq.c @@ -157,6 +156,7 @@ src/util/util.c src/util/viraudit.c src/util/virauth.c src/util/virauthconfig.c +src/util/vircgroup.c src/util/virdbus.c src/util/virfile.c src/util/virhash.c diff --git a/src/Makefile.am b/src/Makefile.am index 53ec9da..149ffc9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -57,7 +57,6 @@ UTIL_SOURCES = \ util/buf.c util/buf.h \ util/command.c util/command.h \ util/conf.c util/conf.h \ - util/cgroup.c util/cgroup.h \ util/event.c util/event.h \ util/event_poll.c util/event_poll.h \ util/hooks.c util/hooks.h \ @@ -92,6 +91,7 @@ UTIL_SOURCES = \ util/virtypedparam.c util/virtypedparam.h \ util/xml.c util/xml.h \ util/virterror.c util/virterror_internal.h \ + util/vircgroup.c util/vircgroup.h \ util/virdbus.c util/virdbus.h \ util/virhash.c util/virhash.h \ util/virhashcode.c util/virhashcode.h \ diff --git a/src/conf/domain_audit.h 
b/src/conf/domain_audit.h index 63a8f75..381fe37 100644 --- a/src/conf/domain_audit.h +++ b/src/conf/domain_audit.h @@ -25,7 +25,7 @@ # define __VIR_DOMAIN_AUDIT_H__ # include "domain_conf.h" -# include "cgroup.h" +# include "vircgroup.h" void virDomainAuditStart(virDomainObjPtr vm, const char *reason, diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index 767ef26..e354d6c 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -27,7 +27,7 @@ #include "virterror_internal.h" #include "logging.h" #include "memory.h" -#include "cgroup.h" +#include "vircgroup.h" #define VIR_FROM_THIS VIR_FROM_LXC diff --git a/src/lxc/lxc_conf.h b/src/lxc/lxc_conf.h index ea345be..c02966f 100644 --- a/src/lxc/lxc_conf.h +++ b/src/lxc/lxc_conf.h @@ -32,7 +32,7 @@ # include "domain_event.h" # include "capabilities.h" # include "threads.h" -# include "cgroup.h" +# include "vircgroup.h" # include "security/security_manager.h" # include "configmake.h" diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c index 30cd1d6..6c6607d 100644 --- a/src/qemu/qemu_cgroup.c +++ b/src/qemu/qemu_cgroup.c @@ -26,7 +26,7 @@ #include "qemu_cgroup.h" #include "qemu_domain.h" #include "qemu_process.h" -#include "cgroup.h" +#include "vircgroup.h" #include "logging.h" #include "memory.h" #include "virterror_internal.h" diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h index 1a39946..cfa6fff 100644 --- a/src/qemu/qemu_conf.h +++ b/src/qemu/qemu_conf.h @@ -34,7 +34,7 @@ # include "domain_event.h" # include "threads.h" # include "security/security_manager.h" -# include "cgroup.h" +# include "vircgroup.h" # include "pci.h" # include "hostusb.h" # include "cpu_conf.h" diff --git a/src/util/cgroup.c b/src/util/cgroup.c deleted file mode 100644 index 8f3b8b7..0000000 --- a/src/util/cgroup.c +++ /dev/null @@ -1,2099 +0,0 @@ -/* - * cgroup.c: Tools for managing cgroups - * - * Copyright (C) 2010-2012 Red Hat, Inc. - * Copyright IBM Corp. 
2008 - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see - * <http://www.gnu.org/licenses/>. - * - * Authors: - * Dan Smith <danms@xxxxxxxxxx> - */ -#include <config.h> - -#include <stdio.h> -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -# include <mntent.h> -#endif -#include <fcntl.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <signal.h> -#include <libgen.h> -#include <dirent.h> - -#include "internal.h" -#include "util.h" -#include "memory.h" -#include "cgroup.h" -#include "logging.h" -#include "virfile.h" -#include "virhash.h" -#include "virhashcode.h" - -#define CGROUP_MAX_VAL 512 - -VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, - "cpu", "cpuacct", "cpuset", "memory", "devices", - "freezer", "blkio"); - -struct virCgroupController { - int type; - char *mountPoint; - char *placement; -}; - -struct virCgroup { - char *path; - - struct virCgroupController controllers[VIR_CGROUP_CONTROLLER_LAST]; -}; - -typedef enum { - VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */ - VIR_CGROUP_MEM_HIERACHY = 1 << 0, /* call virCgroupSetMemoryUseHierarchy - * before creating subcgroups and - * attaching tasks - */ - VIR_CGROUP_VCPU = 1 << 1, /* create subdir only under the cgroup cpu, - * cpuacct and cpuset if possible. 
*/ -} virCgroupFlags; - -/** - * virCgroupFree: - * - * @group: The group structure to free - */ -void virCgroupFree(virCgroupPtr *group) -{ - int i; - - if (*group == NULL) - return; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - VIR_FREE((*group)->controllers[i].mountPoint); - VIR_FREE((*group)->controllers[i].placement); - } - - VIR_FREE((*group)->path); - VIR_FREE(*group); -} - -/** - * virCgroupMounted: query whether a cgroup subsystem is mounted or not - * - * @cgroup: The group structure to be queried - * @controller: cgroup subsystem id - * - * Returns true if a cgroup is subsystem is mounted. - */ -bool virCgroupMounted(virCgroupPtr cgroup, int controller) -{ - return cgroup->controllers[controller].mountPoint != NULL; -} - -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -/* - * Process /proc/mounts figuring out what controllers are - * mounted and where - */ -static int virCgroupDetectMounts(virCgroupPtr group) -{ - int i; - FILE *mounts = NULL; - struct mntent entry; - char buf[CGROUP_MAX_VAL]; - - mounts = fopen("/proc/mounts", "r"); - if (mounts == NULL) { - VIR_ERROR(_("Unable to open /proc/mounts")); - return -ENOENT; - } - - while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { - if (STRNEQ(entry.mnt_type, "cgroup")) - continue; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *typestr = virCgroupControllerTypeToString(i); - int typelen = strlen(typestr); - char *tmp = entry.mnt_opts; - while (tmp) { - char *next = strchr(tmp, ','); - int len; - if (next) { - len = next-tmp; - next++; - } else { - len = strlen(tmp); - } - /* NB, the same controller can appear >1 time in mount list - * due to bind mounts from one location to another. 
Pick the - * first entry only - */ - if (typelen == len && STREQLEN(typestr, tmp, len) && - !group->controllers[i].mountPoint && - !(group->controllers[i].mountPoint = strdup(entry.mnt_dir))) - goto no_memory; - tmp = next; - } - } - } - - VIR_FORCE_FCLOSE(mounts); - - return 0; - -no_memory: - VIR_FORCE_FCLOSE(mounts); - return -ENOMEM; -} - - -/* - * Process /proc/self/cgroup figuring out what cgroup - * sub-path the current process is assigned to. ie not - * necessarily in the root - */ -static int virCgroupDetectPlacement(virCgroupPtr group) -{ - int i; - FILE *mapping = NULL; - char line[1024]; - - mapping = fopen("/proc/self/cgroup", "r"); - if (mapping == NULL) { - VIR_ERROR(_("Unable to open /proc/self/cgroup")); - return -ENOENT; - } - - while (fgets(line, sizeof(line), mapping) != NULL) { - char *controllers = strchr(line, ':'); - char *path = controllers ? strchr(controllers+1, ':') : NULL; - char *nl = path ? strchr(path, '\n') : NULL; - - if (!controllers || !path) - continue; - - if (nl) - *nl = '\0'; - - *path = '\0'; - controllers++; - path++; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - const char *typestr = virCgroupControllerTypeToString(i); - int typelen = strlen(typestr); - char *tmp = controllers; - while (tmp) { - char *next = strchr(tmp, ','); - int len; - if (next) { - len = next-tmp; - next++; - } else { - len = strlen(tmp); - } - if (typelen == len && STREQLEN(typestr, tmp, len) && - !(group->controllers[i].placement = strdup(STREQ(path, "/") ? 
"" : path))) - goto no_memory; - - tmp = next; - } - } - } - - VIR_FORCE_FCLOSE(mapping); - - return 0; - -no_memory: - VIR_FORCE_FCLOSE(mapping); - return -ENOMEM; - -} - -static int virCgroupDetect(virCgroupPtr group) -{ - int any = 0; - int rc; - int i; - - rc = virCgroupDetectMounts(group); - if (rc < 0) { - VIR_ERROR(_("Failed to detect mounts for %s"), group->path); - return rc; - } - - /* Check that at least 1 controller is available */ - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (group->controllers[i].mountPoint != NULL) - any = 1; - } - if (!any) - return -ENXIO; - - - rc = virCgroupDetectPlacement(group); - - if (rc == 0) { - /* Check that for every mounted controller, we found our placement */ - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (!group->controllers[i].mountPoint) - continue; - - if (!group->controllers[i].placement) { - VIR_ERROR(_("Could not find placement for controller %s at %s"), - virCgroupControllerTypeToString(i), - group->controllers[i].placement); - rc = -ENOENT; - break; - } - - VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i, - virCgroupControllerTypeToString(i), - group->controllers[i].mountPoint, - group->controllers[i].placement); - } - } else { - VIR_ERROR(_("Failed to detect mapping for %s"), group->path); - } - - return rc; -} -#endif - - -int virCgroupPathOfController(virCgroupPtr group, - int controller, - const char *key, - char **path) -{ - if (controller == -1) { - int i; - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - if (group->controllers[i].mountPoint && - group->controllers[i].placement) { - controller = i; - break; - } - } - } - if (controller == -1) - return -ENOSYS; - - if (group->controllers[controller].mountPoint == NULL) - return -ENOENT; - - if (group->controllers[controller].placement == NULL) - return -ENOENT; - - if (virAsprintf(path, "%s%s%s/%s", - group->controllers[controller].mountPoint, - group->controllers[controller].placement, - STREQ(group->path, "/") 
? "" : group->path, - key ? key : "") == -1) - return -ENOMEM; - - return 0; -} - - -static int virCgroupSetValueStr(virCgroupPtr group, - int controller, - const char *key, - const char *value) -{ - int rc = 0; - char *keypath = NULL; - - rc = virCgroupPathOfController(group, controller, key, &keypath); - if (rc != 0) - return rc; - - VIR_DEBUG("Set value '%s' to '%s'", keypath, value); - rc = virFileWriteStr(keypath, value, 0); - if (rc < 0) { - rc = -errno; - VIR_DEBUG("Failed to write value '%s': %m", value); - } else { - rc = 0; - } - - VIR_FREE(keypath); - - return rc; -} - -static int virCgroupGetValueStr(virCgroupPtr group, - int controller, - const char *key, - char **value) -{ - int rc; - char *keypath = NULL; - - *value = NULL; - - rc = virCgroupPathOfController(group, controller, key, &keypath); - if (rc != 0) { - VIR_DEBUG("No path of %s, %s", group->path, key); - return rc; - } - - VIR_DEBUG("Get value %s", keypath); - - rc = virFileReadAll(keypath, 1024*1024, value); - if (rc < 0) { - rc = -errno; - VIR_DEBUG("Failed to read %s: %m\n", keypath); - } else { - /* Terminated with '\n' has sometimes harmful effects to the caller */ - if ((*value)[rc - 1] == '\n') - (*value)[rc - 1] = '\0'; - - rc = 0; - } - - VIR_FREE(keypath); - - return rc; -} - -static int virCgroupSetValueU64(virCgroupPtr group, - int controller, - const char *key, - unsigned long long int value) -{ - char *strval = NULL; - int rc; - - if (virAsprintf(&strval, "%llu", value) == -1) - return -ENOMEM; - - rc = virCgroupSetValueStr(group, controller, key, strval); - - VIR_FREE(strval); - - return rc; -} - - - -static int virCgroupSetValueI64(virCgroupPtr group, - int controller, - const char *key, - long long int value) -{ - char *strval = NULL; - int rc; - - if (virAsprintf(&strval, "%lld", value) == -1) - return -ENOMEM; - - rc = virCgroupSetValueStr(group, controller, key, strval); - - VIR_FREE(strval); - - return rc; -} - -static int virCgroupGetValueI64(virCgroupPtr group, - int 
controller, - const char *key, - long long int *value) -{ - char *strval = NULL; - int rc = 0; - - rc = virCgroupGetValueStr(group, controller, key, &strval); - if (rc != 0) - goto out; - - if (virStrToLong_ll(strval, NULL, 10, value) < 0) - rc = -EINVAL; -out: - VIR_FREE(strval); - - return rc; -} - -static int virCgroupGetValueU64(virCgroupPtr group, - int controller, - const char *key, - unsigned long long int *value) -{ - char *strval = NULL; - int rc = 0; - - rc = virCgroupGetValueStr(group, controller, key, &strval); - if (rc != 0) - goto out; - - if (virStrToLong_ull(strval, NULL, 10, value) < 0) - rc = -EINVAL; -out: - VIR_FREE(strval); - - return rc; -} - - -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group) -{ - int i; - int rc = 0; - const char *inherit_values[] = { - "cpuset.cpus", - "cpuset.mems", - }; - - VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path); - for (i = 0; i < ARRAY_CARDINALITY(inherit_values) ; i++) { - char *value; - - rc = virCgroupGetValueStr(parent, - VIR_CGROUP_CONTROLLER_CPUSET, - inherit_values[i], - &value); - if (rc != 0) { - VIR_ERROR(_("Failed to get %s %d"), inherit_values[i], rc); - break; - } - - VIR_DEBUG("Inherit %s = %s", inherit_values[i], value); - - rc = virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_CPUSET, - inherit_values[i], - value); - VIR_FREE(value); - - if (rc != 0) { - VIR_ERROR(_("Failed to set %s %d"), inherit_values[i], rc); - break; - } - } - - return rc; -} - -static int virCgroupSetMemoryUseHierarchy(virCgroupPtr group) -{ - int rc = 0; - unsigned long long value; - const char *filename = "memory.use_hierarchy"; - - rc = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - filename, &value); - if (rc != 0) { - VIR_ERROR(_("Failed to read %s/%s (%d)"), group->path, filename, rc); - return rc; - } - - /* Setting twice causes error, so if already enabled, skip setting */ - if (value == 1) - 
return 0; - - VIR_DEBUG("Setting up %s/%s", group->path, filename); - rc = virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - filename, 1); - - if (rc != 0) { - VIR_ERROR(_("Failed to set %s/%s (%d)"), group->path, filename, rc); - } - - return rc; -} - -static int virCgroupMakeGroup(virCgroupPtr parent, - virCgroupPtr group, - bool create, - unsigned int flags) -{ - int i; - int rc = 0; - - VIR_DEBUG("Make group %s", group->path); - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - char *path = NULL; - - /* Skip over controllers that aren't mounted */ - if (!group->controllers[i].mountPoint) - continue; - - /* We need to control cpu bandwidth for each vcpu now */ - if ((flags & VIR_CGROUP_VCPU) && - (i != VIR_CGROUP_CONTROLLER_CPU && - i != VIR_CGROUP_CONTROLLER_CPUACCT && - i != VIR_CGROUP_CONTROLLER_CPUSET)) { - /* treat it as unmounted and we can use virCgroupAddTask */ - VIR_FREE(group->controllers[i].mountPoint); - continue; - } - - rc = virCgroupPathOfController(group, i, "", &path); - if (rc < 0) - return rc; - /* As of Feb 2011, clang can't see that the above function - * call did not modify group. */ - sa_assert(group->controllers[i].mountPoint); - - VIR_DEBUG("Make controller %s", path); - if (access(path, F_OK) != 0) { - if (!create || - mkdir(path, 0755) < 0) { - /* With a kernel that doesn't support multi-level directory - * for blkio controller, libvirt will fail and disable all - * other controllers even though they are available. So - * treat blkio as unmounted if mkdir fails. 
*/ - if (i == VIR_CGROUP_CONTROLLER_BLKIO) { - rc = 0; - VIR_FREE(group->controllers[i].mountPoint); - VIR_FREE(path); - continue; - } else { - rc = -errno; - VIR_FREE(path); - break; - } - } - if (group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint != NULL && - (i == VIR_CGROUP_CONTROLLER_CPUSET || - STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint))) { - rc = virCgroupCpuSetInherit(parent, group); - if (rc != 0) { - VIR_FREE(path); - break; - } - } - /* - * Note that virCgroupSetMemoryUseHierarchy should always be - * called prior to creating subcgroups and attaching tasks. - */ - if ((flags & VIR_CGROUP_MEM_HIERACHY) && - (group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint != NULL) && - (i == VIR_CGROUP_CONTROLLER_MEMORY || - STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint))) { - rc = virCgroupSetMemoryUseHierarchy(group); - if (rc != 0) { - VIR_FREE(path); - break; - } - } - } - - VIR_FREE(path); - } - - return rc; -} - - -static int virCgroupNew(const char *path, - virCgroupPtr *group) -{ - int rc = 0; - char *typpath = NULL; - - VIR_DEBUG("New group %s", path); - *group = NULL; - - if (VIR_ALLOC((*group)) != 0) { - rc = -ENOMEM; - goto err; - } - - if (!((*group)->path = strdup(path))) { - rc = -ENOMEM; - goto err; - } - - rc = virCgroupDetect(*group); - if (rc < 0) - goto err; - - return rc; -err: - virCgroupFree(group); - *group = NULL; - - VIR_FREE(typpath); - - return rc; -} - -static int virCgroupAppRoot(bool privileged, - virCgroupPtr *group, - bool create) -{ - virCgroupPtr rootgrp = NULL; - int rc; - - rc = virCgroupNew("/", &rootgrp); - if (rc != 0) - return rc; - - if (privileged) { - rc = virCgroupNew("/libvirt", group); - } else { - char *rootname; - char *username; - username = virGetUserName(getuid()); - if (!username) { - rc = -ENOMEM; - goto cleanup; - } - rc = virAsprintf(&rootname, "/libvirt-%s", username); - 
VIR_FREE(username); - if (rc < 0) { - rc = -ENOMEM; - goto cleanup; - } - - rc = virCgroupNew(rootname, group); - VIR_FREE(rootname); - } - if (rc != 0) - goto cleanup; - - rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE); - -cleanup: - virCgroupFree(&rootgrp); - return rc; -} -#endif - -#if defined _DIRENT_HAVE_D_TYPE -static int virCgroupRemoveRecursively(char *grppath) -{ - DIR *grpdir; - struct dirent *ent; - int rc = 0; - - grpdir = opendir(grppath); - if (grpdir == NULL) { - if (errno == ENOENT) - return 0; - rc = -errno; - VIR_ERROR(_("Unable to open %s (%d)"), grppath, errno); - return rc; - } - - for (;;) { - char *path; - - errno = 0; - ent = readdir(grpdir); - if (ent == NULL) { - if ((rc = -errno)) - VIR_ERROR(_("Failed to readdir for %s (%d)"), grppath, errno); - break; - } - - if (ent->d_name[0] == '.') continue; - if (ent->d_type != DT_DIR) continue; - - if (virAsprintf(&path, "%s/%s", grppath, ent->d_name) == -1) { - rc = -ENOMEM; - break; - } - rc = virCgroupRemoveRecursively(path); - VIR_FREE(path); - if (rc != 0) - break; - } - closedir(grpdir); - - VIR_DEBUG("Removing cgroup %s", grppath); - if (rmdir(grppath) != 0 && errno != ENOENT) { - rc = -errno; - VIR_ERROR(_("Unable to remove %s (%d)"), grppath, errno); - } - - return rc; -} -#else -static int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED) -{ - /* Claim no support */ - return -ENXIO; -} -#endif - -/** - * virCgroupRemove: - * - * @group: The group to be removed - * - * It first removes all child groups recursively - * in depth first order and then removes @group - * because the presence of the child groups - * prevents removing @group. 
- * - * Returns: 0 on success - */ -int virCgroupRemove(virCgroupPtr group) -{ - int rc = 0; - int i; - char *grppath = NULL; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - /* Skip over controllers not mounted */ - if (!group->controllers[i].mountPoint) - continue; - - if (virCgroupPathOfController(group, - i, - NULL, - &grppath) != 0) - continue; - - VIR_DEBUG("Removing cgroup %s and all child cgroups", grppath); - rc = virCgroupRemoveRecursively(grppath); - VIR_FREE(grppath); - } - - return rc; -} - -/** - * virCgroupAddTask: - * - * @group: The cgroup to add a task to - * @pid: The pid of the task to add - * - * Returns: 0 on success - */ -int virCgroupAddTask(virCgroupPtr group, pid_t pid) -{ - int rc = 0; - int i; - - for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { - /* Skip over controllers not mounted */ - if (!group->controllers[i].mountPoint) - continue; - - rc = virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid); - if (rc != 0) - break; - } - - return rc; -} - -/** - * virCgroupAddTaskController: - * - * @group: The cgroup to add a task to - * @pid: The pid of the task to add - * @controller: The cgroup controller to be operated on - * - * Returns: 0 on success or -errno on failure - */ -int virCgroupAddTaskController(virCgroupPtr group, pid_t pid, int controller) -{ - if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST) - return -EINVAL; - - if (!group->controllers[controller].mountPoint) - return -EINVAL; - - return virCgroupSetValueU64(group, controller, "tasks", - (unsigned long long)pid); -} - - -static int virCgroupAddTaskStrController(virCgroupPtr group, - const char *pidstr, - int controller) -{ - char *str = NULL, *cur = NULL, *next = NULL; - unsigned long long p = 0; - int rc = 0; - char *endp; - - if (virAsprintf(&str, "%s", pidstr) < 0) - return -1; - - cur = str; - while (*cur != '\0') { - rc = virStrToLong_ull(cur, &endp, 10, &p); - if (rc != 0) - goto cleanup; - - rc = 
virCgroupAddTaskController(group, p, controller); - if (rc != 0) - goto cleanup; - - next = strchr(cur, '\n'); - if (next) { - cur = next + 1; - *next = '\0'; - } else { - break; - } - } - -cleanup: - VIR_FREE(str); - return rc; -} - -/** - * virCgroupMoveTask: - * - * @src_group: The source cgroup where all tasks are removed from - * @dest_group: The destination where all tasks are added to - * @controller: The cgroup controller to be operated on - * - * Returns: 0 on success or -errno on failure - */ -int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group, - int controller) -{ - int rc = 0, err = 0; - char *content = NULL; - - if (controller < VIR_CGROUP_CONTROLLER_CPU || - controller > VIR_CGROUP_CONTROLLER_BLKIO) - return -EINVAL; - - if (!src_group->controllers[controller].mountPoint || - !dest_group->controllers[controller].mountPoint) { - VIR_WARN("no vm cgroup in controller %d", controller); - return 0; - } - - rc = virCgroupGetValueStr(src_group, controller, "tasks", &content); - if (rc != 0) - return rc; - - rc = virCgroupAddTaskStrController(dest_group, content, controller); - if (rc != 0) - goto cleanup; - - VIR_FREE(content); - - return 0; - -cleanup: - /* - * We don't need to recover dest_cgroup because cgroup will make sure - * that one task only resides in one cgroup of the same controller. 
- */ - err = virCgroupAddTaskStrController(src_group, content, controller); - if (err != 0) - VIR_ERROR(_("Cannot recover cgroup %s from %s"), - src_group->controllers[controller].mountPoint, - dest_group->controllers[controller].mountPoint); - VIR_FREE(content); - - return rc; -} - -/** - * virCgroupForDriver: - * - * @name: name of this driver (e.g., xen, qemu, lxc) - * @group: Pointer to returned virCgroupPtr - * - * Returns 0 on success - */ -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -int virCgroupForDriver(const char *name, - virCgroupPtr *group, - bool privileged, - bool create) -{ - int rc; - char *path = NULL; - virCgroupPtr rootgrp = NULL; - - rc = virCgroupAppRoot(privileged, &rootgrp, create); - if (rc != 0) - goto out; - - if (virAsprintf(&path, "%s/%s", rootgrp->path, name) < 0) { - rc = -ENOMEM; - goto out; - } - - rc = virCgroupNew(path, group); - VIR_FREE(path); - - if (rc == 0) { - rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE); - if (rc != 0) - virCgroupFree(group); - } - -out: - virCgroupFree(&rootgrp); - - return rc; -} -#else -int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED, - virCgroupPtr *group ATTRIBUTE_UNUSED, - bool privileged ATTRIBUTE_UNUSED, - bool create ATTRIBUTE_UNUSED) -{ - /* Claim no support */ - return -ENXIO; -} -#endif - -/** -* virCgroupGetAppRoot: -* -* @group: Pointer to returned virCgroupPtr -* -* Returns 0 on success -*/ -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -int virCgroupGetAppRoot(virCgroupPtr *group) -{ - return virCgroupNew("/", group); -} -#else -int virCgroupGetAppRoot(virCgroupPtr *group ATTRIBUTE_UNUSED) -{ - return -ENXIO; -} -#endif - -/** - * virCgroupForDomain: - * - * @driver: group for driver owning the domain - * @name: name of the domain - * @group: Pointer to returned virCgroupPtr - * - * Returns 0 on success - */ -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -int virCgroupForDomain(virCgroupPtr driver, - const char *name, - virCgroupPtr 
*group, - bool create) -{ - int rc; - char *path; - - if (driver == NULL) - return -EINVAL; - - if (virAsprintf(&path, "%s/%s", driver->path, name) < 0) - return -ENOMEM; - - rc = virCgroupNew(path, group); - VIR_FREE(path); - - if (rc == 0) { - /* - * Create a cgroup with memory.use_hierarchy enabled to - * surely account memory usage of lxc with ns subsystem - * enabled. (To be exact, memory and ns subsystems are - * enabled at the same time.) - * - * The reason why doing it here, not a upper group, say - * a group for driver, is to avoid overhead to track - * cumulative usage that we don't need. - */ - rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_MEM_HIERACHY); - if (rc != 0) - virCgroupFree(group); - } - - return rc; -} -#else -int virCgroupForDomain(virCgroupPtr driver ATTRIBUTE_UNUSED, - const char *name ATTRIBUTE_UNUSED, - virCgroupPtr *group ATTRIBUTE_UNUSED, - bool create ATTRIBUTE_UNUSED) -{ - return -ENXIO; -} -#endif - -/** - * virCgroupForVcpu: - * - * @driver: group for the domain - * @vcpuid: id of the vcpu - * @group: Pointer to returned virCgroupPtr - * - * Returns 0 on success - */ -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -int virCgroupForVcpu(virCgroupPtr driver, - int vcpuid, - virCgroupPtr *group, - bool create) -{ - int rc; - char *path; - - if (driver == NULL) - return -EINVAL; - - if (virAsprintf(&path, "%s/vcpu%d", driver->path, vcpuid) < 0) - return -ENOMEM; - - rc = virCgroupNew(path, group); - VIR_FREE(path); - - if (rc == 0) { - rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU); - if (rc != 0) - virCgroupFree(group); - } - - return rc; -} -#else -int virCgroupForVcpu(virCgroupPtr driver ATTRIBUTE_UNUSED, - int vcpuid ATTRIBUTE_UNUSED, - virCgroupPtr *group ATTRIBUTE_UNUSED, - bool create ATTRIBUTE_UNUSED) -{ - return -ENXIO; -} -#endif - -/** - * virCgroupForEmulator: - * - * @driver: group for the domain - * @group: Pointer to returned virCgroupPtr - * - * Returns: 0 on success or -errno on 
failure - */ -#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -int virCgroupForEmulator(virCgroupPtr driver, - virCgroupPtr *group, - bool create) -{ - int rc; - char *path; - - if (driver == NULL) - return -EINVAL; - - if (virAsprintf(&path, "%s/emulator", driver->path) < 0) - return -ENOMEM; - - rc = virCgroupNew(path, group); - VIR_FREE(path); - - if (rc == 0) { - rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU); - if (rc != 0) - virCgroupFree(group); - } - - return rc; -} -#else -int virCgroupForEmulator(virCgroupPtr driver ATTRIBUTE_UNUSED, - virCgroupPtr *group ATTRIBUTE_UNUSED, - bool create ATTRIBUTE_UNUSED) -{ - return -ENXIO; -} - -#endif -/** - * virCgroupSetBlkioWeight: - * - * @group: The cgroup to change io weight for - * @weight: The Weight for this cgroup - * - * Returns: 0 on success - */ -int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight) -{ - if (weight > 1000 || weight < 100) - return -EINVAL; - - return virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_BLKIO, - "blkio.weight", - weight); -} - -/** - * virCgroupGetBlkioWeight: - * - * @group: The cgroup to get weight for - * @Weight: Pointer to returned weight - * - * Returns: 0 on success - */ -int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight) -{ - unsigned long long tmp; - int ret; - ret = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_BLKIO, - "blkio.weight", &tmp); - if (ret == 0) - *weight = tmp; - return ret; -} - -/** - * virCgroupSetBlkioDeviceWeight: - * - * @group: The cgroup to change io device weight device for - * @path: The device with a weight to alter - * @weight: The new device weight (100-1000), or 0 to clear - * - * device_weight is treated as a write-only parameter, so - * there isn't a getter counterpart. 
- * - * Returns: 0 on success, -errno on failure - */ -#if defined(major) && defined(minor) -int virCgroupSetBlkioDeviceWeight(virCgroupPtr group, - const char *path, - unsigned int weight) -{ - char *str; - struct stat sb; - int ret; - - if (weight && (weight > 1000 || weight < 100)) - return -EINVAL; - - if (stat(path, &sb) < 0) - return -errno; - - if (!S_ISBLK(sb.st_mode)) - return -EINVAL; - - if (virAsprintf(&str, "%d:%d %d", major(sb.st_rdev), minor(sb.st_rdev), - weight) < 0) - return -errno; - - ret = virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_BLKIO, - "blkio.weight_device", - str); - VIR_FREE(str); - return ret; -} -#else -int -virCgroupSetBlkioDeviceWeight(virCgroupPtr group ATTRIBUTE_UNUSED, - const char *path ATTRIBUTE_UNUSED, - unsigned int weight ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} -#endif - -/** - * virCgroupSetMemory: - * - * @group: The cgroup to change memory for - * @kb: The memory amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb) -{ - unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED; - - if (kb > maxkb) - return -EINVAL; - else if (kb == maxkb) - return virCgroupSetValueI64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.limit_in_bytes", - -1); - else - return virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.limit_in_bytes", - kb << 10); -} - -/** - * virCgroupGetMemoryUsage: - * - * @group: The cgroup to change memory for - * @kb: Pointer to returned used memory in kilobytes - * - * Returns: 0 on success - */ -int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb) -{ - long long unsigned int usage_in_bytes; - int ret; - ret = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.usage_in_bytes", &usage_in_bytes); - if (ret == 0) - *kb = (unsigned long) usage_in_bytes >> 10; - return ret; -} - -/** - * virCgroupSetMemoryHardLimit: - * - * @group: The cgroup to change memory hard limit for - * @kb: 
The memory amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb) -{ - return virCgroupSetMemory(group, kb); -} - -/** - * virCgroupGetMemoryHardLimit: - * - * @group: The cgroup to get the memory hard limit for - * @kb: The memory amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb) -{ - long long unsigned int limit_in_bytes; - int ret; - ret = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.limit_in_bytes", &limit_in_bytes); - if (ret == 0) - *kb = limit_in_bytes >> 10; - return ret; -} - -/** - * virCgroupSetMemorySoftLimit: - * - * @group: The cgroup to change memory soft limit for - * @kb: The memory amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb) -{ - unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED; - - if (kb > maxkb) - return -EINVAL; - else if (kb == maxkb) - return virCgroupSetValueI64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.soft_limit_in_bytes", - -1); - else - return virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.soft_limit_in_bytes", - kb << 10); -} - - -/** - * virCgroupGetMemorySoftLimit: - * - * @group: The cgroup to get the memory soft limit for - * @kb: The memory amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb) -{ - long long unsigned int limit_in_bytes; - int ret; - ret = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.soft_limit_in_bytes", &limit_in_bytes); - if (ret == 0) - *kb = limit_in_bytes >> 10; - return ret; -} - -/** - * virCgroupSetMemSwapHardLimit: - * - * @group: The cgroup to change mem+swap hard limit for - * @kb: The mem+swap amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupSetMemSwapHardLimit(virCgroupPtr 
group, unsigned long long kb) -{ - unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED; - - if (kb > maxkb) - return -EINVAL; - else if (kb == maxkb) - return virCgroupSetValueI64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.memsw.limit_in_bytes", - -1); - else - return virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.memsw.limit_in_bytes", - kb << 10); -} - -/** - * virCgroupGetMemSwapHardLimit: - * - * @group: The cgroup to get mem+swap hard limit for - * @kb: The mem+swap amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb) -{ - long long unsigned int limit_in_bytes; - int ret; - ret = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.memsw.limit_in_bytes", &limit_in_bytes); - if (ret == 0) - *kb = limit_in_bytes >> 10; - return ret; -} - -/** - * virCgroupGetMemSwapUsage: - * - * @group: The cgroup to get mem+swap usage for - * @kb: The mem+swap amount in kilobytes - * - * Returns: 0 on success - */ -int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb) -{ - long long unsigned int usage_in_bytes; - int ret; - ret = virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_MEMORY, - "memory.memsw.usage_in_bytes", &usage_in_bytes); - if (ret == 0) - *kb = usage_in_bytes >> 10; - return ret; -} - -/** - * virCgroupSetCpusetMems: - * - * @group: The cgroup to set cpuset.mems for - * @mems: the numa nodes to set - * - * Returns: 0 on success - */ -int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems) -{ - return virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_CPUSET, - "cpuset.mems", - mems); -} - -/** - * virCgroupGetCpusetMems: - * - * @group: The cgroup to get cpuset.mems for - * @mems: the numa nodes to get - * - * Returns: 0 on success - */ -int virCgroupGetCpusetMems(virCgroupPtr group, char **mems) -{ - return virCgroupGetValueStr(group, - VIR_CGROUP_CONTROLLER_CPUSET, - "cpuset.mems", - mems); -} - 
-/** - * virCgroupSetCpusetCpus: - * - * @group: The cgroup to set cpuset.cpus for - * @cpus: the cpus to set - * - * Retuens: 0 on success - */ -int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus) -{ - return virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_CPUSET, - "cpuset.cpus", - cpus); -} - -/** - * virCgroupGetCpusetCpus: - * - * @group: The cgroup to get cpuset.cpus for - * @cpus: the cpus to get - * - * Retuens: 0 on success - */ -int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus) -{ - return virCgroupGetValueStr(group, - VIR_CGROUP_CONTROLLER_CPUSET, - "cpuset.cpus", - cpus); -} - -/** - * virCgroupDenyAllDevices: - * - * @group: The cgroup to deny all permissions, for all devices - * - * Returns: 0 on success - */ -int virCgroupDenyAllDevices(virCgroupPtr group) -{ - return virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_DEVICES, - "devices.deny", - "a"); -} - -/** - * virCgroupAllowDevice: - * - * @group: The cgroup to allow a device for - * @type: The device type (i.e., 'c' or 'b') - * @major: The major number of the device - * @minor: The minor number of the device - * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow - * - * Returns: 0 on success - */ -int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor, - int perms) -{ - int rc; - char *devstr = NULL; - - if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor, - perms & VIR_CGROUP_DEVICE_READ ? "r" : "", - perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", - perms & VIR_CGROUP_DEVICE_MKNOD ? 
"m" : "") == -1) { - rc = -ENOMEM; - goto out; - } - - rc = virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_DEVICES, - "devices.allow", - devstr); -out: - VIR_FREE(devstr); - - return rc; -} - -/** - * virCgroupAllowDeviceMajor: - * - * @group: The cgroup to allow an entire device major type for - * @type: The device type (i.e., 'c' or 'b') - * @major: The major number of the device type - * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow - * - * Returns: 0 on success - */ -int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major, - int perms) -{ - int rc; - char *devstr = NULL; - - if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major, - perms & VIR_CGROUP_DEVICE_READ ? "r" : "", - perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", - perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) { - rc = -ENOMEM; - goto out; - } - - rc = virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_DEVICES, - "devices.allow", - devstr); - out: - VIR_FREE(devstr); - - return rc; -} - -/** - * virCgroupAllowDevicePath: - * - * @group: The cgroup to allow the device for - * @path: the device to allow - * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow - * - * Queries the type of device and its major/minor number, and - * adds that to the cgroup ACL - * - * Returns: 0 on success, 1 if path exists but is not a device, or - * negative errno value on failure - */ -#if defined(major) && defined(minor) -int virCgroupAllowDevicePath(virCgroupPtr group, const char *path, int perms) -{ - struct stat sb; - - if (stat(path, &sb) < 0) - return -errno; - - if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) - return 1; - - return virCgroupAllowDevice(group, - S_ISCHR(sb.st_mode) ? 
'c' : 'b', - major(sb.st_rdev), - minor(sb.st_rdev), - perms); -} -#else -int virCgroupAllowDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED, - const char *path ATTRIBUTE_UNUSED, - int perms ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} -#endif - - -/** - * virCgroupDenyDevice: - * - * @group: The cgroup to deny a device for - * @type: The device type (i.e., 'c' or 'b') - * @major: The major number of the device - * @minor: The minor number of the device - * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny - * - * Returns: 0 on success - */ -int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor, - int perms) -{ - int rc; - char *devstr = NULL; - - if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor, - perms & VIR_CGROUP_DEVICE_READ ? "r" : "", - perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", - perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) { - rc = -ENOMEM; - goto out; - } - - rc = virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_DEVICES, - "devices.deny", - devstr); -out: - VIR_FREE(devstr); - - return rc; -} - -/** - * virCgroupDenyDeviceMajor: - * - * @group: The cgroup to deny an entire device major type for - * @type: The device type (i.e., 'c' or 'b') - * @major: The major number of the device type - * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny - * - * Returns: 0 on success - */ -int virCgroupDenyDeviceMajor(virCgroupPtr group, char type, int major, - int perms) -{ - int rc; - char *devstr = NULL; - - if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major, - perms & VIR_CGROUP_DEVICE_READ ? "r" : "", - perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", - perms & VIR_CGROUP_DEVICE_MKNOD ? 
"m" : "") == -1) { - rc = -ENOMEM; - goto out; - } - - rc = virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_DEVICES, - "devices.deny", - devstr); - out: - VIR_FREE(devstr); - - return rc; -} - -#if defined(major) && defined(minor) -int virCgroupDenyDevicePath(virCgroupPtr group, const char *path, int perms) -{ - struct stat sb; - - if (stat(path, &sb) < 0) - return -errno; - - if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) - return 1; - - return virCgroupDenyDevice(group, - S_ISCHR(sb.st_mode) ? 'c' : 'b', - major(sb.st_rdev), - minor(sb.st_rdev), - perms); -} -#else -int virCgroupDenyDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED, - const char *path ATTRIBUTE_UNUSED, - int perms ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} -#endif - -int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares) -{ - return virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_CPU, - "cpu.shares", shares); -} - -int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares) -{ - return virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_CPU, - "cpu.shares", shares); -} - -/** - * virCgroupSetCpuCfsPeriod: - * - * @group: The cgroup to change cpu.cfs_period_us for - * @cfs_period: The bandwidth period in usecs - * - * Returns: 0 on success - */ -int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period) -{ - /* The cfs_period shoule be greater or equal than 1ms, and less or equal - * than 1s. 
- */ - if (cfs_period < 1000 || cfs_period > 1000000) - return -EINVAL; - - return virCgroupSetValueU64(group, - VIR_CGROUP_CONTROLLER_CPU, - "cpu.cfs_period_us", cfs_period); -} - -/** - * virCgroupGetCpuCfsPeriod: - * - * @group: The cgroup to get cpu.cfs_period_us for - * @cfs_period: Pointer to the returned bandwidth period in usecs - * - * Returns: 0 on success - */ -int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period) -{ - return virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_CPU, - "cpu.cfs_period_us", cfs_period); -} - -/** - * virCgroupSetCpuCfsQuota: - * - * @group: The cgroup to change cpu.cfs_quota_us for - * @cfs_quota: the cpu bandwidth (in usecs) that this tg will be allowed to - * consume over period - * - * Returns: 0 on success - */ -int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota) -{ - if (cfs_quota >= 0) { - /* The cfs_quota shoule be greater or equal than 1ms */ - if (cfs_quota < 1000) - return -EINVAL; - - /* check overflow */ - if (cfs_quota > ULLONG_MAX / 1000) - return -EINVAL; - } - - return virCgroupSetValueI64(group, - VIR_CGROUP_CONTROLLER_CPU, - "cpu.cfs_quota_us", cfs_quota); -} - -/** - * virCgroupGetCpuCfsQuota: - * - * @group: The cgroup to get cpu.cfs_quota_us for - * @cfs_quota: Pointer to the returned cpu bandwidth (in usecs) that this tg - * will be allowed to consume over period - * - * Returns: 0 on success - */ -int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota) -{ - return virCgroupGetValueI64(group, - VIR_CGROUP_CONTROLLER_CPU, - "cpu.cfs_quota_us", cfs_quota); -} - -int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage) -{ - return virCgroupGetValueU64(group, - VIR_CGROUP_CONTROLLER_CPUACCT, - "cpuacct.usage", usage); -} - -int virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage) -{ - return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT, - "cpuacct.usage_percpu", usage); -} - -#ifdef _SC_CLK_TCK -int 
virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user, - unsigned long long *sys) -{ - char *str; - char *p; - int ret; - static double scale = -1.0; - - if ((ret = virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT, - "cpuacct.stat", &str)) < 0) - return ret; - if (!(p = STRSKIP(str, "user ")) || - virStrToLong_ull(p, &p, 10, user) < 0 || - !(p = STRSKIP(p, "\nsystem ")) || - virStrToLong_ull(p, NULL, 10, sys) < 0) { - ret = -EINVAL; - goto cleanup; - } - /* times reported are in system ticks (generally 100 Hz), but that - * rate can theoretically vary between machines. Scale things - * into approximate nanoseconds. */ - if (scale < 0) { - long ticks_per_sec = sysconf(_SC_CLK_TCK); - if (ticks_per_sec == -1) { - ret = -errno; - goto cleanup; - } - scale = 1000000000.0 / ticks_per_sec; - } - *user *= scale; - *sys *= scale; - - ret = 0; -cleanup: - VIR_FREE(str); - return ret; -} -#else -int virCgroupGetCpuacctStat(virCgroupPtr group ATTRIBUTE_UNUSED, - unsigned long long *user ATTRIBUTE_UNUSED, - unsigned long long *sys ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} -#endif - -int virCgroupSetFreezerState(virCgroupPtr group, const char *state) -{ - return virCgroupSetValueStr(group, - VIR_CGROUP_CONTROLLER_FREEZER, - "freezer.state", state); -} - -int virCgroupGetFreezerState(virCgroupPtr group, char **state) -{ - return virCgroupGetValueStr(group, - VIR_CGROUP_CONTROLLER_FREEZER, - "freezer.state", state); -} - - -#if defined HAVE_KILL && defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R -static int virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr pids) -{ - int rc; - int killedAny = 0; - char *keypath = NULL; - bool done = false; - FILE *fp = NULL; - VIR_DEBUG("group=%p path=%s signum=%d pids=%p", - group, group->path, signum, pids); - - rc = virCgroupPathOfController(group, -1, "tasks", &keypath); - if (rc != 0) { - VIR_DEBUG("No path of %s, tasks", group->path); - return rc; - } - - /* PIDs may be forking as we kill them, 
so loop - * until there are no new PIDs found - */ - while (!done) { - done = true; - if (!(fp = fopen(keypath, "r"))) { - rc = -errno; - VIR_DEBUG("Failed to read %s: %m\n", keypath); - goto cleanup; - } else { - while (!feof(fp)) { - unsigned long pid_value; - if (fscanf(fp, "%lu", &pid_value) != 1) { - if (feof(fp)) - break; - rc = -errno; - VIR_DEBUG("Failed to read %s: %m\n", keypath); - goto cleanup; - } - if (virHashLookup(pids, (void*)pid_value)) - continue; - - VIR_DEBUG("pid=%lu", pid_value); - /* Cgroups is a Linux concept, so this cast is safe. */ - if (kill((pid_t)pid_value, signum) < 0) { - if (errno != ESRCH) { - rc = -errno; - goto cleanup; - } - /* Leave RC == 0 since we didn't kill one */ - } else { - killedAny = 1; - done = false; - } - - ignore_value(virHashAddEntry(pids, (void*)pid_value, (void*)1)); - } - VIR_FORCE_FCLOSE(fp); - } - } - - rc = killedAny ? 1 : 0; - -cleanup: - VIR_FREE(keypath); - VIR_FORCE_FCLOSE(fp); - - return rc; -} - - -static uint32_t virCgroupPidCode(const void *name, uint32_t seed) -{ - unsigned long pid_value = (unsigned long)(intptr_t)name; - return virHashCodeGen(&pid_value, sizeof(pid_value), seed); -} -static bool virCgroupPidEqual(const void *namea, const void *nameb) -{ - return namea == nameb; -} -static void *virCgroupPidCopy(const void *name) -{ - return (void*)name; -} - -/* - * Returns - * < 0 : errno that occurred - * 0 : no PIDs killed - * 1 : at least one PID killed - */ -int virCgroupKill(virCgroupPtr group, int signum) -{ - VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum); - int rc; - /* The 'tasks' file in cgroups can contain duplicated - * pids, so we use a hash to track which we've already - * killed. 
- */ - virHashTablePtr pids = virHashCreateFull(100, - NULL, - virCgroupPidCode, - virCgroupPidEqual, - virCgroupPidCopy, - NULL); - - rc = virCgroupKillInternal(group, signum, pids); - - virHashFree(pids); - - return rc; -} - - -static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHashTablePtr pids, bool dormdir) -{ - int rc; - int killedAny = 0; - char *keypath = NULL; - DIR *dp; - virCgroupPtr subgroup = NULL; - struct dirent *ent; - VIR_DEBUG("group=%p path=%s signum=%d pids=%p", group, group->path, signum, pids); - - rc = virCgroupPathOfController(group, -1, "", &keypath); - if (rc != 0) { - VIR_DEBUG("No path of %s, tasks", group->path); - return rc; - } - - if ((rc = virCgroupKillInternal(group, signum, pids)) != 0) - return rc; - - VIR_DEBUG("Iterate over children of %s", keypath); - if (!(dp = opendir(keypath))) { - rc = -errno; - return rc; - } - - while ((ent = readdir(dp))) { - char *subpath; - - if (STREQ(ent->d_name, ".")) - continue; - if (STREQ(ent->d_name, "..")) - continue; - if (ent->d_type != DT_DIR) - continue; - - VIR_DEBUG("Process subdir %s", ent->d_name); - if (virAsprintf(&subpath, "%s/%s", group->path, ent->d_name) < 0) { - rc = -ENOMEM; - goto cleanup; - } - - if ((rc = virCgroupNew(subpath, &subgroup)) != 0) - goto cleanup; - - if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, true)) < 0) - goto cleanup; - if (rc == 1) - killedAny = 1; - - if (dormdir) - virCgroupRemove(subgroup); - - virCgroupFree(&subgroup); - } - - rc = killedAny; - -cleanup: - virCgroupFree(&subgroup); - closedir(dp); - - return rc; -} - -int virCgroupKillRecursive(virCgroupPtr group, int signum) -{ - int rc; - VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum); - virHashTablePtr pids = virHashCreateFull(100, - NULL, - virCgroupPidCode, - virCgroupPidEqual, - virCgroupPidCopy, - NULL); - - rc = virCgroupKillRecursiveInternal(group, signum, pids, false); - - virHashFree(pids); - - return rc; -} - - -int 
virCgroupKillPainfully(virCgroupPtr group) -{ - int i; - int rc; - VIR_DEBUG("cgroup=%p path=%s", group, group->path); - for (i = 0 ; i < 15 ; i++) { - int signum; - if (i == 0) - signum = SIGTERM; - else if (i == 8) - signum = SIGKILL; - else - signum = 0; /* Just check for existence */ - - rc = virCgroupKillRecursive(group, signum); - VIR_DEBUG("Iteration %d rc=%d", i, rc); - /* If rc == -1 we hit error, if 0 we ran out of PIDs */ - if (rc <= 0) - break; - - usleep(200 * 1000); - } - VIR_DEBUG("Complete %d", rc); - return rc; -} - -#else /* !(HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R) */ -int virCgroupKill(virCgroupPtr group ATTRIBUTE_UNUSED, - int signum ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} -int virCgroupKillRecursive(virCgroupPtr group ATTRIBUTE_UNUSED, - int signum ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} - -int virCgroupKillPainfully(virCgroupPtr group ATTRIBUTE_UNUSED) -{ - return -ENOSYS; -} -#endif /* HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R */ diff --git a/src/util/cgroup.h b/src/util/cgroup.h deleted file mode 100644 index fc9e409..0000000 --- a/src/util/cgroup.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * cgroup.h: Interface to tools for managing cgroups - * - * Copyright (C) 2011-2012 Red Hat, Inc. - * Copyright IBM Corp. 2008 - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see - * <http://www.gnu.org/licenses/>. 
- * - * Authors: - * Dan Smith <danms@xxxxxxxxxx> - */ - -#ifndef CGROUP_H -# define CGROUP_H - -struct virCgroup; -typedef struct virCgroup *virCgroupPtr; - -enum { - VIR_CGROUP_CONTROLLER_CPU, - VIR_CGROUP_CONTROLLER_CPUACCT, - VIR_CGROUP_CONTROLLER_CPUSET, - VIR_CGROUP_CONTROLLER_MEMORY, - VIR_CGROUP_CONTROLLER_DEVICES, - VIR_CGROUP_CONTROLLER_FREEZER, - VIR_CGROUP_CONTROLLER_BLKIO, - - VIR_CGROUP_CONTROLLER_LAST -}; - -VIR_ENUM_DECL(virCgroupController); - -int virCgroupForDriver(const char *name, - virCgroupPtr *group, - bool privileged, - bool create); - -int virCgroupGetAppRoot(virCgroupPtr *group); - -int virCgroupForDomain(virCgroupPtr driver, - const char *name, - virCgroupPtr *group, - bool create); - -int virCgroupForVcpu(virCgroupPtr driver, - int vcpuid, - virCgroupPtr *group, - bool create); - -int virCgroupForEmulator(virCgroupPtr driver, - virCgroupPtr *group, - bool create); - -int virCgroupPathOfController(virCgroupPtr group, - int controller, - const char *key, - char **path); - -int virCgroupAddTask(virCgroupPtr group, pid_t pid); - -int virCgroupAddTaskController(virCgroupPtr group, - pid_t pid, - int controller); - -int virCgroupMoveTask(virCgroupPtr src_group, - virCgroupPtr dest_group, - int controller); - -int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight); -int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight); - -int virCgroupSetBlkioDeviceWeight(virCgroupPtr group, - const char *path, - unsigned int weight); - -int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb); -int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb); - -int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb); -int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb); -int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb); -int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb); -int virCgroupSetMemSwapHardLimit(virCgroupPtr group, 
unsigned long long kb); -int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb); -int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb); - -enum { - VIR_CGROUP_DEVICE_READ = 1, - VIR_CGROUP_DEVICE_WRITE = 2, - VIR_CGROUP_DEVICE_MKNOD = 4, - VIR_CGROUP_DEVICE_RW = VIR_CGROUP_DEVICE_READ | VIR_CGROUP_DEVICE_WRITE, - VIR_CGROUP_DEVICE_RWM = VIR_CGROUP_DEVICE_RW | VIR_CGROUP_DEVICE_MKNOD, -}; - -int virCgroupDenyAllDevices(virCgroupPtr group); - -int virCgroupAllowDevice(virCgroupPtr group, - char type, - int major, - int minor, - int perms); -int virCgroupAllowDeviceMajor(virCgroupPtr group, - char type, - int major, - int perms); -int virCgroupAllowDevicePath(virCgroupPtr group, - const char *path, - int perms); - -int virCgroupDenyDevice(virCgroupPtr group, - char type, - int major, - int minor, - int perms); -int virCgroupDenyDeviceMajor(virCgroupPtr group, - char type, - int major, - int perms); -int virCgroupDenyDevicePath(virCgroupPtr group, - const char *path, - int perms); - -int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares); -int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares); - -int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period); -int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period); - -int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota); -int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota); - -int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage); -int virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage); -int virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user, - unsigned long long *sys); - -int virCgroupSetFreezerState(virCgroupPtr group, const char *state); -int virCgroupGetFreezerState(virCgroupPtr group, char **state); - -int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems); -int virCgroupGetCpusetMems(virCgroupPtr 
group, char **mems); - -int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus); -int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus); - -int virCgroupRemove(virCgroupPtr group); - -void virCgroupFree(virCgroupPtr *group); -bool virCgroupMounted(virCgroupPtr cgroup, int controller); - -int virCgroupKill(virCgroupPtr group, int signum); -int virCgroupKillRecursive(virCgroupPtr group, int signum); -int virCgroupKillPainfully(virCgroupPtr group); - -#endif /* CGROUP_H */ diff --git a/src/util/vircgroup.c b/src/util/vircgroup.c new file mode 100644 index 0000000..e9fc67f --- /dev/null +++ b/src/util/vircgroup.c @@ -0,0 +1,2099 @@ +/* + * vircgroup.c: methods for managing control cgroups + * + * Copyright (C) 2010-2012 Red Hat, Inc. + * Copyright IBM Corp. 2008 + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ * + * Authors: + * Dan Smith <danms@xxxxxxxxxx> + */ +#include <config.h> + +#include <stdio.h> +#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R +# include <mntent.h> +#endif +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <signal.h> +#include <libgen.h> +#include <dirent.h> + +#include "internal.h" +#include "util.h" +#include "memory.h" +#include "vircgroup.h" +#include "logging.h" +#include "virfile.h" +#include "virhash.h" +#include "virhashcode.h" + +#define CGROUP_MAX_VAL 512 + +VIR_ENUM_IMPL(virCgroupController, VIR_CGROUP_CONTROLLER_LAST, + "cpu", "cpuacct", "cpuset", "memory", "devices", + "freezer", "blkio"); + +struct virCgroupController { + int type; + char *mountPoint; + char *placement; +}; + +struct virCgroup { + char *path; + + struct virCgroupController controllers[VIR_CGROUP_CONTROLLER_LAST]; +}; + +typedef enum { + VIR_CGROUP_NONE = 0, /* create subdir under each cgroup if possible. */ + VIR_CGROUP_MEM_HIERACHY = 1 << 0, /* call virCgroupSetMemoryUseHierarchy + * before creating subcgroups and + * attaching tasks + */ + VIR_CGROUP_VCPU = 1 << 1, /* create subdir only under the cgroup cpu, + * cpuacct and cpuset if possible. */ +} virCgroupFlags; + +/** + * virCgroupFree: + * + * @group: The group structure to free + */ +void virCgroupFree(virCgroupPtr *group) +{ + int i; + + if (*group == NULL) + return; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + VIR_FREE((*group)->controllers[i].mountPoint); + VIR_FREE((*group)->controllers[i].placement); + } + + VIR_FREE((*group)->path); + VIR_FREE(*group); +} + +/** + * virCgroupMounted: query whether a cgroup subsystem is mounted or not + * + * @cgroup: The group structure to be queried + * @controller: cgroup subsystem id + * + * Returns true if a cgroup is subsystem is mounted. 
+ */ +bool virCgroupMounted(virCgroupPtr cgroup, int controller) +{ + return cgroup->controllers[controller].mountPoint != NULL; +} + +#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R +/* + * Process /proc/mounts figuring out what controllers are + * mounted and where + */ +static int virCgroupDetectMounts(virCgroupPtr group) +{ + int i; + FILE *mounts = NULL; + struct mntent entry; + char buf[CGROUP_MAX_VAL]; + + mounts = fopen("/proc/mounts", "r"); + if (mounts == NULL) { + VIR_ERROR(_("Unable to open /proc/mounts")); + return -ENOENT; + } + + while (getmntent_r(mounts, &entry, buf, sizeof(buf)) != NULL) { + if (STRNEQ(entry.mnt_type, "cgroup")) + continue; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = entry.mnt_opts; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + /* NB, the same controller can appear >1 time in mount list + * due to bind mounts from one location to another. Pick the + * first entry only + */ + if (typelen == len && STREQLEN(typestr, tmp, len) && + !group->controllers[i].mountPoint && + !(group->controllers[i].mountPoint = strdup(entry.mnt_dir))) + goto no_memory; + tmp = next; + } + } + } + + VIR_FORCE_FCLOSE(mounts); + + return 0; + +no_memory: + VIR_FORCE_FCLOSE(mounts); + return -ENOMEM; +} + + +/* + * Process /proc/self/cgroup figuring out what cgroup + * sub-path the current process is assigned to. ie not + * necessarily in the root + */ +static int virCgroupDetectPlacement(virCgroupPtr group) +{ + int i; + FILE *mapping = NULL; + char line[1024]; + + mapping = fopen("/proc/self/cgroup", "r"); + if (mapping == NULL) { + VIR_ERROR(_("Unable to open /proc/self/cgroup")); + return -ENOENT; + } + + while (fgets(line, sizeof(line), mapping) != NULL) { + char *controllers = strchr(line, ':'); + char *path = controllers ? 
strchr(controllers+1, ':') : NULL; + char *nl = path ? strchr(path, '\n') : NULL; + + if (!controllers || !path) + continue; + + if (nl) + *nl = '\0'; + + *path = '\0'; + controllers++; + path++; + + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + const char *typestr = virCgroupControllerTypeToString(i); + int typelen = strlen(typestr); + char *tmp = controllers; + while (tmp) { + char *next = strchr(tmp, ','); + int len; + if (next) { + len = next-tmp; + next++; + } else { + len = strlen(tmp); + } + if (typelen == len && STREQLEN(typestr, tmp, len) && + !(group->controllers[i].placement = strdup(STREQ(path, "/") ? "" : path))) + goto no_memory; + + tmp = next; + } + } + } + + VIR_FORCE_FCLOSE(mapping); + + return 0; + +no_memory: + VIR_FORCE_FCLOSE(mapping); + return -ENOMEM; + +} + +static int virCgroupDetect(virCgroupPtr group) +{ + int any = 0; + int rc; + int i; + + rc = virCgroupDetectMounts(group); + if (rc < 0) { + VIR_ERROR(_("Failed to detect mounts for %s"), group->path); + return rc; + } + + /* Check that at least 1 controller is available */ + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (group->controllers[i].mountPoint != NULL) + any = 1; + } + if (!any) + return -ENXIO; + + + rc = virCgroupDetectPlacement(group); + + if (rc == 0) { + /* Check that for every mounted controller, we found our placement */ + for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) { + if (!group->controllers[i].mountPoint) + continue; + + if (!group->controllers[i].placement) { + VIR_ERROR(_("Could not find placement for controller %s at %s"), + virCgroupControllerTypeToString(i), + group->controllers[i].placement); + rc = -ENOENT; + break; + } + + VIR_DEBUG("Detected mount/mapping %i:%s at %s in %s", i, + virCgroupControllerTypeToString(i), + group->controllers[i].mountPoint, + group->controllers[i].placement); + } + } else { + VIR_ERROR(_("Failed to detect mapping for %s"), group->path); + } + + return rc; +} +#endif + + +int 
virCgroupPathOfController(virCgroupPtr group,
+                              int controller,
+                              const char *key,
+                              char **path)
+{
+    if (controller == -1) {
+        int i;
+        for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+            if (group->controllers[i].mountPoint &&
+                group->controllers[i].placement) {
+                controller = i;
+                break;
+            }
+        }
+    }
+    if (controller == -1)
+        return -ENOSYS;
+
+    if (group->controllers[controller].mountPoint == NULL)
+        return -ENOENT;
+
+    if (group->controllers[controller].placement == NULL)
+        return -ENOENT;
+
+    if (virAsprintf(path, "%s%s%s/%s",
+                    group->controllers[controller].mountPoint,
+                    group->controllers[controller].placement,
+                    STREQ(group->path, "/") ? "" : group->path,
+                    key ? key : "") == -1)
+        return -ENOMEM;
+
+    return 0;
+}
+
+
+/*
+ * Write the string @value into the file @key of @group below @controller.
+ *
+ * Returns 0 on success, the error from virCgroupPathOfController if the
+ * path cannot be built, or -errno if the write fails.
+ */
+static int virCgroupSetValueStr(virCgroupPtr group,
+                                int controller,
+                                const char *key,
+                                const char *value)
+{
+    int rc = 0;
+    char *keypath = NULL;
+
+    rc = virCgroupPathOfController(group, controller, key, &keypath);
+    if (rc != 0)
+        return rc;
+
+    VIR_DEBUG("Set value '%s' to '%s'", keypath, value);
+    rc = virFileWriteStr(keypath, value, 0);
+    if (rc < 0) {
+        rc = -errno;
+        VIR_DEBUG("Failed to write value '%s': %m", value);
+    } else {
+        rc = 0;
+    }
+
+    VIR_FREE(keypath);
+
+    return rc;
+}
+
+/*
+ * Read the file @key of @group below @controller into a newly allocated
+ * string stored in *@value (set to NULL first).  A single trailing
+ * newline, if present, is stripped.
+ *
+ * Returns 0 on success, the error from virCgroupPathOfController if the
+ * path cannot be built, or -errno if the read fails.
+ */
+static int virCgroupGetValueStr(virCgroupPtr group,
+                                int controller,
+                                const char *key,
+                                char **value)
+{
+    int rc;
+    char *keypath = NULL;
+
+    *value = NULL;
+
+    rc = virCgroupPathOfController(group, controller, key, &keypath);
+    if (rc != 0) {
+        VIR_DEBUG("No path of %s, %s", group->path, key);
+        return rc;
+    }
+
+    VIR_DEBUG("Get value %s", keypath);
+
+    rc = virFileReadAll(keypath, 1024*1024, value);
+    if (rc < 0) {
+        rc = -errno;
+        VIR_DEBUG("Failed to read %s: %m\n", keypath);
+    } else {
+        /* Terminated with '\n' has sometimes harmful effects to the caller.
+         * Guard on rc > 0: an empty file would otherwise index [-1]. */
+        if (rc > 0 && (*value)[rc - 1] == '\n')
+            (*value)[rc - 1] = '\0';
+
+        rc = 0;
+    }
+
+    VIR_FREE(keypath);
+
+    return rc;
+}
+
+/* Format @value as unsigned decimal and write it via virCgroupSetValueStr. */
+static int virCgroupSetValueU64(virCgroupPtr group,
+                                int controller,
+                                const char *key,
+                                unsigned long long int value)
+{
+    char *strval = NULL;
+    int rc;
+
+    if (virAsprintf(&strval, "%llu", value) == -1)
+        return -ENOMEM;
+
+    rc = virCgroupSetValueStr(group, controller, key, strval);
+
+    VIR_FREE(strval);
+
+    return rc;
+}
+
+
+
+/* Format @value as signed decimal and write it via virCgroupSetValueStr. */
+static int virCgroupSetValueI64(virCgroupPtr group,
+                                int controller,
+                                const char *key,
+                                long long int value)
+{
+    char *strval = NULL;
+    int rc;
+
+    if (virAsprintf(&strval, "%lld", value) == -1)
+        return -ENOMEM;
+
+    rc = virCgroupSetValueStr(group, controller, key, strval);
+
+    VIR_FREE(strval);
+
+    return rc;
+}
+
+/* Read @key via virCgroupGetValueStr and parse it as a base-10 signed
+ * integer into *@value; returns -EINVAL if parsing fails. */
+static int virCgroupGetValueI64(virCgroupPtr group,
+                                int controller,
+                                const char *key,
+                                long long int *value)
+{
+    char *strval = NULL;
+    int rc = 0;
+
+    rc = virCgroupGetValueStr(group, controller, key, &strval);
+    if (rc != 0)
+        goto out;
+
+    if (virStrToLong_ll(strval, NULL, 10, value) < 0)
+        rc = -EINVAL;
+out:
+    VIR_FREE(strval);
+
+    return rc;
+}
+
+/* Read @key via virCgroupGetValueStr and parse it as a base-10 unsigned
+ * integer into *@value; returns -EINVAL if parsing fails. */
+static int virCgroupGetValueU64(virCgroupPtr group,
+                                int controller,
+                                const char *key,
+                                unsigned long long int *value)
+{
+    char *strval = NULL;
+    int rc = 0;
+
+    rc = virCgroupGetValueStr(group, controller, key, &strval);
+    if (rc != 0)
+        goto out;
+
+    if (virStrToLong_ull(strval, NULL, 10, value) < 0)
+        rc = -EINVAL;
+out:
+    VIR_FREE(strval);
+
+    return rc;
+}
+
+
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+/* Copy cpuset.cpus and cpuset.mems from @parent into @group, stopping at
+ * the first read or write that fails. */
+static int virCgroupCpuSetInherit(virCgroupPtr parent, virCgroupPtr group)
+{
+    int i;
+    int rc = 0;
+    const char *inherit_values[] = {
+        "cpuset.cpus",
+        "cpuset.mems",
+    };
+
+    VIR_DEBUG("Setting up inheritance %s -> %s", parent->path, group->path);
+    for (i = 0; i < ARRAY_CARDINALITY(inherit_values) ; i++) {
+        char *value;
+
+        rc = virCgroupGetValueStr(parent,
+                                  VIR_CGROUP_CONTROLLER_CPUSET,
+                                  inherit_values[i],
+                                  &value);
+        if (rc != 0) {
+            VIR_ERROR(_("Failed to get %s %d"), inherit_values[i], rc);
+            break;
+        }
+
+        VIR_DEBUG("Inherit %s = %s", inherit_values[i], value);
+
+        rc = virCgroupSetValueStr(group,
+                                  VIR_CGROUP_CONTROLLER_CPUSET,
+                                  inherit_values[i],
+                                  value);
+        VIR_FREE(value);
+
+        if (rc != 0) {
+            VIR_ERROR(_("Failed to set %s %d"), inherit_values[i], rc);
+            break;
+        }
+    }
+
+    return rc;
+}
+
+/* Ensure memory.use_hierarchy reads as 1 for @group, writing it only when
+ * it is not already enabled. */
+static int virCgroupSetMemoryUseHierarchy(virCgroupPtr group)
+{
+    int rc = 0;
+    unsigned long long value;
+    const char *filename = "memory.use_hierarchy";
+
+    rc = virCgroupGetValueU64(group,
+                              VIR_CGROUP_CONTROLLER_MEMORY,
+                              filename, &value);
+    if (rc != 0) {
+        VIR_ERROR(_("Failed to read %s/%s (%d)"), group->path, filename, rc);
+        return rc;
+    }
+
+    /* Setting twice causes error, so if already enabled, skip setting */
+    if (value == 1)
+        return 0;
+
+    VIR_DEBUG("Setting up %s/%s", group->path, filename);
+    rc = virCgroupSetValueU64(group,
+                              VIR_CGROUP_CONTROLLER_MEMORY,
+                              filename, 1);
+
+    if (rc != 0) {
+        VIR_ERROR(_("Failed to set %s/%s (%d)"), group->path, filename, rc);
+    }
+
+    return rc;
+}
+
+/*
+ * For every mounted controller of @group, make sure the group directory
+ * exists (creating it when @create is true) and, for freshly created
+ * directories, inherit cpuset settings from @parent and optionally enable
+ * memory.use_hierarchy.  @flags is a mask of VIR_CGROUP_* values.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int virCgroupMakeGroup(virCgroupPtr parent,
+                              virCgroupPtr group,
+                              bool create,
+                              unsigned int flags)
+{
+    int i;
+    int rc = 0;
+
+    VIR_DEBUG("Make group %s", group->path);
+    for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+        char *path = NULL;
+
+        /* Skip over controllers that aren't mounted */
+        if (!group->controllers[i].mountPoint)
+            continue;
+
+        /* We need to control cpu bandwidth for each vcpu now */
+        if ((flags & VIR_CGROUP_VCPU) &&
+            (i != VIR_CGROUP_CONTROLLER_CPU &&
+             i != VIR_CGROUP_CONTROLLER_CPUACCT &&
+             i != VIR_CGROUP_CONTROLLER_CPUSET)) {
+            /* treat it as unmounted and we can use virCgroupAddTask */
+            VIR_FREE(group->controllers[i].mountPoint);
+            continue;
+        }
+
+        rc = virCgroupPathOfController(group, i, "", &path);
+        if (rc < 0)
+            return rc;
+        /* As of Feb 2011, clang can't see that the above function
+         * call did not modify group.  */
+        sa_assert(group->controllers[i].mountPoint);
+
+        VIR_DEBUG("Make controller %s", path);
+        if (access(path, F_OK) != 0) {
+            if (!create ||
+                mkdir(path, 0755) < 0) {
+                /* With a kernel that doesn't support multi-level directory
+                 * for blkio controller, libvirt will fail and disable all
+                 * other controllers even though they are available. So
+                 * treat blkio as unmounted if mkdir fails. */
+                if (i == VIR_CGROUP_CONTROLLER_BLKIO) {
+                    rc = 0;
+                    VIR_FREE(group->controllers[i].mountPoint);
+                    VIR_FREE(path);
+                    continue;
+                } else {
+                    rc = -errno;
+                    VIR_FREE(path);
+                    break;
+                }
+            }
+            if (group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint != NULL &&
+                (i == VIR_CGROUP_CONTROLLER_CPUSET ||
+                 STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_CPUSET].mountPoint))) {
+                rc = virCgroupCpuSetInherit(parent, group);
+                if (rc != 0) {
+                    VIR_FREE(path);
+                    break;
+                }
+            }
+            /*
+             * Note that virCgroupSetMemoryUseHierarchy should always be
+             * called prior to creating subcgroups and attaching tasks.
+             */
+            if ((flags & VIR_CGROUP_MEM_HIERACHY) &&
+                (group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint != NULL) &&
+                (i == VIR_CGROUP_CONTROLLER_MEMORY ||
+                 STREQ(group->controllers[i].mountPoint, group->controllers[VIR_CGROUP_CONTROLLER_MEMORY].mountPoint))) {
+                rc = virCgroupSetMemoryUseHierarchy(group);
+                if (rc != 0) {
+                    VIR_FREE(path);
+                    break;
+                }
+            }
+        }
+
+        VIR_FREE(path);
+    }
+
+    return rc;
+}
+
+
+/*
+ * Allocate a group object for @path and run controller detection on it.
+ * On failure *@group is freed and left NULL.
+ *
+ * Returns 0 on success or a negative errno value on failure.
+ */
+static int virCgroupNew(const char *path,
+                        virCgroupPtr *group)
+{
+    int rc = 0;
+
+    VIR_DEBUG("New group %s", path);
+    *group = NULL;
+
+    if (VIR_ALLOC((*group)) != 0) {
+        rc = -ENOMEM;
+        goto err;
+    }
+
+    if (!((*group)->path = strdup(path))) {
+        rc = -ENOMEM;
+        goto err;
+    }
+
+    rc = virCgroupDetect(*group);
+    if (rc < 0)
+        goto err;
+
+    return rc;
+err:
+    virCgroupFree(group);
+    *group = NULL;
+
+    return rc;
+}
+
+/*
+ * Build the application root group: "/libvirt" when @privileged, otherwise
+ * "/libvirt-<username>" for the calling user, and make it below "/".
+ */
+static int virCgroupAppRoot(bool privileged,
+                            virCgroupPtr *group,
+                            bool create)
+{
+    virCgroupPtr rootgrp = NULL;
+    int rc;
+
+    rc = virCgroupNew("/", &rootgrp);
+    if (rc != 0)
+        return rc;
+
+    if (privileged) {
+        rc = virCgroupNew("/libvirt", group);
+    } else {
+        char *rootname;
+        char *username;
+        username = virGetUserName(getuid());
+        if (!username) {
+            rc = -ENOMEM;
+            goto cleanup;
+        }
+        rc = virAsprintf(&rootname, "/libvirt-%s", username);
+        VIR_FREE(username);
+        if (rc < 0) {
+            rc = -ENOMEM;
+            goto cleanup;
+        }
+
+        rc = virCgroupNew(rootname, group);
+        VIR_FREE(rootname);
+    }
+    if (rc != 0)
+        goto cleanup;
+
+    rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE);
+
+cleanup:
+    virCgroupFree(&rootgrp);
+    return rc;
+}
+#endif
+
+#if defined _DIRENT_HAVE_D_TYPE
+/*
+ * Remove the directory @grppath after recursively removing every
+ * sub-directory whose name does not start with '.'.  A missing directory
+ * is not treated as an error.
+ */
+static int virCgroupRemoveRecursively(char *grppath)
+{
+    DIR *grpdir;
+    struct dirent *ent;
+    int rc = 0;
+
+    grpdir = opendir(grppath);
+    if (grpdir == NULL) {
+        if (errno == ENOENT)
+            return 0;
+        rc = -errno;
+        VIR_ERROR(_("Unable to open %s (%d)"), grppath, errno);
+        return rc;
+    }
+
+    for (;;) {
+        char *path;
+
+        /* readdir() returns NULL both at end-of-directory and on error;
+         * clearing errno first lets the two be told apart. */
+        errno = 0;
+        ent = readdir(grpdir);
+        if (ent == NULL) {
+            if ((rc = -errno))
+                VIR_ERROR(_("Failed to readdir for %s (%d)"), grppath, errno);
+            break;
+        }
+
+        if (ent->d_name[0] == '.') continue;
+        if (ent->d_type != DT_DIR) continue;
+
+        if (virAsprintf(&path, "%s/%s", grppath, ent->d_name) == -1) {
+            rc = -ENOMEM;
+            break;
+        }
+        rc = virCgroupRemoveRecursively(path);
+        VIR_FREE(path);
+        if (rc != 0)
+            break;
+    }
+    closedir(grpdir);
+
+    VIR_DEBUG("Removing cgroup %s", grppath);
+    if (rmdir(grppath) != 0 && errno != ENOENT) {
+        rc = -errno;
+        VIR_ERROR(_("Unable to remove %s (%d)"), grppath, errno);
+    }
+
+    return rc;
+}
+#else
+static int virCgroupRemoveRecursively(char *grppath ATTRIBUTE_UNUSED)
+{
+    /* Claim no support */
+    return -ENXIO;
+}
+#endif
+
+/**
+ * virCgroupRemove:
+ *
+ * @group: The group to be removed
+ *
+ * It first removes all child groups recursively
+ * in depth first order and then removes @group
+ * because the presence of the child groups
+ * prevents removing @group.
+ *
+ * Returns: 0 on success
+ */
+int virCgroupRemove(virCgroupPtr group)
+{
+    int rc = 0;
+    int i;
+    char *grppath = NULL;
+
+    /* Note: rc is overwritten on every iteration, so the value returned
+     * is the result for the last controller that was processed. */
+    for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+        /* Skip over controllers not mounted */
+        if (!group->controllers[i].mountPoint)
+            continue;
+
+        if (virCgroupPathOfController(group,
+                                      i,
+                                      NULL,
+                                      &grppath) != 0)
+            continue;
+
+        VIR_DEBUG("Removing cgroup %s and all child cgroups", grppath);
+        rc = virCgroupRemoveRecursively(grppath);
+        VIR_FREE(grppath);
+    }
+
+    return rc;
+}
+
+/**
+ * virCgroupAddTask:
+ *
+ * @group: The cgroup to add a task to
+ * @pid: The pid of the task to add
+ *
+ * The pid is written to the "tasks" file of every mounted controller;
+ * the first failure stops the loop.
+ *
+ * Returns: 0 on success
+ */
+int virCgroupAddTask(virCgroupPtr group, pid_t pid)
+{
+    int rc = 0;
+    int i;
+
+    for (i = 0 ; i < VIR_CGROUP_CONTROLLER_LAST ; i++) {
+        /* Skip over controllers not mounted */
+        if (!group->controllers[i].mountPoint)
+            continue;
+
+        rc = virCgroupSetValueU64(group, i, "tasks", (unsigned long long)pid);
+        if (rc != 0)
+            break;
+    }
+
+    return rc;
+}
+
+/**
+ * virCgroupAddTaskController:
+ *
+ * @group: The cgroup to add a task to
+ * @pid: The pid of the task to add
+ * @controller: The cgroup controller to be operated on
+ *
+ * Returns: 0 on success or -errno on failure
+ */
+int virCgroupAddTaskController(virCgroupPtr group, pid_t pid, int controller)
+{
+    if (controller < 0 || controller >= VIR_CGROUP_CONTROLLER_LAST)
+        return -EINVAL;
+
+    if (!group->controllers[controller].mountPoint)
+        return -EINVAL;
+
+    return virCgroupSetValueU64(group, controller, "tasks",
+                                (unsigned long long)pid);
+}
+
+
+/*
+ * Add every pid listed in @pidstr (one decimal pid per line) to @group
+ * for @controller.
+ *
+ * Returns 0 on success, -ENOMEM if the working copy cannot be allocated,
+ * -EINVAL if a pid cannot be parsed, or the error reported by
+ * virCgroupAddTaskController.
+ */
+static int virCgroupAddTaskStrController(virCgroupPtr group,
+                                         const char *pidstr,
+                                         int controller)
+{
+    char *str = NULL, *cur = NULL, *next = NULL;
+    unsigned long long p = 0;
+    int rc = 0;
+    char *endp;
+
+    /* Report a real errno value: a bare -1 would read as -EPERM to
+     * callers that document "-errno on failure". */
+    if (virAsprintf(&str, "%s", pidstr) < 0)
+        return -ENOMEM;
+
+    cur = str;
+    while (*cur != '\0') {
+        if (virStrToLong_ull(cur, &endp, 10, &p) < 0) {
+            rc = -EINVAL;
+            goto cleanup;
+        }
+
+        rc = virCgroupAddTaskController(group, p, controller);
+        if (rc != 0)
+            goto cleanup;
+
+        next = strchr(cur, '\n');
+        if (next) {
+            cur = next + 1;
+            *next = '\0';
+        } else {
+            break;
+        }
+    }
+
+cleanup:
+    VIR_FREE(str);
+    return rc;
+}
+
+/**
+ * virCgroupMoveTask:
+ *
+ * @src_group: The source cgroup where all tasks are removed from
+ * @dest_group: The destination where all tasks are added to
+ * @controller: The cgroup controller to be operated on
+ *
+ * Returns: 0 on success or -errno on failure
+ */
+int virCgroupMoveTask(virCgroupPtr src_group, virCgroupPtr dest_group,
+                      int controller)
+{
+    int rc = 0, err = 0;
+    char *content = NULL;
+
+    if (controller < VIR_CGROUP_CONTROLLER_CPU ||
+        controller > VIR_CGROUP_CONTROLLER_BLKIO)
+        return -EINVAL;
+
+    if (!src_group->controllers[controller].mountPoint ||
+        !dest_group->controllers[controller].mountPoint) {
+        VIR_WARN("no vm cgroup in controller %d", controller);
+        return 0;
+    }
+
+    rc = virCgroupGetValueStr(src_group, controller, "tasks", &content);
+    if (rc != 0)
+        return rc;
+
+    rc = virCgroupAddTaskStrController(dest_group, content, controller);
+    if (rc != 0)
+        goto cleanup;
+
+    VIR_FREE(content);
+
+    return 0;
+
+cleanup:
+    /*
+     * We don't need to recover dest_cgroup because cgroup will make sure
+     * that one task only resides in one cgroup of the same controller.
+     */
+    err = virCgroupAddTaskStrController(src_group, content, controller);
+    if (err != 0)
+        VIR_ERROR(_("Cannot recover cgroup %s from %s"),
+                  src_group->controllers[controller].mountPoint,
+                  dest_group->controllers[controller].mountPoint);
+    VIR_FREE(content);
+
+    return rc;
+}
+
+/**
+ * virCgroupForDriver:
+ *
+ * @name: name of this driver (e.g., xen, qemu, lxc)
+ * @group: Pointer to returned virCgroupPtr
+ * @privileged: passed through to the application root lookup
+ * @create: whether missing group directories may be created
+ *
+ * Returns 0 on success
+ */
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+int virCgroupForDriver(const char *name,
+                       virCgroupPtr *group,
+                       bool privileged,
+                       bool create)
+{
+    int rc;
+    char *path = NULL;
+    virCgroupPtr rootgrp = NULL;
+
+    rc = virCgroupAppRoot(privileged, &rootgrp, create);
+    if (rc != 0)
+        goto out;
+
+    if (virAsprintf(&path, "%s/%s", rootgrp->path, name) < 0) {
+        rc = -ENOMEM;
+        goto out;
+    }
+
+    rc = virCgroupNew(path, group);
+    VIR_FREE(path);
+
+    if (rc == 0) {
+        rc = virCgroupMakeGroup(rootgrp, *group, create, VIR_CGROUP_NONE);
+        if (rc != 0)
+            virCgroupFree(group);
+    }
+
+out:
+    virCgroupFree(&rootgrp);
+
+    return rc;
+}
+#else
+int virCgroupForDriver(const char *name ATTRIBUTE_UNUSED,
+                       virCgroupPtr *group ATTRIBUTE_UNUSED,
+                       bool privileged ATTRIBUTE_UNUSED,
+                       bool create ATTRIBUTE_UNUSED)
+{
+    /* Claim no support */
+    return -ENXIO;
+}
+#endif
+
+/**
+ * virCgroupGetAppRoot:
+ *
+ * @group: Pointer to returned virCgroupPtr
+ *
+ * Returns 0 on success
+ */
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+int virCgroupGetAppRoot(virCgroupPtr *group)
+{
+    return virCgroupNew("/", group);
+}
+#else
+int virCgroupGetAppRoot(virCgroupPtr *group ATTRIBUTE_UNUSED)
+{
+    return -ENXIO;
+}
+#endif
+
+/**
+ * virCgroupForDomain:
+ *
+ * @driver: group for driver owning the domain
+ * @name: name of the domain
+ * @group: Pointer to returned virCgroupPtr
+ * @create: whether missing group directories may be created
+ *
+ * Returns 0 on success
+ */
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+int virCgroupForDomain(virCgroupPtr driver,
+                       const char *name,
+                       virCgroupPtr *group,
+                       bool create)
+{
+    int rc;
+    char *path;
+
+    if (driver == NULL)
+        return -EINVAL;
+
+    if (virAsprintf(&path, "%s/%s", driver->path, name) < 0)
+        return -ENOMEM;
+
+    rc = virCgroupNew(path, group);
+    VIR_FREE(path);
+
+    if (rc == 0) {
+        /*
+         * Create a cgroup with memory.use_hierarchy enabled to
+         * surely account memory usage of lxc with ns subsystem
+         * enabled. (To be exact, memory and ns subsystems are
+         * enabled at the same time.)
+         *
+         * The reason why doing it here, not a upper group, say
+         * a group for driver, is to avoid overhead to track
+         * cumulative usage that we don't need.
+         */
+        rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_MEM_HIERACHY);
+        if (rc != 0)
+            virCgroupFree(group);
+    }
+
+    return rc;
+}
+#else
+int virCgroupForDomain(virCgroupPtr driver ATTRIBUTE_UNUSED,
+                       const char *name ATTRIBUTE_UNUSED,
+                       virCgroupPtr *group ATTRIBUTE_UNUSED,
+                       bool create ATTRIBUTE_UNUSED)
+{
+    return -ENXIO;
+}
+#endif
+
+/**
+ * virCgroupForVcpu:
+ *
+ * @driver: group for the domain
+ * @vcpuid: id of the vcpu
+ * @group: Pointer to returned virCgroupPtr
+ * @create: whether missing group directories may be created
+ *
+ * Returns 0 on success
+ */
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+int virCgroupForVcpu(virCgroupPtr driver,
+                     int vcpuid,
+                     virCgroupPtr *group,
+                     bool create)
+{
+    int rc;
+    char *path;
+
+    if (driver == NULL)
+        return -EINVAL;
+
+    if (virAsprintf(&path, "%s/vcpu%d", driver->path, vcpuid) < 0)
+        return -ENOMEM;
+
+    rc = virCgroupNew(path, group);
+    VIR_FREE(path);
+
+    if (rc == 0) {
+        rc = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU);
+        if (rc != 0)
+            virCgroupFree(group);
+    }
+
+    return rc;
+}
+#else
+int virCgroupForVcpu(virCgroupPtr driver ATTRIBUTE_UNUSED,
+                     int vcpuid ATTRIBUTE_UNUSED,
+                     virCgroupPtr *group ATTRIBUTE_UNUSED,
+                     bool create ATTRIBUTE_UNUSED)
+{
+    return -ENXIO;
+}
+#endif
+
+/**
+ * virCgroupForEmulator:
+ *
+ * @driver: group for the domain
+ * @group: Pointer to returned virCgroupPtr
+ *
+ * Returns: 0 on success or -errno on
failure
+ */
+#if defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R
+int virCgroupForEmulator(virCgroupPtr driver,
+                         virCgroupPtr *group,
+                         bool create)
+{
+    char *emulatorPath = NULL;
+    int ret;
+
+    if (!driver)
+        return -EINVAL;
+
+    /* The emulator group lives directly below the domain group. */
+    if (virAsprintf(&emulatorPath, "%s/emulator", driver->path) < 0)
+        return -ENOMEM;
+
+    ret = virCgroupNew(emulatorPath, group);
+    VIR_FREE(emulatorPath);
+    if (ret != 0)
+        return ret;
+
+    ret = virCgroupMakeGroup(driver, *group, create, VIR_CGROUP_VCPU);
+    if (ret != 0)
+        virCgroupFree(group);
+
+    return ret;
+}
+#else
+int virCgroupForEmulator(virCgroupPtr driver ATTRIBUTE_UNUSED,
+                         virCgroupPtr *group ATTRIBUTE_UNUSED,
+                         bool create ATTRIBUTE_UNUSED)
+{
+    return -ENXIO;
+}
+
+#endif
+/**
+ * virCgroupSetBlkioWeight:
+ *
+ * @group: The cgroup to change io weight for
+ * @weight: The weight for this cgroup; must lie within 100..1000
+ *
+ * Returns: 0 on success, -EINVAL if @weight is out of range
+ */
+int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight)
+{
+    if (!(weight >= 100 && weight <= 1000))
+        return -EINVAL;
+
+    return virCgroupSetValueU64(group,
+                                VIR_CGROUP_CONTROLLER_BLKIO,
+                                "blkio.weight",
+                                weight);
+}
+
+/**
+ * virCgroupGetBlkioWeight:
+ *
+ * @group: The cgroup to get weight for
+ * @weight: Pointer to returned weight; written only on success
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight)
+{
+    unsigned long long value;
+    int rc = virCgroupGetValueU64(group,
+                                  VIR_CGROUP_CONTROLLER_BLKIO,
+                                  "blkio.weight", &value);
+
+    if (rc != 0)
+        return rc;
+
+    *weight = value;
+    return 0;
+}
+
+/**
+ * virCgroupSetBlkioDeviceWeight:
+ *
+ * @group: The cgroup to change the io device weight for
+ * @path: The device with a weight to alter
+ * @weight: The new device weight (100-1000), or 0 to clear
+ *
+ * device_weight is treated as a write-only parameter, so
+ * there isn't a getter counterpart.
+ *
+ * Returns: 0 on success, -errno on failure
+ */
+#if defined(major) && defined(minor)
+int virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
+                                  const char *path,
+                                  unsigned int weight)
+{
+    char *str;
+    struct stat sb;
+    int ret;
+
+    /* 0 is allowed in addition to the 100..1000 range */
+    if (weight && (weight > 1000 || weight < 100))
+        return -EINVAL;
+
+    if (stat(path, &sb) < 0)
+        return -errno;
+
+    if (!S_ISBLK(sb.st_mode))
+        return -EINVAL;
+
+    /* weight is unsigned, so format it with %u; an allocation failure is
+     * reported as -ENOMEM like everywhere else in this file. */
+    if (virAsprintf(&str, "%d:%d %u", major(sb.st_rdev), minor(sb.st_rdev),
+                    weight) < 0)
+        return -ENOMEM;
+
+    ret = virCgroupSetValueStr(group,
+                               VIR_CGROUP_CONTROLLER_BLKIO,
+                               "blkio.weight_device",
+                               str);
+    VIR_FREE(str);
+    return ret;
+}
+#else
+int
+virCgroupSetBlkioDeviceWeight(virCgroupPtr group ATTRIBUTE_UNUSED,
+                              const char *path ATTRIBUTE_UNUSED,
+                              unsigned int weight ATTRIBUTE_UNUSED)
+{
+    return -ENOSYS;
+}
+#endif
+
+/**
+ * virCgroupSetMemory:
+ *
+ * @group: The cgroup to change memory for
+ * @kb: The memory amount in kilobytes
+ *
+ * A value equal to VIR_DOMAIN_MEMORY_PARAM_UNLIMITED is written as -1;
+ * larger values are rejected with -EINVAL.
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb)
+{
+    unsigned long long maxkb = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
+
+    if (kb > maxkb)
+        return -EINVAL;
+    else if (kb == maxkb)
+        return virCgroupSetValueI64(group,
+                                    VIR_CGROUP_CONTROLLER_MEMORY,
+                                    "memory.limit_in_bytes",
+                                    -1);
+    else
+        return virCgroupSetValueU64(group,
+                                    VIR_CGROUP_CONTROLLER_MEMORY,
+                                    "memory.limit_in_bytes",
+                                    kb << 10);
+}
+
+/**
+ * virCgroupGetMemoryUsage:
+ *
+ * @group: The cgroup to get the memory usage for
+ * @kb: Pointer to returned used memory in kilobytes
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb)
+{
+    long long unsigned int usage_in_bytes;
+    int ret;
+    ret = virCgroupGetValueU64(group,
+                               VIR_CGROUP_CONTROLLER_MEMORY,
+                               "memory.usage_in_bytes", &usage_in_bytes);
+    /* Shift in 64 bits first, then narrow: casting before the shift
+     * truncates the byte count where unsigned long is 32 bits wide. */
+    if (ret == 0)
+        *kb = (unsigned long) (usage_in_bytes >> 10);
+    return ret;
+}
+
+/**
+ * virCgroupSetMemoryHardLimit:
+ *
+ * @group: The cgroup to change memory hard limit for
+ * @kb:
The memory amount in kilobytes
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb)
+{
+    /* The hard limit is the plain memory limit. */
+    return virCgroupSetMemory(group, kb);
+}
+
+/**
+ * virCgroupGetMemoryHardLimit:
+ *
+ * @group: The cgroup to get the memory hard limit for
+ * @kb: Pointer to the returned limit in kilobytes; written only on success
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb)
+{
+    unsigned long long bytes;
+    int rc = virCgroupGetValueU64(group,
+                                  VIR_CGROUP_CONTROLLER_MEMORY,
+                                  "memory.limit_in_bytes", &bytes);
+
+    if (rc != 0)
+        return rc;
+
+    *kb = bytes >> 10;
+    return 0;
+}
+
+/**
+ * virCgroupSetMemorySoftLimit:
+ *
+ * @group: The cgroup to change memory soft limit for
+ * @kb: The memory amount in kilobytes
+ *
+ * A value equal to VIR_DOMAIN_MEMORY_PARAM_UNLIMITED is written as -1;
+ * larger values are rejected with -EINVAL.
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb)
+{
+    const unsigned long long unlimited = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
+
+    if (kb > unlimited)
+        return -EINVAL;
+
+    if (kb == unlimited)
+        return virCgroupSetValueI64(group,
+                                    VIR_CGROUP_CONTROLLER_MEMORY,
+                                    "memory.soft_limit_in_bytes",
+                                    -1);
+
+    return virCgroupSetValueU64(group,
+                                VIR_CGROUP_CONTROLLER_MEMORY,
+                                "memory.soft_limit_in_bytes",
+                                kb << 10);
+}
+
+
+/**
+ * virCgroupGetMemorySoftLimit:
+ *
+ * @group: The cgroup to get the memory soft limit for
+ * @kb: Pointer to the returned limit in kilobytes; written only on success
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb)
+{
+    unsigned long long bytes;
+    int rc = virCgroupGetValueU64(group,
+                                  VIR_CGROUP_CONTROLLER_MEMORY,
+                                  "memory.soft_limit_in_bytes", &bytes);
+
+    if (rc != 0)
+        return rc;
+
+    *kb = bytes >> 10;
+    return 0;
+}
+
+/**
+ * virCgroupSetMemSwapHardLimit:
+ *
+ * @group: The cgroup to change mem+swap hard limit for
+ * @kb: The mem+swap amount in kilobytes
+ *
+ * A value equal to VIR_DOMAIN_MEMORY_PARAM_UNLIMITED is written as -1;
+ * larger values are rejected with -EINVAL.
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb)
+{
+    const unsigned long long unlimited = VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
+
+    if (kb > unlimited)
+        return -EINVAL;
+
+    if (kb == unlimited)
+        return virCgroupSetValueI64(group,
+                                    VIR_CGROUP_CONTROLLER_MEMORY,
+                                    "memory.memsw.limit_in_bytes",
+                                    -1);
+
+    return virCgroupSetValueU64(group,
+                                VIR_CGROUP_CONTROLLER_MEMORY,
+                                "memory.memsw.limit_in_bytes",
+                                kb << 10);
+}
+
+/**
+ * virCgroupGetMemSwapHardLimit:
+ *
+ * @group: The cgroup to get mem+swap hard limit for
+ * @kb: Pointer to the returned limit in kilobytes; written only on success
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb)
+{
+    unsigned long long bytes;
+    int rc = virCgroupGetValueU64(group,
+                                  VIR_CGROUP_CONTROLLER_MEMORY,
+                                  "memory.memsw.limit_in_bytes", &bytes);
+
+    if (rc != 0)
+        return rc;
+
+    *kb = bytes >> 10;
+    return 0;
+}
+
+/**
+ * virCgroupGetMemSwapUsage:
+ *
+ * @group: The cgroup to get mem+swap usage for
+ * @kb: Pointer to the returned usage in kilobytes; written only on success
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb)
+{
+    unsigned long long bytes;
+    int rc = virCgroupGetValueU64(group,
+                                  VIR_CGROUP_CONTROLLER_MEMORY,
+                                  "memory.memsw.usage_in_bytes", &bytes);
+
+    if (rc != 0)
+        return rc;
+
+    *kb = bytes >> 10;
+    return 0;
+}
+
+/**
+ * virCgroupSetCpusetMems:
+ *
+ * @group: The cgroup to set cpuset.mems for
+ * @mems: the numa nodes to set
+ *
+ * Returns: 0 on success
+ */
+int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems)
+{
+    const char *key = "cpuset.mems";
+
+    return virCgroupSetValueStr(group, VIR_CGROUP_CONTROLLER_CPUSET, key, mems);
+}
+
+/**
+ * virCgroupGetCpusetMems:
+ *
+ * @group: The cgroup to get cpuset.mems for
+ * @mems: the numa nodes to get
+ *
+ * Returns: 0 on success
+ */
+int virCgroupGetCpusetMems(virCgroupPtr group, char **mems)
+{
+    const char *key = "cpuset.mems";
+
+    return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUSET, key, mems);
+}
+
+/** + * virCgroupSetCpusetCpus: + * + * @group: The cgroup to set cpuset.cpus for + * @cpus: the cpus to set + * + * Retuens: 0 on success + */ +int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.cpus", + cpus); +} + +/** + * virCgroupGetCpusetCpus: + * + * @group: The cgroup to get cpuset.cpus for + * @cpus: the cpus to get + * + * Retuens: 0 on success + */ +int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus) +{ + return virCgroupGetValueStr(group, + VIR_CGROUP_CONTROLLER_CPUSET, + "cpuset.cpus", + cpus); +} + +/** + * virCgroupDenyAllDevices: + * + * @group: The cgroup to deny all permissions, for all devices + * + * Returns: 0 on success + */ +int virCgroupDenyAllDevices(virCgroupPtr group) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + "a"); +} + +/** + * virCgroupAllowDevice: + * + * @group: The cgroup to allow a device for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device + * @minor: The minor number of the device + * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow + * + * Returns: 0 on success + */ +int virCgroupAllowDevice(virCgroupPtr group, char type, int major, int minor, + int perms) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor, + perms & VIR_CGROUP_DEVICE_READ ? "r" : "", + perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", + perms & VIR_CGROUP_DEVICE_MKNOD ? 
"m" : "") == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.allow", + devstr); +out: + VIR_FREE(devstr); + + return rc; +} + +/** + * virCgroupAllowDeviceMajor: + * + * @group: The cgroup to allow an entire device major type for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device type + * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow + * + * Returns: 0 on success + */ +int virCgroupAllowDeviceMajor(virCgroupPtr group, char type, int major, + int perms) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major, + perms & VIR_CGROUP_DEVICE_READ ? "r" : "", + perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", + perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.allow", + devstr); + out: + VIR_FREE(devstr); + + return rc; +} + +/** + * virCgroupAllowDevicePath: + * + * @group: The cgroup to allow the device for + * @path: the device to allow + * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to allow + * + * Queries the type of device and its major/minor number, and + * adds that to the cgroup ACL + * + * Returns: 0 on success, 1 if path exists but is not a device, or + * negative errno value on failure + */ +#if defined(major) && defined(minor) +int virCgroupAllowDevicePath(virCgroupPtr group, const char *path, int perms) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return 1; + + return virCgroupAllowDevice(group, + S_ISCHR(sb.st_mode) ? 
'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev), + perms); +} +#else +int virCgroupAllowDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED, + const char *path ATTRIBUTE_UNUSED, + int perms ATTRIBUTE_UNUSED) +{ + return -ENOSYS; +} +#endif + + +/** + * virCgroupDenyDevice: + * + * @group: The cgroup to deny a device for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device + * @minor: The minor number of the device + * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny + * + * Returns: 0 on success + */ +int virCgroupDenyDevice(virCgroupPtr group, char type, int major, int minor, + int perms) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:%i %s%s%s", type, major, minor, + perms & VIR_CGROUP_DEVICE_READ ? "r" : "", + perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", + perms & VIR_CGROUP_DEVICE_MKNOD ? "m" : "") == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + devstr); +out: + VIR_FREE(devstr); + + return rc; +} + +/** + * virCgroupDenyDeviceMajor: + * + * @group: The cgroup to deny an entire device major type for + * @type: The device type (i.e., 'c' or 'b') + * @major: The major number of the device type + * @perms: Bitwise or of VIR_CGROUP_DEVICE permission bits to deny + * + * Returns: 0 on success + */ +int virCgroupDenyDeviceMajor(virCgroupPtr group, char type, int major, + int perms) +{ + int rc; + char *devstr = NULL; + + if (virAsprintf(&devstr, "%c %i:* %s%s%s", type, major, + perms & VIR_CGROUP_DEVICE_READ ? "r" : "", + perms & VIR_CGROUP_DEVICE_WRITE ? "w" : "", + perms & VIR_CGROUP_DEVICE_MKNOD ? 
"m" : "") == -1) { + rc = -ENOMEM; + goto out; + } + + rc = virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_DEVICES, + "devices.deny", + devstr); + out: + VIR_FREE(devstr); + + return rc; +} + +#if defined(major) && defined(minor) +int virCgroupDenyDevicePath(virCgroupPtr group, const char *path, int perms) +{ + struct stat sb; + + if (stat(path, &sb) < 0) + return -errno; + + if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) + return 1; + + return virCgroupDenyDevice(group, + S_ISCHR(sb.st_mode) ? 'c' : 'b', + major(sb.st_rdev), + minor(sb.st_rdev), + perms); +} +#else +int virCgroupDenyDevicePath(virCgroupPtr group ATTRIBUTE_UNUSED, + const char *path ATTRIBUTE_UNUSED, + int perms ATTRIBUTE_UNUSED) +{ + return -ENOSYS; +} +#endif + +int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares) +{ + return virCgroupSetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.shares", shares); +} + +int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares) +{ + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.shares", shares); +} + +/** + * virCgroupSetCpuCfsPeriod: + * + * @group: The cgroup to change cpu.cfs_period_us for + * @cfs_period: The bandwidth period in usecs + * + * Returns: 0 on success + */ +int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period) +{ + /* The cfs_period shoule be greater or equal than 1ms, and less or equal + * than 1s. 
+ */ + if (cfs_period < 1000 || cfs_period > 1000000) + return -EINVAL; + + return virCgroupSetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_period_us", cfs_period); +} + +/** + * virCgroupGetCpuCfsPeriod: + * + * @group: The cgroup to get cpu.cfs_period_us for + * @cfs_period: Pointer to the returned bandwidth period in usecs + * + * Returns: 0 on success + */ +int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period) +{ + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_period_us", cfs_period); +} + +/** + * virCgroupSetCpuCfsQuota: + * + * @group: The cgroup to change cpu.cfs_quota_us for + * @cfs_quota: the cpu bandwidth (in usecs) that this tg will be allowed to + * consume over period + * + * Returns: 0 on success + */ +int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota) +{ + if (cfs_quota >= 0) { + /* The cfs_quota shoule be greater or equal than 1ms */ + if (cfs_quota < 1000) + return -EINVAL; + + /* check overflow */ + if (cfs_quota > ULLONG_MAX / 1000) + return -EINVAL; + } + + return virCgroupSetValueI64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_quota_us", cfs_quota); +} + +/** + * virCgroupGetCpuCfsQuota: + * + * @group: The cgroup to get cpu.cfs_quota_us for + * @cfs_quota: Pointer to the returned cpu bandwidth (in usecs) that this tg + * will be allowed to consume over period + * + * Returns: 0 on success + */ +int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota) +{ + return virCgroupGetValueI64(group, + VIR_CGROUP_CONTROLLER_CPU, + "cpu.cfs_quota_us", cfs_quota); +} + +int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage) +{ + return virCgroupGetValueU64(group, + VIR_CGROUP_CONTROLLER_CPUACCT, + "cpuacct.usage", usage); +} + +int virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage) +{ + return virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT, + "cpuacct.usage_percpu", usage); +} + +#ifdef _SC_CLK_TCK +int 
virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user, + unsigned long long *sys) +{ + char *str; + char *p; + int ret; + static double scale = -1.0; + + if ((ret = virCgroupGetValueStr(group, VIR_CGROUP_CONTROLLER_CPUACCT, + "cpuacct.stat", &str)) < 0) + return ret; + if (!(p = STRSKIP(str, "user ")) || + virStrToLong_ull(p, &p, 10, user) < 0 || + !(p = STRSKIP(p, "\nsystem ")) || + virStrToLong_ull(p, NULL, 10, sys) < 0) { + ret = -EINVAL; + goto cleanup; + } + /* times reported are in system ticks (generally 100 Hz), but that + * rate can theoretically vary between machines. Scale things + * into approximate nanoseconds. */ + if (scale < 0) { + long ticks_per_sec = sysconf(_SC_CLK_TCK); + if (ticks_per_sec == -1) { + ret = -errno; + goto cleanup; + } + scale = 1000000000.0 / ticks_per_sec; + } + *user *= scale; + *sys *= scale; + + ret = 0; +cleanup: + VIR_FREE(str); + return ret; +} +#else +int virCgroupGetCpuacctStat(virCgroupPtr group ATTRIBUTE_UNUSED, + unsigned long long *user ATTRIBUTE_UNUSED, + unsigned long long *sys ATTRIBUTE_UNUSED) +{ + return -ENOSYS; +} +#endif + +int virCgroupSetFreezerState(virCgroupPtr group, const char *state) +{ + return virCgroupSetValueStr(group, + VIR_CGROUP_CONTROLLER_FREEZER, + "freezer.state", state); +} + +int virCgroupGetFreezerState(virCgroupPtr group, char **state) +{ + return virCgroupGetValueStr(group, + VIR_CGROUP_CONTROLLER_FREEZER, + "freezer.state", state); +} + + +#if defined HAVE_KILL && defined HAVE_MNTENT_H && defined HAVE_GETMNTENT_R +static int virCgroupKillInternal(virCgroupPtr group, int signum, virHashTablePtr pids) +{ + int rc; + int killedAny = 0; + char *keypath = NULL; + bool done = false; + FILE *fp = NULL; + VIR_DEBUG("group=%p path=%s signum=%d pids=%p", + group, group->path, signum, pids); + + rc = virCgroupPathOfController(group, -1, "tasks", &keypath); + if (rc != 0) { + VIR_DEBUG("No path of %s, tasks", group->path); + return rc; + } + + /* PIDs may be forking as we kill them, 
so loop + * until there are no new PIDs found + */ + while (!done) { + done = true; + if (!(fp = fopen(keypath, "r"))) { + rc = -errno; + VIR_DEBUG("Failed to read %s: %m\n", keypath); + goto cleanup; + } else { + while (!feof(fp)) { + unsigned long pid_value; + if (fscanf(fp, "%lu", &pid_value) != 1) { + if (feof(fp)) + break; + rc = -errno; + VIR_DEBUG("Failed to read %s: %m\n", keypath); + goto cleanup; + } + if (virHashLookup(pids, (void*)pid_value)) + continue; + + VIR_DEBUG("pid=%lu", pid_value); + /* Cgroups is a Linux concept, so this cast is safe. */ + if (kill((pid_t)pid_value, signum) < 0) { + if (errno != ESRCH) { + rc = -errno; + goto cleanup; + } + /* Leave RC == 0 since we didn't kill one */ + } else { + killedAny = 1; + done = false; + } + + ignore_value(virHashAddEntry(pids, (void*)pid_value, (void*)1)); + } + VIR_FORCE_FCLOSE(fp); + } + } + + rc = killedAny ? 1 : 0; + +cleanup: + VIR_FREE(keypath); + VIR_FORCE_FCLOSE(fp); + + return rc; +} + + +static uint32_t virCgroupPidCode(const void *name, uint32_t seed) +{ + unsigned long pid_value = (unsigned long)(intptr_t)name; + return virHashCodeGen(&pid_value, sizeof(pid_value), seed); +} +static bool virCgroupPidEqual(const void *namea, const void *nameb) +{ + return namea == nameb; +} +static void *virCgroupPidCopy(const void *name) +{ + return (void*)name; +} + +/* + * Returns + * < 0 : errno that occurred + * 0 : no PIDs killed + * 1 : at least one PID killed + */ +int virCgroupKill(virCgroupPtr group, int signum) +{ + VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum); + int rc; + /* The 'tasks' file in cgroups can contain duplicated + * pids, so we use a hash to track which we've already + * killed. 
+     */
+    virHashTablePtr pids = virHashCreateFull(100,
+                                             NULL,
+                                             virCgroupPidCode,
+                                             virCgroupPidEqual,
+                                             virCgroupPidCopy,
+                                             NULL);
+
+    /* Without the table the kill loop cannot tell which PIDs it has
+     * already signalled, so refuse to run rather than risk spinning.
+     * NOTE(review): assumes virHashCreateFull() returns NULL on
+     * allocation failure - confirm against the virhash API. */
+    if (!pids)
+        return -ENOMEM;
+
+    rc = virCgroupKillInternal(group, signum, pids);
+
+    virHashFree(pids);
+
+    return rc;
+}
+
+
+/*
+ * Signals every task in @group and then, recursively, every task in
+ * each child cgroup directory found below it.
+ *
+ * @pids:    table of PIDs already signalled, shared by the whole recursion
+ * @dormdir: if true, each child cgroup is removed after it was processed
+ *
+ * Returns
+ *   < 0 : negative errno on failure
+ *     0 : no PIDs killed
+ *     1 : at least one PID killed
+ */
+static int virCgroupKillRecursiveInternal(virCgroupPtr group, int signum, virHashTablePtr pids, bool dormdir)
+{
+    int rc;
+    int killedAny = 0;
+    char *keypath = NULL;
+    DIR *dp = NULL;
+    virCgroupPtr subgroup = NULL;
+    struct dirent *ent;
+    VIR_DEBUG("group=%p path=%s signum=%d pids=%p", group, group->path, signum, pids);
+
+    rc = virCgroupPathOfController(group, -1, "", &keypath);
+    if (rc != 0) {
+        VIR_DEBUG("No path of %s, tasks", group->path);
+        return rc;
+    }
+
+    /* A result of 1 only says that this level had live tasks; the
+     * children must still be visited, so stop on errors only. */
+    if ((rc = virCgroupKillInternal(group, signum, pids)) < 0)
+        goto cleanup;
+    if (rc == 1)
+        killedAny = 1;
+
+    VIR_DEBUG("Iterate over children of %s", keypath);
+    if (!(dp = opendir(keypath))) {
+        rc = -errno;
+        goto cleanup;
+    }
+
+    while ((ent = readdir(dp))) {
+        char *subpath = NULL;
+
+        if (STREQ(ent->d_name, "."))
+            continue;
+        if (STREQ(ent->d_name, ".."))
+            continue;
+        if (ent->d_type != DT_DIR)
+            continue;
+
+        VIR_DEBUG("Process subdir %s", ent->d_name);
+        if (virAsprintf(&subpath, "%s/%s", group->path, ent->d_name) < 0) {
+            rc = -ENOMEM;
+            goto cleanup;
+        }
+
+        /* NOTE(review): freeing subpath here relies on virCgroupNew()
+         * keeping its own copy of the path - confirm against its
+         * definition earlier in this file. */
+        rc = virCgroupNew(subpath, &subgroup);
+        VIR_FREE(subpath);
+        if (rc != 0)
+            goto cleanup;
+
+        if ((rc = virCgroupKillRecursiveInternal(subgroup, signum, pids, true)) < 0)
+            goto cleanup;
+        if (rc == 1)
+            killedAny = 1;
+
+        if (dormdir)
+            virCgroupRemove(subgroup);
+
+        virCgroupFree(&subgroup);
+    }
+
+    rc = killedAny;
+
+cleanup:
+    virCgroupFree(&subgroup);
+    if (dp)
+        closedir(dp);
+    VIR_FREE(keypath);
+
+    return rc;
+}
+
+/*
+ * Signals all tasks in @group and in every cgroup nested below it.
+ *
+ * Returns
+ *   < 0 : negative errno on failure
+ *     0 : no PIDs killed
+ *     1 : at least one PID killed
+ */
+int virCgroupKillRecursive(virCgroupPtr group, int signum)
+{
+    int rc;
+    VIR_DEBUG("group=%p path=%s signum=%d", group, group->path, signum);
+    virHashTablePtr pids = virHashCreateFull(100,
+                                             NULL,
+                                             virCgroupPidCode,
+                                             virCgroupPidEqual,
+                                             virCgroupPidCopy,
+                                             NULL);
+
+    /* See virCgroupKill(): the PID table is required for the kill
+     * loop to terminate reliably. */
+    if (!pids)
+        return -ENOMEM;
+
+    rc = virCgroupKillRecursiveInternal(group, signum, pids, false);
+
+    virHashFree(pids);
+
+    return rc;
+}
+
+
+int
virCgroupKillPainfully(virCgroupPtr group)
+{
+    /* Escalating kill: up to 15 rounds, 200ms apart.  Round 0 sends
+     * SIGTERM, round 8 sends SIGKILL, every other round sends signal 0,
+     * which only probes whether tasks are still present.
+     *
+     * Returns the result of the last virCgroupKillRecursive() call:
+     *   < 0 : negative errno on failure
+     *     0 : no tasks were left to signal
+     *     1 : tasks were still found in the final round
+     */
+    int i;
+    int rc;
+    VIR_DEBUG("cgroup=%p path=%s", group, group->path);
+    for (i = 0 ; i < 15 ; i++) {
+        int signum;
+        if (i == 0)
+            signum = SIGTERM;
+        else if (i == 8)
+            signum = SIGKILL;
+        else
+            signum = 0; /* Just check for existence */
+
+        rc = virCgroupKillRecursive(group, signum);
+        VIR_DEBUG("Iteration %d rc=%d", i, rc);
+        /* If rc < 0 we hit an error (negative errno), if 0 we ran
+         * out of PIDs */
+        if (rc <= 0)
+            break;
+
+        usleep(200 * 1000);
+    }
+    VIR_DEBUG("Complete %d", rc);
+    return rc;
+}
+
+#else /* !(HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R) */
+/* Stubs for platforms lacking kill()/getmntent_r(): every entry point
+ * reports -ENOSYS. */
+int virCgroupKill(virCgroupPtr group ATTRIBUTE_UNUSED,
+                  int signum ATTRIBUTE_UNUSED)
+{
+    return -ENOSYS;
+}
+int virCgroupKillRecursive(virCgroupPtr group ATTRIBUTE_UNUSED,
+                           int signum ATTRIBUTE_UNUSED)
+{
+    return -ENOSYS;
+}
+
+int virCgroupKillPainfully(virCgroupPtr group ATTRIBUTE_UNUSED)
+{
+    return -ENOSYS;
+}
+#endif /* HAVE_KILL, HAVE_MNTENT_H, HAVE_GETMNTENT_R */
diff --git a/src/util/vircgroup.h b/src/util/vircgroup.h
new file mode 100644
index 0000000..8b6d3b2
--- /dev/null
+++ b/src/util/vircgroup.h
@@ -0,0 +1,167 @@
+/*
+ * vircgroup.h: methods for managing control cgroups
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ * Copyright IBM Corp. 2008
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *  Dan Smith <danms@xxxxxxxxxx>
+ */
+
+#ifndef __VIR_CGROUP_H__
+# define __VIR_CGROUP_H__
+
+/* Opaque handle: the structure is only forward-declared here. */
+struct virCgroup;
+typedef struct virCgroup *virCgroupPtr;
+
+/* Controller identifiers passed as the "controller" argument below. */
+enum {
+    VIR_CGROUP_CONTROLLER_CPU,
+    VIR_CGROUP_CONTROLLER_CPUACCT,
+    VIR_CGROUP_CONTROLLER_CPUSET,
+    VIR_CGROUP_CONTROLLER_MEMORY,
+    VIR_CGROUP_CONTROLLER_DEVICES,
+    VIR_CGROUP_CONTROLLER_FREEZER,
+    VIR_CGROUP_CONTROLLER_BLKIO,
+
+    VIR_CGROUP_CONTROLLER_LAST
+};
+
+VIR_ENUM_DECL(virCgroupController);
+
+int virCgroupForDriver(const char *name,
+                       virCgroupPtr *group,
+                       bool privileged,
+                       bool create);
+
+int virCgroupGetAppRoot(virCgroupPtr *group);
+
+int virCgroupForDomain(virCgroupPtr driver,
+                       const char *name,
+                       virCgroupPtr *group,
+                       bool create);
+
+int virCgroupForVcpu(virCgroupPtr driver,
+                     int vcpuid,
+                     virCgroupPtr *group,
+                     bool create);
+
+int virCgroupForEmulator(virCgroupPtr driver,
+                         virCgroupPtr *group,
+                         bool create);
+
+int virCgroupPathOfController(virCgroupPtr group,
+                              int controller,
+                              const char *key,
+                              char **path);
+
+int virCgroupAddTask(virCgroupPtr group, pid_t pid);
+
+int virCgroupAddTaskController(virCgroupPtr group,
+                               pid_t pid,
+                               int controller);
+
+int virCgroupMoveTask(virCgroupPtr src_group,
+                      virCgroupPtr dest_group,
+                      int controller);
+
+int virCgroupSetBlkioWeight(virCgroupPtr group, unsigned int weight);
+int virCgroupGetBlkioWeight(virCgroupPtr group, unsigned int *weight);
+
+int virCgroupSetBlkioDeviceWeight(virCgroupPtr group,
+                                  const char *path,
+                                  unsigned int weight);
+
+int virCgroupSetMemory(virCgroupPtr group, unsigned long long kb);
+int virCgroupGetMemoryUsage(virCgroupPtr group, unsigned long *kb);
+
+int virCgroupSetMemoryHardLimit(virCgroupPtr group, unsigned long long kb);
+int virCgroupGetMemoryHardLimit(virCgroupPtr group, unsigned long long *kb);
+int virCgroupSetMemorySoftLimit(virCgroupPtr group, unsigned long long kb);
+int virCgroupGetMemorySoftLimit(virCgroupPtr group, unsigned long long *kb);
+int virCgroupSetMemSwapHardLimit(virCgroupPtr group, unsigned long long kb);
+int virCgroupGetMemSwapHardLimit(virCgroupPtr group, unsigned long long *kb);
+int virCgroupGetMemSwapUsage(virCgroupPtr group, unsigned long long *kb);
+
+/* Device permission bits for the "perms" arguments below; they can be
+ * OR-ed together, as the RW and RWM shorthands do. */
+enum {
+    VIR_CGROUP_DEVICE_READ  = 1,
+    VIR_CGROUP_DEVICE_WRITE = 2,
+    VIR_CGROUP_DEVICE_MKNOD = 4,
+    VIR_CGROUP_DEVICE_RW    = VIR_CGROUP_DEVICE_READ | VIR_CGROUP_DEVICE_WRITE,
+    VIR_CGROUP_DEVICE_RWM   = VIR_CGROUP_DEVICE_RW | VIR_CGROUP_DEVICE_MKNOD,
+};
+
+int virCgroupDenyAllDevices(virCgroupPtr group);
+
+int virCgroupAllowDevice(virCgroupPtr group,
+                         char type,
+                         int major,
+                         int minor,
+                         int perms);
+int virCgroupAllowDeviceMajor(virCgroupPtr group,
+                              char type,
+                              int major,
+                              int perms);
+int virCgroupAllowDevicePath(virCgroupPtr group,
+                             const char *path,
+                             int perms);
+
+int virCgroupDenyDevice(virCgroupPtr group,
+                        char type,
+                        int major,
+                        int minor,
+                        int perms);
+int virCgroupDenyDeviceMajor(virCgroupPtr group,
+                             char type,
+                             int major,
+                             int perms);
+int virCgroupDenyDevicePath(virCgroupPtr group,
+                            const char *path,
+                            int perms);
+
+int virCgroupSetCpuShares(virCgroupPtr group, unsigned long long shares);
+int virCgroupGetCpuShares(virCgroupPtr group, unsigned long long *shares);
+
+int virCgroupSetCpuCfsPeriod(virCgroupPtr group, unsigned long long cfs_period);
+int virCgroupGetCpuCfsPeriod(virCgroupPtr group, unsigned long long *cfs_period);
+
+int virCgroupSetCpuCfsQuota(virCgroupPtr group, long long cfs_quota);
+int virCgroupGetCpuCfsQuota(virCgroupPtr group, long long *cfs_quota);
+
+int virCgroupGetCpuacctUsage(virCgroupPtr group, unsigned long long *usage);
+int virCgroupGetCpuacctPercpuUsage(virCgroupPtr group, char **usage);
+/* user and sys are read from "cpuacct.stat" and scaled from clock ticks
+ * into approximate nanoseconds; -ENOSYS where unsupported. */
+int virCgroupGetCpuacctStat(virCgroupPtr group, unsigned long long *user,
+                            unsigned long long *sys);
+
+/* Write / read the "freezer.state" key of the freezer controller. */
+int virCgroupSetFreezerState(virCgroupPtr group, const char *state);
+int virCgroupGetFreezerState(virCgroupPtr group, char **state);
+
+int virCgroupSetCpusetMems(virCgroupPtr group, const char *mems);
+int virCgroupGetCpusetMems(virCgroupPtr group, char **mems);
+
+int virCgroupSetCpusetCpus(virCgroupPtr group, const char *cpus);
+int virCgroupGetCpusetCpus(virCgroupPtr group, char **cpus);
+
+int virCgroupRemove(virCgroupPtr group);
+
+void virCgroupFree(virCgroupPtr *group);
+bool virCgroupMounted(virCgroupPtr cgroup, int controller);
+
+/* Signal the tasks of a cgroup.  Each returns a negative errno on
+ * failure (-ENOSYS on platforms without the needed support), 0 if no
+ * PID was signalled and 1 if at least one was.  virCgroupKillPainfully
+ * escalates from SIGTERM to SIGKILL over several rounds. */
+int virCgroupKill(virCgroupPtr group, int signum);
+int virCgroupKillRecursive(virCgroupPtr group, int signum);
+int virCgroupKillPainfully(virCgroupPtr group);
+
+#endif /* __VIR_CGROUP_H__ */
-- 
1.7.11.7

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list