This commit tries to fix lots of issues related to LXC VCPUs. One of them is related to /proc/cpuinfo content. If only 1 VCPU is set, LXC containers will show all CPUs available for host. The second one is related to CPU share, if an user set only 1 VCPU, the container/process will use all available CPUs. (This is not the case when `cpuset` attribute is declared). So, this commit adds a virtual cpuinfo based on VCPU mapping and it automatically limits the CPU usage according VCPU count. Example (now): LXC container - 8 CPUS with 2 VCPU: lxc-root# stress --cpu 8 On host machine, only CPU 0 and 1 have 100% usage. Signed-off-by: Julio Faracco <jcfaracco@xxxxxxxxx> --- src/lxc/lxc_cgroup.c | 31 ++++++++++++++ src/lxc/lxc_container.c | 39 ++++++++++------- src/lxc/lxc_fuse.c | 95 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 145 insertions(+), 20 deletions(-) diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index d29b65092a..912a252473 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -50,6 +50,34 @@ static int virLXCCgroupSetupCpuTune(virDomainDefPtr def, } +static int virLXCCgroupSetupVcpuAuto(virDomainDefPtr def, + virCgroupPtr cgroup) +{ + size_t i; + int vcpumax; + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr cpuset = &buffer; + + vcpumax = virDomainDefGetVcpusMax(def); + for (i = 0; i < vcpumax; i++) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, i); + /* Cgroup is smart enough to convert numbers separated + * by comma into ranges. Example: "0,1,2,5," -> "0-2,5". + * Libvirt does not need to process it here. */ + if (vcpu) + virBufferAsprintf(cpuset, "%zu,", i); + } + if (virCgroupSetCpusetCpus(cgroup, + virBufferCurrentContent(cpuset)) < 0) { + virBufferFreeAndReset(cpuset); + return -1; + } + + virBufferFreeAndReset(cpuset); + return 0; +} + + static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, virCgroupPtr cgroup, virBitmapPtr nodemask) @@ -61,6 +89,9 @@ static int virLXCCgroupSetupCpusetTune(virDomainDefPtr def, def->cpumask && virCgroupSetupCpusetCpus(cgroup, def->cpumask) < 0) { return -1; + } else { + /* auto mode for VCPU limits */ + virLXCCgroupSetupVcpuAuto(def, cgroup); } if (virDomainNumatuneGetMode(def->numa, -1, &mode) < 0 || diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 41efe43a14..88e27f3060 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -997,8 +997,8 @@ static int lxcContainerMountBasicFS(bool userns_enabled, static int lxcContainerMountProcFuse(virDomainDefPtr def, const char *stateDir) { - int ret; - char *meminfo_path = NULL; + g_autofree char *meminfo_path = NULL; + g_autofree char *cpuinfo_path = NULL; VIR_DEBUG("Mount /proc/meminfo stateDir=%s", stateDir); @@ -1006,15 +1006,29 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def, stateDir, def->name); - if ((ret = mount(meminfo_path, "/proc/meminfo", - NULL, MS_BIND, NULL)) < 0) { + if (mount(meminfo_path, "/proc/meminfo", + NULL, MS_BIND, NULL) < 0) { virReportSystemError(errno, _("Failed to mount %s on /proc/meminfo"), meminfo_path); + return -1; } - VIR_FREE(meminfo_path); - return ret; + VIR_DEBUG("Mount /proc/cpuinfo stateDir=%s", stateDir); + + cpuinfo_path = g_strdup_printf("/.oldroot/%s/%s.fuse/cpuinfo", + stateDir, + def->name); + + if (mount(cpuinfo_path, "/proc/cpuinfo", + NULL, MS_BIND, NULL) < 0) { + virReportSystemError(errno, + _("Failed to mount %s on /proc/cpuinfo"), + cpuinfo_path); + return -1; + } + + return 0; } #else static int lxcContainerMountProcFuse(virDomainDefPtr def G_GNUC_UNUSED, @@ -1027,8 +1041,7 @@ static int lxcContainerMountProcFuse(virDomainDefPtr def G_GNUC_UNUSED, static int lxcContainerMountFSDev(virDomainDefPtr def, const char *stateDir) { - int ret = -1; - char *path = NULL; + g_autofree char *path = NULL; int flags = def->idmap.nuidmap ? MS_BIND : MS_MOVE; VIR_DEBUG("Mount /dev/ stateDir=%s", stateDir); @@ -1038,7 +1051,7 @@ static int lxcContainerMountFSDev(virDomainDefPtr def, if (virFileMakePath("/dev") < 0) { virReportSystemError(errno, "%s", _("Cannot create /dev")); - goto cleanup; + return -1; } VIR_DEBUG("Trying to %s %s to /dev", def->idmap.nuidmap ? @@ -1048,14 +1061,10 @@ static int lxcContainerMountFSDev(virDomainDefPtr def, virReportSystemError(errno, _("Failed to mount %s on /dev"), path); - goto cleanup; + return -1; } - ret = 0; - - cleanup: - VIR_FREE(path); - return ret; + return 0; } static int lxcContainerMountFSDevPTS(virDomainDefPtr def, diff --git a/src/lxc/lxc_fuse.c b/src/lxc/lxc_fuse.c index 8cfccdd7e0..b2117bfa17 100644 --- a/src/lxc/lxc_fuse.c +++ b/src/lxc/lxc_fuse.c @@ -36,23 +36,29 @@ #if WITH_FUSE +#ifndef CPUINFO_FILE_LEN +# define CPUINFO_FILE_LEN (1024*1024) +#endif + static const char *fuse_meminfo_path = "/meminfo"; +static const char *fuse_cpuinfo_path = "/cpuinfo"; static int lxcProcGetattr(const char *path, struct stat *stbuf) { - g_autofree char *mempath = NULL; + g_autofree char *procpath = NULL; struct stat sb; struct fuse_context *context = fuse_get_context(); virDomainDefPtr def = (virDomainDefPtr)context->private_data; memset(stbuf, 0, sizeof(struct stat)); - mempath = g_strdup_printf("/proc/%s", path); + procpath = g_strdup_printf("/proc/%s", path); if (STREQ(path, "/")) { stbuf->st_mode = S_IFDIR | 0755; stbuf->st_nlink = 2; - } else if (STREQ(path, fuse_meminfo_path)) { - if (stat(mempath, &sb) < 0) + } else if (STREQ(path, fuse_meminfo_path) || + STREQ(path, fuse_cpuinfo_path)) { + if (stat(procpath, &sb) < 0) return -errno; stbuf->st_uid = def->idmap.uidmap ? def->idmap.uidmap[0].target : 0; @@ -83,6 +89,7 @@ static int lxcProcReaddir(const char *path, void *buf, filler(buf, ".", NULL, 0); filler(buf, "..", NULL, 0); filler(buf, fuse_meminfo_path + 1, NULL, 0); + filler(buf, fuse_cpuinfo_path + 1, NULL, 0); return 0; } @@ -90,7 +97,8 @@ static int lxcProcReaddir(const char *path, void *buf, static int lxcProcOpen(const char *path G_GNUC_UNUSED, struct fuse_file_info *fi G_GNUC_UNUSED) { - if (STRNEQ(path, fuse_meminfo_path)) + if (STRNEQ(path, fuse_meminfo_path) && + STRNEQ(path, fuse_cpuinfo_path)) return -ENOENT; if ((fi->flags & 3) != O_RDONLY) @@ -227,6 +235,80 @@ static int lxcProcReadMeminfo(char *hostpath, virDomainDefPtr def, return res; } + +static int +lxcProcReadCpuinfoParse(virDomainDefPtr def, char *base, + virBufferPtr new_cpuinfo) +{ + char *procline = NULL; + char *saveptr = base; + size_t cpu; + size_t nvcpu; + size_t curcpu = 0; + bool get_proc = false; + + nvcpu = virDomainDefGetVcpus(def); + while ((procline = strtok_r(NULL, "\n", &saveptr))) { + if (sscanf(procline, "processor\t: %zu", &cpu) == 1) { + virDomainVcpuDefPtr vcpu = virDomainDefGetVcpu(def, cpu); + /* VCPU is mapped */ + if (vcpu) { + if (curcpu == nvcpu) + break; + + if (curcpu > 0) + virBufferAddLit(new_cpuinfo, "\n"); + + virBufferAsprintf(new_cpuinfo, "processor\t: %zu\n", + curcpu); + curcpu++; + get_proc = true; + } else { + get_proc = false; + } + } else { + /* It is not a processor index */ + if (get_proc) + virBufferAsprintf(new_cpuinfo, "%s\n", procline); + } + } + + virBufferAddLit(new_cpuinfo, "\n"); + + return strlen(virBufferCurrentContent(new_cpuinfo)); +} + + +static int lxcProcReadCpuinfo(char *hostpath, virDomainDefPtr def, + char *buf, size_t size, off_t offset) +{ + virBuffer buffer = VIR_BUFFER_INITIALIZER; + virBufferPtr new_cpuinfo = &buffer; + g_autofree char *outbuf = NULL; + int res = -1; + + /* Gather info from /proc/cpuinfo */ + if (virFileReadAll(hostpath, CPUINFO_FILE_LEN, &outbuf) < 0) { + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to open %s"), hostpath); + return -1; + } + + /* /proc/cpuinfo does not support fseek */ + if (offset > 0) + return 0; + + res = lxcProcReadCpuinfoParse(def, outbuf, new_cpuinfo); + + if (res > size) + res = size; + memcpy(buf, virBufferCurrentContent(new_cpuinfo), res); + + virBufferFreeAndReset(new_cpuinfo); + return res; +} + + static int lxcProcRead(const char *path G_GNUC_UNUSED, char *buf G_GNUC_UNUSED, size_t size G_GNUC_UNUSED, @@ -246,6 +328,9 @@ static int lxcProcRead(const char *path G_GNUC_UNUSED, if (STREQ(path, fuse_meminfo_path)) { if ((res = lxcProcReadMeminfo(hostpath, def, buf, size, offset)) < 0) res = lxcProcHostRead(hostpath, buf, size, offset); + } else if (STREQ(path, fuse_cpuinfo_path)) { + if ((res = lxcProcReadCpuinfo(hostpath, def, buf, size, offset)) < 0) + res = lxcProcHostRead(hostpath, buf, size, offset); } return res; -- 2.20.1