From: Hu Tao <hutao@xxxxxxxxxxxxxx>

This involves setting the cpuacct cgroup to a per-vcpu granularity, as
well as summing each vcpu's accounting into a common array.  Now that
we are reading more than one cgroup file, we double-check that cpus
weren't hot-plugged between reads, which would invalidate our summing.

Signed-off-by: Eric Blake <eblake@xxxxxxxxxx>
---

diff from v4: rewrite qemu code to use fewer malloc calls, fix some
logic bugs

 src/qemu/qemu_driver.c |  123 ++++++++++++++++++++++++++++++++++++++++++++----
 src/util/cgroup.c      |    4 +-
 2 files changed, 117 insertions(+), 10 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 0fd7de1..f6d0985 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -104,7 +104,7 @@
 #define QEMU_NB_NUMA_PARAM 2
 
 #define QEMU_NB_TOTAL_CPU_STAT_PARAM 3
-#define QEMU_NB_PER_CPU_STAT_PARAM 1
+#define QEMU_NB_PER_CPU_STAT_PARAM 2
 
 #if HAVE_LINUX_KVM_H
 # include <linux/kvm.h>
@@ -12563,8 +12563,69 @@ qemuDomainGetTotalcpuStats(virCgroupPtr group,
     return nparams;
 }
 
+/* This function gets the sums of cpu time consumed by all vcpus.
+ * For example, if there are 4 physical cpus, and 2 vcpus in a domain,
+ * then for each vcpu, the cpuacct.usage_percpu looks like this:
+ *   t0 t1 t2 t3
+ * and we have 2 groups of such data:
+ *   v\p   0     1     2     3
+ *   0     t00   t01   t02   t03
+ *   1     t10   t11   t12   t13
+ * for each pcpu, the sum is cpu time consumed by all vcpus.
+ *   s0 = t00 + t10
+ *   s1 = t01 + t11
+ *   s2 = t02 + t12
+ *   s3 = t03 + t13
+ */
+static int
+getSumVcpuPercpuStats(virCgroupPtr group,
+                      unsigned int nvcpu,
+                      unsigned long long *sum_cpu_time,
+                      unsigned int num)
+{
+    int ret = -1;
+    int i;
+    char *buf = NULL;
+    virCgroupPtr group_vcpu = NULL;
+
+    for (i = 0; i < nvcpu; i++) {
+        char *pos;
+        unsigned long long tmp;
+        int j;
+
+        if (virCgroupForVcpu(group, i, &group_vcpu, 0) < 0) {
+            qemuReportError(VIR_ERR_INTERNAL_ERROR,
+                            _("error accessing cgroup cpuacct for vcpu"));
+            goto cleanup;
+        }
+
+        if (virCgroupGetCpuacctPercpuUsage(group_vcpu, &buf) < 0)
+            goto cleanup;
+
+        pos = buf;
+        for (j = 0; j < num; j++) {
+            if (virStrToLong_ull(pos, &pos, 10, &tmp) < 0) {
+                qemuReportError(VIR_ERR_INTERNAL_ERROR,
+                                _("cpuacct parse error"));
+                goto cleanup;
+            }
+            sum_cpu_time[j] += tmp;
+        }
+
+        virCgroupFree(&group_vcpu);
+        VIR_FREE(buf);
+    }
+
+    ret = 0;
+cleanup:
+    virCgroupFree(&group_vcpu);
+    VIR_FREE(buf);
+    return ret;
+}
+
 static int
 qemuDomainGetPercpuStats(virDomainPtr domain,
+                         virDomainObjPtr vm,
                          virCgroupPtr group,
                          virTypedParameterPtr params,
                          unsigned int nparams,
@@ -12572,20 +12633,24 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
                          unsigned int ncpus)
 {
     char *map = NULL;
+    char *map2 = NULL;
     int rv = -1;
     int i, max_id;
     char *pos;
     char *buf = NULL;
+    unsigned long long *sum_cpu_time = NULL;
+    unsigned long long *sum_cpu_pos;
+    unsigned int n = 0;
+    qemuDomainObjPrivatePtr priv = vm->privateData;
     virTypedParameterPtr ent;
     int param_idx;
+    unsigned long long cpu_time;
 
     /* return the number of supported params */
     if (nparams == 0 && ncpus != 0)
-        return QEMU_NB_PER_CPU_STAT_PARAM; /* only cpu_time is supported */
+        return QEMU_NB_PER_CPU_STAT_PARAM;
 
-    /* return percpu cputime in index 0 */
-    param_idx = 0;
-    /* to parse account file, we need "present" cpu map */
+    /* To parse account file, we need "present" cpu map.  */
     map = nodeGetCPUmap(domain->conn, &max_id, "present");
     if (!map)
         return rv;
@@ -12608,30 +12673,70 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
     pos = buf;
     memset(params, 0, nparams * ncpus);
 
+    /* return percpu cputime in index 0 */
+    param_idx = 0;
+
     if (max_id - start_cpu > ncpus - 1)
         max_id = start_cpu + ncpus - 1;
 
     for (i = 0; i <= max_id; i++) {
-        unsigned long long cpu_time;
-
         if (!map[i]) {
             cpu_time = 0;
         } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) {
             qemuReportError(VIR_ERR_INTERNAL_ERROR,
                             _("cpuacct parse error"));
             goto cleanup;
+        } else {
+            n++;
         }
         if (i < start_cpu)
             continue;
-        ent = &params[ (i - start_cpu) * nparams + param_idx];
+        ent = &params[(i - start_cpu) * nparams + param_idx];
         if (virTypedParameterAssign(ent, VIR_DOMAIN_CPU_STATS_CPUTIME,
                                     VIR_TYPED_PARAM_ULLONG, cpu_time) < 0)
             goto cleanup;
     }
+
+    /* return percpu vcputime in index 1 */
+    if (++param_idx >= nparams) {
+        rv = nparams;
+        goto cleanup;
+    }
+
+    if (VIR_ALLOC_N(sum_cpu_time, n) < 0) {
+        virReportOOMError();
+        goto cleanup;
+    }
+    if (getSumVcpuPercpuStats(group, priv->nvcpupids, sum_cpu_time, n) < 0)
+        goto cleanup;
+
+    /* Check that the mapping of online cpus didn't change mid-parse.  */
+    map2 = nodeGetCPUmap(domain->conn, &max_id, "present");
+    if (!map2 || memcmp(map, map2, VIR_DOMAIN_CPUMASK_LEN) != 0)
+        goto cleanup;
+
+    sum_cpu_pos = sum_cpu_time;
+    for (i = 0; i <= max_id; i++) {
+        if (!map[i])
+            cpu_time = 0;
+        else
+            cpu_time = *(sum_cpu_pos++);
+        if (i < start_cpu)
+            continue;
+        if (virTypedParameterAssign(&params[(i - start_cpu) * nparams +
+                                            param_idx],
+                                    VIR_DOMAIN_CPU_STATS_VCPUTIME,
+                                    VIR_TYPED_PARAM_ULLONG,
+                                    cpu_time) < 0)
+            goto cleanup;
+    }
+
     rv = param_idx + 1;
 cleanup:
+    VIR_FREE(sum_cpu_time);
     VIR_FREE(buf);
     VIR_FREE(map);
+    VIR_FREE(map2);
     return rv;
 }
 
@@ -12683,7 +12788,7 @@ qemuDomainGetCPUStats(virDomainPtr domain,
     if (start_cpu == -1)
         ret = qemuDomainGetTotalcpuStats(group, params, nparams);
     else
-        ret = qemuDomainGetPercpuStats(domain, group, params, nparams,
+        ret = qemuDomainGetPercpuStats(domain, vm, group, params, nparams,
                                        start_cpu, ncpus);
 cleanup:
     virCgroupFree(&group);
diff --git a/src/util/cgroup.c b/src/util/cgroup.c
index ad49bc2..5b32881 100644
--- a/src/util/cgroup.c
+++ b/src/util/cgroup.c
@@ -530,7 +530,9 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group,
             continue;
 
         /* We need to control cpu bandwidth for each vcpu now */
-        if ((flags & VIR_CGROUP_VCPU) && (i != VIR_CGROUP_CONTROLLER_CPU)) {
+        if ((flags & VIR_CGROUP_VCPU) &&
+            (i != VIR_CGROUP_CONTROLLER_CPU &&
+             i != VIR_CGROUP_CONTROLLER_CPUACCT)) {
             /* treat it as unmounted and we can use virCgroupAddTask */
             VIR_FREE(group->controllers[i].mountPoint);
             continue;
-- 
1.7.7.6

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list