On 05/09/2012 02:41 AM, Hu Tao wrote:

Missing a decent commit message.

> ---
>  src/qemu/qemu_driver.c |  166 ++++++++++++++++++++++++++++++++++++++++++++++--
>  src/util/cgroup.c      |    4 +-
>  tools/virsh.c          |    2 +-
>  3 files changed, 164 insertions(+), 8 deletions(-)
>
>
> +/* get the cpu time from cpuacct cgroup group, saving
> +   cpu time value in cpu_time. caller is responsible
> +   for freeing memory allocated for cpu_time.
> +   return 0 on success, -1 otherwise */
> +static int getVcpuPercpuStats(virCgroupPtr group,
> +                              unsigned long long **cpu_time,
> +                              unsigned int *num)
> +{

I can inline this entire function, for less complexity.

> +
> +/* This function gets the sums of cpu time consumed by all vcpus.
> + * For example, if there are 4 physical cpus, and 2 vcpus in a domain,
> + * then for each vcpu, the cpuacct.usage_percpu looks like this:
> + *   t0 t1 t2 t3
> + * and we have 2 groups of such data:
> + *   v\p   0     1     2     3
> + *   0     t00   t01   t02   t03
> + *   1     t10   t11   t12   t13
> + * for each pcpu, the sum is cpu time consumed by all vcpus.
> + *   s0 = t00 + t10
> + *   s1 = t01 + t11
> + *   s2 = t02 + t12
> + *   s3 = t03 + t13

Very useful comment.

> + */
> +static int getSumVcpuPercpuStats(virCgroupPtr group,
> +                                 unsigned int nvcpu,
> +                                 unsigned long long **sum_cpu_time,
> +                                 unsigned int *num)

We know in advance how big 'num' should be if there are no concurrent cpu hotplug events from other processes - it should be the same length as the parent cgroup. If the length changes on the fly, then someone was hot-plugging cpus in between our read of cgroup files, and our mapping will be wrong.

> +{
> +    unsigned long long **cpu_time;
> +    unsigned int *ncpu_time;
> +    unsigned int max = 0;
> +    unsigned long long *tmp = NULL;
> +    int ret = -1;
> +    int i, j;
> +
> +    if ((VIR_ALLOC_N(cpu_time, nvcpu) < 0) ||
> +        (VIR_ALLOC_N(ncpu_time, nvcpu) < 0)) {
> +        virReportOOMError();
> +        goto error;
> +    }

We don't have to allocate a full 2-dimensional array only to sum it up later - we only need to access a one-dimensional array, and increase the sum on each pass through the next vcpu. In fact, if the caller allocates the array, then we can get away with fewer pointer dereferences.

> +
> +    for (i = 0; i < nvcpu; i++) {
> +        virCgroupPtr group_vcpu = NULL;
> +        ret = virCgroupForVcpu(group, i, &group_vcpu, 0);
> +        if (ret < 0) {
> +            qemuReportError(VIR_ERR_INTERNAL_ERROR,
> +                            _("error on creating cgroup cpuacct for vcpu"));

We're not creating a new cgroup, so much as accessing an existing one.

> +            goto error;
> +        }
> +        ret = getVcpuPercpuStats(group_vcpu,
> +                                 &cpu_time[i],
> +                                 &ncpu_time[i]);
> +        virCgroupFree(&group_vcpu);
> +
> +        if (ret < 0)
> +            goto error;
> +
> +        if (max < ncpu_time[i])
> +            max = ncpu_time[i];
> +    }
> +
> +    if (max > 0) {
> +        if (VIR_ALLOC_N(tmp, max) < 0)
> +            goto error;
> +
> +        for (i = 0; i < nvcpu; i++) {
> +            for (j = 0; j < ncpu_time[i]; j++)
> +                tmp[j] += cpu_time[i][j];
> +        }
> +        *sum_cpu_time = tmp;
> +        *num = max;
> +        ret = 0;
> +    }
> +
> +error:

Since we also get here on success, it is better to name this 'success:'.
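Putting those points together (caller-allocated one-dimensional array, no per-vcpu allocations, "accessing" rather than "creating" in the error message), the whole helper could shrink to roughly this untested sketch. I'm assuming it can reuse the same cpuacct.usage_percpu reader (virCgroupGetCpuacctPercpuUsage) and virStrToLong_ull parsing that the parent-group code already relies on; if patch 1/2 exposed a different helper, substitute that:

static int
getSumVcpuPercpuStats(virCgroupPtr group,
                      unsigned int nvcpu,
                      unsigned long long *sum_cpu_time,
                      unsigned int num)
{
    int ret = -1;
    int i;
    char *buf = NULL;

    for (i = 0; i < nvcpu; i++) {
        virCgroupPtr group_vcpu = NULL;
        char *pos;
        unsigned int j;

        if (virCgroupForVcpu(group, i, &group_vcpu, 0) < 0) {
            qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                            _("error accessing cgroup cpuacct for vcpu"));
            goto cleanup;
        }
        if (virCgroupGetCpuacctPercpuUsage(group_vcpu, &buf) < 0) {
            virCgroupFree(&group_vcpu);
            goto cleanup;
        }
        virCgroupFree(&group_vcpu);

        /* accumulate straight into the caller's array - no 2-D array */
        pos = buf;
        for (j = 0; j < num; j++) {
            unsigned long long tmp;
            if (virStrToLong_ull(pos, &pos, 10, &tmp) < 0) {
                qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                                _("cpuacct parse error"));
                goto cleanup;
            }
            sum_cpu_time[j] += tmp;
        }
        VIR_FREE(buf);
        buf = NULL;
    }

    ret = 0;

cleanup:
    VIR_FREE(buf);
    return ret;
}

The caller would VIR_ALLOC_N() sum_cpu_time with the entry count it already knows from the parent cgroup, which also gives us the expected 'num' up front instead of discovering it per vcpu.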
> +    if (cpu_time) {
> +        for (i = 0; i < nvcpu; i++)
> +            VIR_FREE(cpu_time[i]);
> +    }
> +
> +    VIR_FREE(cpu_time);
> +    VIR_FREE(ncpu_time);
> +    return ret;
> +}
> +
>  static int
>  qemuDomainGetPercpuStats(virDomainPtr domain,
> +                         virDomainObjPtr vm,
>                           virCgroupPtr group,
>                           virTypedParameterPtr params,
>                           unsigned int nparams,
> @@ -12518,15 +12639,17 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
>      int i, max_id;
>      char *pos;
>      char *buf = NULL;
> +    unsigned long long *sum_cpu_time = NULL;
> +    unsigned int n = 0;
> +    qemuDomainObjPrivatePtr priv = vm->privateData;
>      virTypedParameterPtr ent;
>      int param_idx;
> +    unsigned long long cpu_time;
>
>      /* return the number of supported params */
>      if (nparams == 0 && ncpus != 0)
>          return QEMU_NB_PER_CPU_STAT_PARAM; /* only cpu_time is supported */

This comment is now outdated.

>
> -    /* return percpu cputime in index 0 */
> -    param_idx = 0;
>      /* to parse account file, we need "present" cpu map */
>      map = nodeGetCPUmap(domain->conn, &max_id, "present");

Hmm, I wonder how likely we are to have a data race - now that we are reading more than one cgroup file, we risk some other process hotplugging a physical cpu in between our read of two files. It's probably unlikely, but we can be safer by getting the cpu map both at the beginning and the end of reading all cgroup files, and if the map differs, failing the command. (A rough sketch of such a check follows the cgroup.c hunk below.)

> +
> +    /* return percpu vcputime in index 1 */
> +    if (++param_idx >= nparams) {
> +        goto cleanup;
> +    }

Oops - here, rv is still -1, so you end up failing the command if the user only asked for one stat.

> +    for (i = 0; i <= max_id && i < n; i++) {
> +        if (i < start_cpu)
> +            continue;
> +
> +        if (!map[i])
> +            cpu_time = 0;
> +        else
> +            cpu_time = sum_cpu_time[i];

This isn't right. Consider if you have 4 cpus, but cpu2 is hot-unplugged, and start_cpu was 1. Then cpuacct returns a list of 3 integers, where you then populate sum_cpu_time into a 3-element array, and your code returns:

i = 0 => skip iteration
i = 1 => params[0] => 0
i = 2 => params[1] => sum_cpu_time[2]
i = 3 => skip iteration

But you really want to return:

i = 0 => skip iteration
i = 1 => params[0] (cpu1) => sum_cpu_time[1]
i = 2 => params[1] (cpu2) => 0
i = 3 => params[2] (cpu3) => sum_cpu_time[2]

(See the loop sketch after the cgroup.c hunk below.)

> +        if (virTypedParameterAssign(&params[(i - start_cpu) * nparams + param_idx],

Long line.

> +                                    VIR_DOMAIN_CPU_STATS_VCPUTIME,
> +                                    VIR_TYPED_PARAM_ULLONG,
> +                                    cpu_time) < 0) {
> +            VIR_FREE(sum_cpu_time);
> +            goto cleanup;
> +        }
> +    }
> +    VIR_FREE(sum_cpu_time);

If you put this line after cleanup, you don't have to repeat it twice above.

> +++ b/src/util/cgroup.c
> @@ -530,7 +530,9 @@ static int virCgroupMakeGroup(virCgroupPtr parent, virCgroupPtr group,
>              continue;
>
>          /* We need to control cpu bandwidth for each vcpu now */
> -        if ((flags & VIR_CGROUP_VCPU) && (i != VIR_CGROUP_CONTROLLER_CPU)) {
> +        if ((flags & VIR_CGROUP_VCPU) &&
> +            (i != VIR_CGROUP_CONTROLLER_CPU &&
> +             i != VIR_CGROUP_CONTROLLER_CPUACCT)) {
>              /* treat it as unmounted and we can use virCgroupAddTask */
>              VIR_FREE(group->controllers[i].mountPoint);
>              continue;

Wow - so that's really all we have to do to start a per-vcpu cpuacct sub-hierarchy.
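Going back to the two issues above, both sketches below are untested fragments that reuse the locals already in qemuDomainGetPercpuStats. For the cpu map race, the belt-and-braces check could look something like this, run after all cgroup files have been read (I'm assuming the "present" map has max_id + 1 entries, which is what the existing loop implies):

    char *map2 = NULL;
    int max_id2 = 0;

    /* re-read the map after parsing all cgroup data; if it changed,
     * a cpu was hot(un)plugged under our feet and our mapping is bogus */
    map2 = nodeGetCPUmap(domain->conn, &max_id2, "present");
    if (!map2 || max_id2 != max_id ||
        memcmp(map, map2, max_id + 1) != 0) {
        qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
                        _("the set of present cpus changed while reading cgroup data"));
        VIR_FREE(map2);
        goto cleanup;
    }
    VIR_FREE(map2);

And for the start_cpu mapping, the loop shape I have in mind is roughly this (the start_cpu + ncpus upper bound that the surrounding code already enforces is elided for brevity):

    int id = 0;  /* index into the dense cpuacct array; 'n' entries total */

    for (i = 0; i <= max_id; i++) {
        cpu_time = 0;
        /* advance the dense index only for cpus that are present */
        if (map[i] && id < n)
            cpu_time = sum_cpu_time[id++];

        if (i < start_cpu)
            continue;

        if (virTypedParameterAssign(&params[(i - start_cpu) * nparams
                                            + param_idx],
                                    VIR_DOMAIN_CPU_STATS_VCPUTIME,
                                    VIR_TYPED_PARAM_ULLONG,
                                    cpu_time) < 0)
            goto cleanup;
        /* sum_cpu_time gets freed once, after the cleanup label */
    }

With your 4-cpu example that yields params[0] = sum_cpu_time[1] (cpu1), params[1] = 0 (cpu2), and params[2] = sum_cpu_time[2] (cpu3).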
> diff --git a/tools/virsh.c b/tools/virsh.c
> index b9d05a2..d3fb448 100644
> --- a/tools/virsh.c
> +++ b/tools/virsh.c
> @@ -5632,7 +5632,7 @@ cmdCPUStats(vshControl *ctl, const vshCmd *cmd)
>              pos = i * nparams + j;
>              vshPrint(ctl, "\t%-12s ", params[pos].field);
>              if ((STREQ(params[pos].field, VIR_DOMAIN_CPU_STATS_CPUTIME) ||
> -                 STREQ(params[pos].field, VIR_DOMAIN_CPU_STATS_CPUTIME)) &&
> +                 STREQ(params[pos].field, VIR_DOMAIN_CPU_STATS_VCPUTIME)) &&

This hunk belongs in patch 1/2.

I've hacked this enough (1 file changed, 60 insertions(+), 108 deletions(-) according to git) that it's worth me posting a v5 instead of pushing right away. Can you please test to make sure I didn't break anything?

-- 
Eric Blake   eblake@xxxxxxxxxx    +1-919-301-3266
Libvirt virtualization library http://libvirt.org