[PATCHv2] cpustat: fix regression when cpus are offline

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



It turns out that the cpuacct results properly account for offline
cpus, and always returns results for every possible cpu, not just
the online ones.  So there is no need to check the map of online
cpus in the first place, merely only a need to know the maximum
possible cpu.  Meanwhile, virNodeGetCPUBitmap had a subtle change
from returning the maximum id to instead returning the width of
the bitmap (one larger than the maximum id) in commit 2f4c5338,
which made this code encounter some off-by-one logic leading to
bad error messages when a cpu was offline:

$ virsh cpu-stats dom
error: Failed to virDomainGetCPUStats()

error: An error occurred, but the cause is unknown

Cleaning this up unraveled a chain of other unused variables.

* src/qemu/qemu_driver.c (qemuDomainGetPercpuStats): Drop
pointless check for cpumap changes, and use correct number of
cpus.  Simplify signature.
(qemuDomainGetCPUStats): Adjust caller.
* src/nodeinfo.h (nodeGetCPUCount): New prototype.
(nodeGetCPUBitmap): Drop unused parameter.
* src/nodeinfo.c (nodeGetCPUBitmap): Likewise.
(nodeGetCPUMap): Adjust caller.
(nodeGetCPUCount): New function.
* src/libvirt_private.syms (nodeinfo.h): Export it.
---

v2: add new function for determining just host cpu count, and
do even more cleanups based on unused variables

 src/libvirt_private.syms |  1 +
 src/nodeinfo.c           | 30 +++++++++++++++++++++++-------
 src/nodeinfo.h           |  4 ++--
 src/qemu/qemu_driver.c   | 39 ++++++++-------------------------------
 4 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 80bdf99..f350493 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -908,6 +908,7 @@ virNodeDeviceObjUnlock;
 # nodeinfo.h
 nodeCapsInitNUMA;
 nodeGetCPUBitmap;
+nodeGetCPUCount;
 nodeGetCPUMap;
 nodeGetCPUStats;
 nodeGetCellsFreeMemory;
diff --git a/src/nodeinfo.c b/src/nodeinfo.c
index 8b494df..b0be3ed 100644
--- a/src/nodeinfo.c
+++ b/src/nodeinfo.c
@@ -949,9 +949,24 @@ int nodeGetMemoryStats(virConnectPtr conn ATTRIBUTE_UNUSED,
 #endif
 }

+int
+nodeGetCPUCount(void)
+{
+#ifdef __linux__
+    /* XXX should we also work on older kernels, like RHEL5, that lack
+     * cpu/present and cpu/online files?  Those kernels also lack cpu
+     * hotplugging, so it would be a matter of finding the largest
+     * cpu/cpuNN directory, and returning NN + 1 */
+    return linuxParseCPUmax(SYSFS_SYSTEM_PATH "/cpu/present");
+#else
+    virReportError(VIR_ERR_NO_SUPPORT, "%s",
+                   _("host cpu counting not implemented on this platform"));
+    return NULL;
+#endif
+}
+
 virBitmapPtr
-nodeGetCPUBitmap(virConnectPtr conn ATTRIBUTE_UNUSED,
-                 int *max_id ATTRIBUTE_UNUSED)
+nodeGetCPUBitmap(int *max_id ATTRIBUTE_UNUSED)
 {
 #ifdef __linux__
     virBitmapPtr cpumap;
@@ -1249,10 +1264,11 @@ nodeGetMemoryParameters(virConnectPtr conn ATTRIBUTE_UNUSED,
 #endif
 }

-int nodeGetCPUMap(virConnectPtr conn,
-                  unsigned char **cpumap,
-                  unsigned int *online,
-                  unsigned int flags)
+int
+nodeGetCPUMap(virConnectPtr conn ATTRIBUTE_UNUSED,
+              unsigned char **cpumap,
+              unsigned int *online,
+              unsigned int flags)
 {
     virBitmapPtr cpus = NULL;
     int maxpresent;
@@ -1261,7 +1277,7 @@ int nodeGetCPUMap(virConnectPtr conn,

     virCheckFlags(0, -1);

-    if (!(cpus = nodeGetCPUBitmap(conn, &maxpresent)))
+    if (!(cpus = nodeGetCPUBitmap(&maxpresent)))
         goto cleanup;

     if (cpumap && virBitmapToData(cpus, cpumap, &dummy) < 0)
diff --git a/src/nodeinfo.h b/src/nodeinfo.h
index 44aa55d..350f3c3 100644
--- a/src/nodeinfo.h
+++ b/src/nodeinfo.h
@@ -46,8 +46,8 @@ int nodeGetCellsFreeMemory(virConnectPtr conn,
                            int maxCells);
 unsigned long long nodeGetFreeMemory(virConnectPtr conn);

-virBitmapPtr nodeGetCPUBitmap(virConnectPtr conn,
-                              int *max_id);
+virBitmapPtr nodeGetCPUBitmap(int *max_id);
+int nodeGetCPUCount(void);

 int nodeGetMemoryParameters(virConnectPtr conn,
                             virTypedParameterPtr params,
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 18be7d9..8cb92b6 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -13588,16 +13588,13 @@ cleanup:
 }

 static int
-qemuDomainGetPercpuStats(virDomainPtr domain,
-                         virDomainObjPtr vm,
+qemuDomainGetPercpuStats(virDomainObjPtr vm,
                          virCgroupPtr group,
                          virTypedParameterPtr params,
                          unsigned int nparams,
                          int start_cpu,
                          unsigned int ncpus)
 {
-    virBitmapPtr map = NULL;
-    virBitmapPtr map2 = NULL;
     int rv = -1;
     int i, id, max_id;
     char *pos;
@@ -13609,19 +13606,18 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
     virTypedParameterPtr ent;
     int param_idx;
     unsigned long long cpu_time;
-    bool result;

     /* return the number of supported params */
     if (nparams == 0 && ncpus != 0)
         return QEMU_NB_PER_CPU_STAT_PARAM;

-    /* To parse account file, we need bitmap of online cpus.  */
-    map = nodeGetCPUBitmap(domain->conn, &max_id);
-    if (!map)
+    /* To parse account file, we need to know how many cpus are present.  */
+    max_id = nodeGetCPUCount();
+    if (max_id < 0)
         return rv;

     if (ncpus == 0) { /* returns max cpu ID */
-        rv = max_id + 1;
+        rv = max_id;
         goto cleanup;
     }

@@ -13648,11 +13644,7 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
         id = start_cpu + ncpus - 1;

     for (i = 0; i <= id; i++) {
-        if (virBitmapGetBit(map, i, &result) < 0)
-            goto cleanup;
-        if (!result) {
-            cpu_time = 0;
-        } else if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) {
+        if (virStrToLong_ull(pos, &pos, 10, &cpu_time) < 0) {
             virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                            _("cpuacct parse error"));
             goto cleanup;
@@ -13680,22 +13672,9 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
     if (getSumVcpuPercpuStats(group, priv->nvcpupids, sum_cpu_time, n) < 0)
         goto cleanup;

-    /* Check that the mapping of online cpus didn't change mid-parse.  */
-    map2 = nodeGetCPUBitmap(domain->conn, &max_id);
-    if (!map2 || !virBitmapEqual(map, map2)) {
-        virReportError(VIR_ERR_OPERATION_INVALID, "%s",
-                       _("the set of online cpus changed while reading"));
-        goto cleanup;
-    }
-
     sum_cpu_pos = sum_cpu_time;
     for (i = 0; i <= id; i++) {
-        if (virBitmapGetBit(map, i, &result) < 0)
-            goto cleanup;
-        if (!result)
-            cpu_time = 0;
-        else
-            cpu_time = *(sum_cpu_pos++);
+        cpu_time = *(sum_cpu_pos++);
         if (i < start_cpu)
             continue;
         if (virTypedParameterAssign(&params[(i - start_cpu) * nparams +
@@ -13710,8 +13689,6 @@ qemuDomainGetPercpuStats(virDomainPtr domain,
 cleanup:
     VIR_FREE(sum_cpu_time);
     VIR_FREE(buf);
-    virBitmapFree(map);
-    virBitmapFree(map2);
     return rv;
 }

@@ -13763,7 +13740,7 @@ qemuDomainGetCPUStats(virDomainPtr domain,
     if (start_cpu == -1)
         ret = qemuDomainGetTotalcpuStats(group, params, nparams);
     else
-        ret = qemuDomainGetPercpuStats(domain, vm, group, params, nparams,
+        ret = qemuDomainGetPercpuStats(vm, group, params, nparams,
                                        start_cpu, ncpus);
 cleanup:
     virCgroupFree(&group);
-- 
1.7.11.7

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list


[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]