Re: [PATCH v2] Add VM info to improve error log message for qemu monitor

Rohit Kumar <rohit.kumar3@xxxxxxxxxxx> · Tue, 4 Jan 2022 14:01:34 +0530

On 03/01/22 7:12 pm, Ani Sinha wrote:

On Wed, 22 Dec 2021, Rohit Kumar wrote:

This patch is to determine the VM which had IO or socket hangup error.
Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend()
might leads to illegal access as we are out of 'vm' context and vm->def might
not exist. Adding a field "domainName" inside mon object to access vm name
and initialising it when creating mon object.
The commit message should desribe the purpose of the change, not how it is
done. That can be easily understood reading the code. As for why the
implementation follows a particular way, we can explain it in the code
comments.

This change adds the domain name in the error and debug logs during
monitor IO processing so that we may infer which VM experienced those
errors etc. This may help in debugging monitor IO errors. IMHO this
decription is enough.
Thanks Ani, I will update the commit message in the next patch.

Signed-off-by: Rohit Kumar <rohit.kumar3@xxxxxxxxxxx>
---
  diff to v1:
   - Adding a field domainName inside _qemuMonitor struct for accessing vm name
     instead of directly accessing mon->vm->def->name.
   - Link to v1: https://urldefense.proofpoint.com/v2/url?u=https-3A__listman.redhat.com_archives_libvir-2Dlist_2021-2DDecember_msg00217.html&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=W2jEgk2s_xjLOLC-j8lPBnwVgteHS0-yYEGMqin16nmfzndzNgBE9mEvtbvyVzQo&s=Sa2q9edb4cvXo-EUriIPwlJxNo-itpp6--ZxY7rla2U&e=
   - Talked with peter on RFC and he suggested me to add a field inside
     monitor struct to get VM name.

  src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
  1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index dda6ae9796..c3a0227795 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -80,6 +80,7 @@ struct _qemuMonitor {
      GSource *watch;

      virDomainObj *vm;
+    char *domainName;

Here we can explain why we are adding an extra member as opposed to
referencing it directly.
Right, we can access it directly.

      qemuMonitorCallbacks *cb;
      void *callbackOpaque;
@@ -243,6 +244,7 @@ qemuMonitorDispose(void *obj)
      virCondDestroy(&mon->notify);
      g_free(mon->buffer);
      g_free(mon->balloonpath);
+    g_free(mon->domainName);
  }


@@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
      qemuMonitor *mon = opaque;
      bool error = false;
      bool hangup = false;
+    virDomainObj *vm = NULL;
+    char *vmName = NULL;

These initializations are redundant since ...
Acknowledged.
      virObjectRef(mon);

+    vm = mon->vm;
+    vmName = mon->domainName;
you initialize them here anyway.
I will remove these in next patch.

+
      /* lock access to the monitor and protect fd */
      virObjectLock(mon);
  #if DEBUG_IO
-    VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond);
+    VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName));
  #endif
      if (mon->fd == -1 || !mon->watch) {
          virObjectUnlock(mon);
@@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,

          if (!error && !mon->goteof &&
              cond & G_IO_ERR) {
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                           _("Invalid file descriptor while waiting for monitor"));
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("%s: Invalid file descriptor while waiting for monitor"), NULLSTR(vmName));
As per https://urldefense.proofpoint.com/v2/url?u=https-3A__libvirt.org_coding-2Dstyle.html-23error-2Dmessage-2Dformat&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=W2jEgk2s_xjLOLC-j8lPBnwVgteHS0-yYEGMqin16nmfzndzNgBE9mEvtbvyVzQo&s=zU-TBR-xTsKRXEW5gdgL0WRRYBg7B7cbLluu65_PfkI&e= , please
enclose %s within quotes:
_("'%s': Invalid file descriptor while waiting for monitor")
Ack. Thanks.
              mon->goteof = true;
          }
      }
@@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
              virResetLastError();
          } else {
              if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
-                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                               _("Error while processing monitor IO"));
+                virReportError(VIR_ERR_INTERNAL_ERROR,
+                               _("%s: Error while processing monitor IO"), NULLSTR(vmName));
Ditto.
Ack.
              virCopyLastError(&mon->lastError);
              virResetLastError();
          }

-        VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message));
+        VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
+                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
          /* If IO process resulted in an error & we have a message,
           * then wakeup that waiter */
          if (mon->msg && !mon->msg->finished) {
@@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
       * will try to acquire the virDomainObj *mutex next */
      if (mon->goteof) {
          qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
-        virDomainObj *vm = mon->vm;

          /* Make sure anyone waiting wakes up now */
          virCondSignal(&mon->notify);
          virObjectUnlock(mon);
-        VIR_DEBUG("Triggering EOF callback");
+        VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
+                  mon, vm, NULLSTR(vmName));
          (eofNotify)(mon, vm, mon->callbackOpaque);
          virObjectUnref(mon);
      } else if (error) {
          qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
-        virDomainObj *vm = mon->vm;

          /* Make sure anyone waiting wakes up now */
          virCondSignal(&mon->notify);
          virObjectUnlock(mon);
-        VIR_DEBUG("Triggering error callback");
+        VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
+                  mon, vm, NULLSTR(vmName));
          (errorNotify)(mon, vm, mon->callbackOpaque);
          virObjectUnref(mon);
      } else {
@@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
      mon->fd = fd;
      mon->context = g_main_context_ref(context);
      mon->vm = virObjectRef(vm);
+    mon->domainName = g_strdup(vm->def->name);
do not forget to g_free() this during cleanup in the same function.
So, in cleanup: qemuMonitorDispose is called. And there I have added 
g_free() to clean mon->domainName.

      mon->waitGreeting = true;
      mon->cb = cb;
      mon->callbackOpaque = opaque;
@@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
                  qemuMonitorMessage *msg)
  {
      int ret = -1;
+    virDomainObj *vm = mon->vm;
+    char *vmName = mon->domainName;

      /* Check whether qemu quit unexpectedly */
      if (mon->lastError.code != VIR_ERR_OK) {
-        VIR_DEBUG("Attempt to send command while error is set %s",
-                  NULLSTR(mon->lastError.message));
+        VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p name=%s",
+                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
          virSetError(&mon->lastError);
          return -1;
      }
      if (mon->goteof) {
-        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                       _("End of file from qemu monitor"));
+        virReportError(VIR_ERR_INTERNAL_ERROR,
+                       _("%s: End of file from qemu monitor"), NULLSTR(vmName));
ditto re error string format.
Ack.

          return -1;
      }

@@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,

      while (!mon->msg->finished) {
          if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
-            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-                           _("Unable to wait on monitor condition"));
+            virReportError(VIR_ERR_INTERNAL_ERROR,
+                           _("%s: Unable to wait on monitor condition"), NULLSTR(vmName));
ditto as above.
Ack.
              goto cleanup;
          }
      }

      if (mon->lastError.code != VIR_ERR_OK) {
-        VIR_DEBUG("Send command resulted in error %s",
-                  NULLSTR(mon->lastError.message));
+        VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p name=%s",
+                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
          virSetError(&mon->lastError);
          goto cleanup;
      }
--
2.25.1