Re: [PATCH v2] Add VM info to improve error log message for qemu monitor

Peter Krempa <pkrempa@xxxxxxxxxx> · Mon, 3 Jan 2022 17:42:41 +0100

On Wed, Dec 22, 2021 at 22:39:21 -0800, Rohit Kumar wrote:
> This patch is to determine the VM which had IO or socket hangup error.
> Accessing directly vm->def->name inside qemuMonitorIO() or qemuMonitorSend()
> might leads to illegal access as we are out of 'vm' context and vm->def might
> not exist. Adding a field "domainName" inside mon object to access vm name
> and initialising it when creating mon object.
> 
> Signed-off-by: Rohit Kumar <rohit.kumar3@xxxxxxxxxxx>
> ---
>  diff to v1:
>   - Adding a field domainName inside _qemuMonitor struct for accessing vm name
>     instead of directly accessing mon->vm->def->name.
>   - Link to v1: https://listman.redhat.com/archives/libvir-list/2021-December/msg00217.html
>   - Talked with peter on RFC and he suggested me to add a field inside 
>     monitor struct to get VM name.
>  
>  src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 18 deletions(-)
> 
> diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
> index dda6ae9796..c3a0227795 100644
> --- a/src/qemu/qemu_monitor.c
> +++ b/src/qemu/qemu_monitor.c

[...]

> @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>      qemuMonitor *mon = opaque;
>      bool error = false;
>      bool hangup = false;
> +    virDomainObj *vm = NULL;
> +    char *vmName = NULL;

These local variables are not very useful, you can access 'mon' directly
without the need for this alias ...

>  
>      virObjectRef(mon);
>  
> +    vm = mon->vm;
> +    vmName = mon->domainName;
> +
>      /* lock access to the monitor and protect fd */
>      virObjectLock(mon);
>  #if DEBUG_IO
> -    VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond);
> +    VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName));
>  #endif

There's no much point in adding to thhese debug statements (inside
DEBUG_IO) as they are not compiled by default. In fact I'll propose a
removal of those.

>      if (mon->fd == -1 || !mon->watch) {
>          virObjectUnlock(mon);
> @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>  
>          if (!error && !mon->goteof &&
>              cond & G_IO_ERR) {
> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                           _("Invalid file descriptor while waiting for monitor"));
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           _("%s: Invalid file descriptor while waiting for monitor"), NULLSTR(vmName));

Do not prefix the error message by the VM name, but rather put it at the
end or inside:

 Invalid file descriptor while waiting for monitor (vm='%s')

or smilar.

>              mon->goteof = true;
>          }
>      }
> @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>              virResetLastError();
>          } else {
>              if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof)
> -                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                               _("Error while processing monitor IO"));
> +                virReportError(VIR_ERR_INTERNAL_ERROR,
> +                               _("%s: Error while processing monitor IO"), NULLSTR(vmName));

Same here. Additionally did you ever get to a situation where vmName
would be NULL?

>              virCopyLastError(&mon->lastError);
>              virResetLastError();
>          }
>  
> -        VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message));
> +        VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s",
> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));

This is a good example.

>          /* If IO process resulted in an error & we have a message,
>           * then wakeup that waiter */
>          if (mon->msg && !mon->msg->finished) {
> @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED,
>       * will try to acquire the virDomainObj *mutex next */
>      if (mon->goteof) {
>          qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify;
> -        virDomainObj *vm = mon->vm;

You can remove this local when you are accessing it directly.

>  
>          /* Make sure anyone waiting wakes up now */
>          virCondSignal(&mon->notify);
>          virObjectUnlock(mon);
> -        VIR_DEBUG("Triggering EOF callback");
> +        VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s",
> +                  mon, vm, NULLSTR(vmName));
>          (eofNotify)(mon, vm, mon->callbackOpaque);
>          virObjectUnref(mon);
>      } else if (error) {
>          qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify;
> -        virDomainObj *vm = mon->vm;
>  
>          /* Make sure anyone waiting wakes up now */
>          virCondSignal(&mon->notify);
>          virObjectUnlock(mon);
> -        VIR_DEBUG("Triggering error callback");
> +        VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s",
> +                  mon, vm, NULLSTR(vmName));
>          (errorNotify)(mon, vm, mon->callbackOpaque);
>          virObjectUnref(mon);
>      } else {
> @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm,
>      mon->fd = fd;
>      mon->context = g_main_context_ref(context);
>      mon->vm = virObjectRef(vm);
> +    mon->domainName = g_strdup(vm->def->name);

As noted above, did you ever run into a situation where this would be
NULL? That would seem weird.

>      mon->waitGreeting = true;
>      mon->cb = cb;
>      mon->callbackOpaque = opaque;
> @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon,
>                  qemuMonitorMessage *msg)
>  {
>      int ret = -1;
> +    virDomainObj *vm = mon->vm;
> +    char *vmName = mon->domainName;

Avoid these.

>  
>      /* Check whether qemu quit unexpectedly */
>      if (mon->lastError.code != VIR_ERR_OK) {
> -        VIR_DEBUG("Attempt to send command while error is set %s",
> -                  NULLSTR(mon->lastError.message));
> +        VIR_DEBUG("Attempt to send command while error is set %s mon=%p vm=%p name=%s",
> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>          virSetError(&mon->lastError);
>          return -1;
>      }
>      if (mon->goteof) {
> -        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                       _("End of file from qemu monitor"));
> +        virReportError(VIR_ERR_INTERNAL_ERROR,
> +                       _("%s: End of file from qemu monitor"), NULLSTR(vmName));

No prefixing.

>          return -1;
>      }
>  
> @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon,
>  
>      while (!mon->msg->finished) {
>          if (virCondWait(&mon->notify, &mon->parent.lock) < 0) {
> -            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
> -                           _("Unable to wait on monitor condition"));
> +            virReportError(VIR_ERR_INTERNAL_ERROR,
> +                           _("%s: Unable to wait on monitor condition"), NULLSTR(vmName));

Same here.

>              goto cleanup;
>          }
>      }
>  
>      if (mon->lastError.code != VIR_ERR_OK) {
> -        VIR_DEBUG("Send command resulted in error %s",
> -                  NULLSTR(mon->lastError.message));
> +        VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p name=%s",
> +                  NULLSTR(mon->lastError.message), mon, vm, NULLSTR(vmName));
>          virSetError(&mon->lastError);
>          goto cleanup;
>      }
> -- 
> 2.25.1
>