On Tue, Dec 07, 2021 at 05:34:00AM -0800, Rohit Kumar wrote: > This patch is to determine the VM which had IO or socket hangup error. > > Signed-off-by: Rohit Kumar <rohit.kumar3@xxxxxxxxxxx> > --- > src/qemu/qemu_monitor.c | 46 +++++++++++++++++++++++++---------------- > 1 file changed, 28 insertions(+), 18 deletions(-) > > diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c > index 75e0e4ed92..d36db280d9 100644 > --- a/src/qemu/qemu_monitor.c > +++ b/src/qemu/qemu_monitor.c > @@ -530,13 +530,19 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > qemuMonitor *mon = opaque; > bool error = false; > bool hangup = false; > + virDomainObj *vm = mon->vm; > + g_autofree char *vmName = NULL; > + > + if (vm != NULL && vm->def != NULL) { > + vmName = g_strdup(vm->def->name); > + } This looks a little questionable. Although we hold a reference on 'vm', this code doesn't do anything to protect its access of 'vm->def'. If we were protected when accesing vm->def, then we wouldn't need to strdup it anyway. > > virObjectRef(mon); > > /* lock access to the monitor and protect fd */ > virObjectLock(mon); > #if DEBUG_IO > - VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond); > + VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, socket, cond, vm, NULLSTR(vmName)); > #endif > if (mon->fd == -1 || !mon->watch) { > virObjectUnlock(mon); > @@ -583,8 +589,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > > if (!error && !mon->goteof && > cond & G_IO_ERR) { > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > - _("Invalid file descriptor while waiting for monitor")); > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Invalid file descriptor while waiting for monitor vm_name=%s"), NULLSTR(vmName)); > mon->goteof = true; > } > } > @@ -609,13 +615,13 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > virResetLastError(); > } else { > if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof) > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > - _("Error while processing monitor IO")); > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Error while processing monitor IO vm_name=%s"), NULLSTR(vmName)); > virCopyLastError(&mon->lastError); > virResetLastError(); > } > > - VIR_DEBUG("Error on monitor %s", NULLSTR(mon->lastError.message)); > + VIR_DEBUG("Error on monitor %s vm=%p name=%s", NULLSTR(mon->lastError.message), vm, NULLSTR(vmName)); > /* If IO process resulted in an error & we have a message, > * then wakeup that waiter */ > if (mon->msg && !mon->msg->finished) { > @@ -631,22 +637,20 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > * will try to acquire the virDomainObj *mutex next */ > if (mon->goteof) { > qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify; > - virDomainObj *vm = mon->vm; > > /* Make sure anyone waiting wakes up now */ > virCondSignal(&mon->notify); > virObjectUnlock(mon); > - VIR_DEBUG("Triggering EOF callback"); > + VIR_DEBUG("Triggering EOF callback vm=%p name=%s", vm, NULLSTR(vmName)); > (eofNotify)(mon, vm, mon->callbackOpaque); > virObjectUnref(mon); > } else if (error) { > qemuMonitorErrorNotifyCallback errorNotify = mon->cb->errorNotify; > - virDomainObj *vm = mon->vm; > > /* Make sure anyone waiting wakes up now */ > virCondSignal(&mon->notify); > virObjectUnlock(mon); > - VIR_DEBUG("Triggering error callback"); > + VIR_DEBUG("Triggering error callback vm=%p name=%s", vm, NULLSTR(vmName)); > (errorNotify)(mon, vm, mon->callbackOpaque); > virObjectUnref(mon); > } else { > @@ -932,17 +936,23 @@ qemuMonitorSend(qemuMonitor *mon, > qemuMonitorMessage *msg) > { > int ret = -1; > + virDomainObj *vm = mon->vm; > + g_autofree char *vmName = NULL; > + > + if (vm != NULL && vm->def != NULL) { > + vmName = g_strdup(vm->def->name); > + } > > /* Check whether qemu quit unexpectedly */ > if (mon->lastError.code != VIR_ERR_OK) { > - VIR_DEBUG("Attempt to send command while error is set %s", > - NULLSTR(mon->lastError.message)); > + VIR_DEBUG("Attempt to send command while error is set %s vm=%p name=%s", > + NULLSTR(mon->lastError.message), vm, NULLSTR(vmName)); > virSetError(&mon->lastError); > return -1; > } > if (mon->goteof) { > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > - _("End of file from qemu monitor")); > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("End of file from qemu monitor vm_name=%s"), NULLSTR(vmName)); > return -1; > } > > @@ -955,15 +965,15 @@ qemuMonitorSend(qemuMonitor *mon, > > while (!mon->msg->finished) { > if (virCondWait(&mon->notify, &mon->parent.lock) < 0) { > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > - _("Unable to wait on monitor condition")); > + virReportError(VIR_ERR_INTERNAL_ERROR, > + _("Unable to wait on monitor condition vm_name=%s"), NULLSTR(vmName)); > goto cleanup; > } > } > > if (mon->lastError.code != VIR_ERR_OK) { > - VIR_DEBUG("Send command resulted in error %s", > - NULLSTR(mon->lastError.message)); > + VIR_DEBUG("Send command resulted in error %s vm=%p name=%s", > + NULLSTR(mon->lastError.message), vm, NULLSTR(vmName)); > virSetError(&mon->lastError); > goto cleanup; > } > -- > 2.25.1 > Regards, Daniel -- |: https://berrange.com -o- https://www.flickr.com/photos/dberrange :| |: https://libvirt.org -o- https://fstop138.berrange.com :| |: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|