On Tue, 4 Jan 2022, Rohit Kumar wrote: > > On 03/01/22 7:12 pm, Ani Sinha wrote: > > > > On Wed, 22 Dec 2021, Rohit Kumar wrote: > > > > > This patch is to determine the VM which had IO or socket hangup error. > > > Accessing directly vm->def->name inside qemuMonitorIO() or > > > qemuMonitorSend() > > > might leads to illegal access as we are out of 'vm' context and vm->def > > > might > > > not exist. Adding a field "domainName" inside mon object to access vm name > > > and initialising it when creating mon object. > > The commit message should desribe the purpose of the change, not how it is > > done. That can be easily understood reading the code. As for why the > > implementation follows a particular way, we can explain it in the code > > comments. > > > > This change adds the domain name in the error and debug logs during > > monitor IO processing so that we may infer which VM experienced those > > errors etc. This may help in debugging monitor IO errors. IMHO this > > decription is enough. > Thanks Ani, I will update the commit message in the next patch. > > > > > Signed-off-by: Rohit Kumar <rohit.kumar3@xxxxxxxxxxx> > > > --- > > > diff to v1: > > > - Adding a field domainName inside _qemuMonitor struct for accessing vm > > > name > > > instead of directly accessing mon->vm->def->name. > > > - Link to v1: > > > https://urldefense.proofpoint.com/v2/url?u=https-3A__listman.redhat.com_archives_libvir-2Dlist_2021-2DDecember_msg00217.html&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=W2jEgk2s_xjLOLC-j8lPBnwVgteHS0-yYEGMqin16nmfzndzNgBE9mEvtbvyVzQo&s=Sa2q9edb4cvXo-EUriIPwlJxNo-itpp6--ZxY7rla2U&e= > > > - Talked with peter on RFC and he suggested me to add a field inside > > > monitor struct to get VM name. > > > > > > src/qemu/qemu_monitor.c | 47 +++++++++++++++++++++++++---------------- > > > 1 file changed, 29 insertions(+), 18 deletions(-) > > > > > > diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c > > > index dda6ae9796..c3a0227795 100644 > > > --- a/src/qemu/qemu_monitor.c > > > +++ b/src/qemu/qemu_monitor.c > > > @@ -80,6 +80,7 @@ struct _qemuMonitor { > > > GSource *watch; > > > > > > virDomainObj *vm; > > > + char *domainName; > > > > > Here we can explain why we are adding an extra member as opposed to > > referencing it directly. > Right, we can access it directly. > > > > > qemuMonitorCallbacks *cb; > > > void *callbackOpaque; > > > @@ -243,6 +244,7 @@ qemuMonitorDispose(void *obj) > > > virCondDestroy(&mon->notify); > > > g_free(mon->buffer); > > > g_free(mon->balloonpath); > > > + g_free(mon->domainName); > > > } > > > > > > > > > @@ -530,13 +532,18 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > > > qemuMonitor *mon = opaque; > > > bool error = false; > > > bool hangup = false; > > > + virDomainObj *vm = NULL; > > > + char *vmName = NULL; > > > > > These initializations are redundant since ... > Acknowledged. > > > virObjectRef(mon); > > > > > > + vm = mon->vm; > > > + vmName = mon->domainName; > > you initialize them here anyway. > I will remove these in next patch. > > > > > + > > > /* lock access to the monitor and protect fd */ > > > virObjectLock(mon); > > > #if DEBUG_IO > > > - VIR_DEBUG("Monitor %p I/O on socket %p cond %d", mon, socket, cond); > > > + VIR_DEBUG("Monitor %p I/O on socket %p cond %d vm=%p name=%s", mon, > > > socket, cond, vm, NULLSTR(vmName)); > > > #endif > > > if (mon->fd == -1 || !mon->watch) { > > > virObjectUnlock(mon); > > > @@ -583,8 +590,8 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > > > > > > if (!error && !mon->goteof && > > > cond & G_IO_ERR) { > > > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > > - _("Invalid file descriptor while waiting for > > > monitor")); > > > + virReportError(VIR_ERR_INTERNAL_ERROR, > > > + _("%s: Invalid file descriptor while waiting > > > for monitor"), NULLSTR(vmName)); > > As per > > https://urldefense.proofpoint.com/v2/url?u=https-3A__libvirt.org_coding-2Dstyle.html-23error-2Dmessage-2Dformat&d=DwIBAg&c=s883GpUCOChKOHiocYtGcg&r=ABSkr7gy7ZTfApFfI-Xxt1gZNtsDDiXoXOXc0OrkyFs&m=W2jEgk2s_xjLOLC-j8lPBnwVgteHS0-yYEGMqin16nmfzndzNgBE9mEvtbvyVzQo&s=zU-TBR-xTsKRXEW5gdgL0WRRYBg7B7cbLluu65_PfkI&e= > > , please > > enclose %s within quotes: > > _("'%s': Invalid file descriptor while waiting for monitor") > Ack. Thanks. > > > mon->goteof = true; > > > } > > > } > > > @@ -609,13 +616,14 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > > > virResetLastError(); > > > } else { > > > if (virGetLastErrorCode() == VIR_ERR_OK && !mon->goteof) > > > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > > - _("Error while processing monitor IO")); > > > + virReportError(VIR_ERR_INTERNAL_ERROR, > > > + _("%s: Error while processing monitor > > > IO"), NULLSTR(vmName)); > > Ditto. > Ack. > > > virCopyLastError(&mon->lastError); > > > virResetLastError(); > > > } > > > > > > - VIR_DEBUG("Error on monitor %s", > > > NULLSTR(mon->lastError.message)); > > > + VIR_DEBUG("Error on monitor %s mon=%p vm=%p name=%s", > > > + NULLSTR(mon->lastError.message), mon, vm, > > > NULLSTR(vmName)); > > > /* If IO process resulted in an error & we have a message, > > > * then wakeup that waiter */ > > > if (mon->msg && !mon->msg->finished) { > > > @@ -631,22 +639,22 @@ qemuMonitorIO(GSocket *socket G_GNUC_UNUSED, > > > * will try to acquire the virDomainObj *mutex next */ > > > if (mon->goteof) { > > > qemuMonitorEofNotifyCallback eofNotify = mon->cb->eofNotify; > > > - virDomainObj *vm = mon->vm; > > > > > > /* Make sure anyone waiting wakes up now */ > > > virCondSignal(&mon->notify); > > > virObjectUnlock(mon); > > > - VIR_DEBUG("Triggering EOF callback"); > > > + VIR_DEBUG("Triggering EOF callback mon=%p vm=%p name=%s", > > > + mon, vm, NULLSTR(vmName)); > > > (eofNotify)(mon, vm, mon->callbackOpaque); > > > virObjectUnref(mon); > > > } else if (error) { > > > qemuMonitorErrorNotifyCallback errorNotify = > > > mon->cb->errorNotify; > > > - virDomainObj *vm = mon->vm; > > > > > > /* Make sure anyone waiting wakes up now */ > > > virCondSignal(&mon->notify); > > > virObjectUnlock(mon); > > > - VIR_DEBUG("Triggering error callback"); > > > + VIR_DEBUG("Triggering error callback mon=%p vm=%p name=%s", > > > + mon, vm, NULLSTR(vmName)); > > > (errorNotify)(mon, vm, mon->callbackOpaque); > > > virObjectUnref(mon); > > > } else { > > > @@ -694,6 +702,7 @@ qemuMonitorOpenInternal(virDomainObj *vm, > > > mon->fd = fd; > > > mon->context = g_main_context_ref(context); > > > mon->vm = virObjectRef(vm); > > > + mon->domainName = g_strdup(vm->def->name); > > do not forget to g_free() this during cleanup in the same function. > So, in cleanup: qemuMonitorDispose is called. And there I have added g_free() > to clean mon->domainName. No, in cleanup, I see qemuMonitorClose() is called where do you do not add any additional code to free the allocation. This is what I see in cleanup code: ``` cleanup: /* We don't want the 'destroy' callback invoked during * cleanup from construction failure, because that can * give a double-unref on virDomainObj *in the caller, * so kill the callbacks now. */ mon->cb = NULL; /* The caller owns 'fd' on failure */ mon->fd = -1; qemuMonitorClose(mon); return NULL; ``` > > > > > mon->waitGreeting = true; > > > mon->cb = cb; > > > mon->callbackOpaque = opaque; > > > @@ -932,17 +941,19 @@ qemuMonitorSend(qemuMonitor *mon, > > > qemuMonitorMessage *msg) > > > { > > > int ret = -1; > > > + virDomainObj *vm = mon->vm; > > > + char *vmName = mon->domainName; > > > > > > /* Check whether qemu quit unexpectedly */ > > > if (mon->lastError.code != VIR_ERR_OK) { > > > - VIR_DEBUG("Attempt to send command while error is set %s", > > > - NULLSTR(mon->lastError.message)); > > > + VIR_DEBUG("Attempt to send command while error is set %s mon=%p > > > vm=%p name=%s", > > > + NULLSTR(mon->lastError.message), mon, vm, > > > NULLSTR(vmName)); > > > virSetError(&mon->lastError); > > > return -1; > > > } > > > if (mon->goteof) { > > > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > > - _("End of file from qemu monitor")); > > > + virReportError(VIR_ERR_INTERNAL_ERROR, > > > + _("%s: End of file from qemu monitor"), > > > NULLSTR(vmName)); > > ditto re error string format. > Ack. > > > > > return -1; > > > } > > > > > > @@ -955,15 +966,15 @@ qemuMonitorSend(qemuMonitor *mon, > > > > > > while (!mon->msg->finished) { > > > if (virCondWait(&mon->notify, &mon->parent.lock) < 0) { > > > - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", > > > - _("Unable to wait on monitor condition")); > > > + virReportError(VIR_ERR_INTERNAL_ERROR, > > > + _("%s: Unable to wait on monitor condition"), > > > NULLSTR(vmName)); > > ditto as above. > Ack. > > > goto cleanup; > > > } > > > } > > > > > > if (mon->lastError.code != VIR_ERR_OK) { > > > - VIR_DEBUG("Send command resulted in error %s", > > > - NULLSTR(mon->lastError.message)); > > > + VIR_DEBUG("Send command resulted in error %s mon=%p vm=%p > > > name=%s", > > > + NULLSTR(mon->lastError.message), mon, vm, > > > NULLSTR(vmName)); > > > virSetError(&mon->lastError); > > > goto cleanup; > > > } > > > -- > > > 2.25.1 > > > > > > >