Thanks. I know from https://www.redhat.com/archives/libvir-list/2009-December/msg00064.html that there is a deadlock problem in this fix. I would appreciate a notice when a solution is found. Thanks a lot. -- Shi Jin, PhD --- On Thu, 11/26/09, Daniel P. Berrange <berrange@xxxxxxxxxx> wrote: > From: Daniel P. Berrange <berrange@xxxxxxxxxx> > Subject: [libvirt] [PATCH 03/14] Fix crash when deleting monitor while a command is in progress > To: libvir-list@xxxxxxxxxx > Date: Thursday, November 26, 2009, 11:27 AM > If QEMU shuts down while we're in the > middle of processing a > monitor command, the monitor will be freed, and upon > cleaning > up we attempt to do qemuMonitorUnlock(priv->mon) > when priv->mon > is NULL. > > To address this we introduce proper reference counting > into > the qemuMonitorPtr object, and hold an extra reference > whenever > executing a command. > > * src/qemu/qemu_driver.c: Hold a reference on the monitor > while > executing commands, and only NULL-ify the > priv->mon field when > the last reference is released > * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add > reference > counting to handle safe deletion of monitor objects > --- > src/qemu/qemu_driver.c | 13 > ++++++-- > src/qemu/qemu_monitor.c | 81 > +++++++++++++++++++++++++++++------------------ > src/qemu/qemu_monitor.h | 5 ++- > 3 files changed, 64 insertions(+), 35 deletions(-) > > diff --git a/src/qemu/qemu_driver.c > b/src/qemu/qemu_driver.c > index c9b5ac2..3befe3d 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -271,6 +271,7 @@ static void > qemuDomainObjEnterMonitor(virDomainObjPtr obj) > qemuDomainObjPrivatePtr priv = > obj->privateData; > > qemuMonitorLock(priv->mon); > + qemuMonitorRef(priv->mon); > virDomainObjUnlock(obj); > } > > @@ -281,10 +282,15 @@ static void > qemuDomainObjEnterMonitor(virDomainObjPtr obj) > */ > static void qemuDomainObjExitMonitor(virDomainObjPtr obj) > { > + int refs; > qemuDomainObjPrivatePtr priv = > obj->privateData; > > + refs = qemuMonitorUnref(priv->mon); > qemuMonitorUnlock(priv->mon); > virDomainObjLock(obj); > + > + if (refs == 0) > + priv->mon = NULL; > } > > > @@ -301,6 +307,7 @@ static void > qemuDomainObjEnterMonitorWithDriver(struct qemud_driver > *driver, vir > qemuDomainObjPrivatePtr priv = > obj->privateData; > > qemuMonitorLock(priv->mon); > + qemuMonitorRef(priv->mon); > virDomainObjUnlock(obj); > qemuDriverUnlock(driver); > } > @@ -315,6 +322,7 @@ static void > qemuDomainObjExitMonitorWithDriver(struct qemud_driver > *driver, virD > { > qemuDomainObjPrivatePtr priv = > obj->privateData; > > + qemuMonitorUnref(priv->mon); > qemuMonitorUnlock(priv->mon); > qemuDriverLock(driver); > virDomainObjLock(obj); > @@ -2399,10 +2407,9 @@ static void > qemudShutdownVMDaemon(virConnectPtr conn, > > _("Failed > to send SIGTERM to %s (%d)"), > > > vm->def->name, vm->pid); > > - if (priv->mon) { > - > qemuMonitorClose(priv->mon); > + if (priv->mon && > + qemuMonitorClose(priv->mon) > == 0) > priv->mon = > NULL; > - } > > if (vm->monitor_chr) { > if > (vm->monitor_chr->type == VIR_DOMAIN_CHR_TYPE_UNIX) > diff --git a/src/qemu/qemu_monitor.c > b/src/qemu/qemu_monitor.c > index f0ef81b..3829e7a 100644 > --- a/src/qemu/qemu_monitor.c > +++ b/src/qemu/qemu_monitor.c > @@ -42,6 +42,8 @@ struct _qemuMonitor { > virMutex lock; > virCond notify; > > + int refs; > + > virDomainObjPtr dom; > > int fd; > @@ -67,12 +69,14 @@ struct _qemuMonitor { > * the next monitor msg */ > int lastErrno; > > - /* If the monitor callback is currently > active */ > + /* If the monitor EOF callback is currently > active (stops more commands being run) */ > unsigned eofcb: 1; > - /* If the monitor callback should free the > closed monitor */ > + /* If the monitor is in process of shutting > down */ > unsigned closed: 1; > + > }; > > + > void qemuMonitorLock(qemuMonitorPtr mon) > { > virMutexLock(&mon->lock); > @@ -84,21 +88,33 @@ void qemuMonitorUnlock(qemuMonitorPtr > mon) > } > > > -static void qemuMonitorFree(qemuMonitorPtr mon, int > lockDomain) > +static void qemuMonitorFree(qemuMonitorPtr mon) > { > - VIR_DEBUG("mon=%p, lockDomain=%d", mon, > lockDomain); > - if (mon->vm) { > - if (lockDomain) > - > virDomainObjLock(mon->vm); > - if > (!virDomainObjUnref(mon->vm) && lockDomain) > - > virDomainObjUnlock(mon->vm); > - } > + VIR_DEBUG("mon=%p", mon); > + if (mon->vm) > + > virDomainObjUnref(mon->vm); > if > (virCondDestroy(&mon->notify) < 0) > {} > > virMutexDestroy(&mon->lock); > VIR_FREE(mon); > } > > +int qemuMonitorRef(qemuMonitorPtr mon) > +{ > + mon->refs++; > + return mon->refs; > +} > + > +int qemuMonitorUnref(qemuMonitorPtr mon) > +{ > + mon->refs--; > + > + if (mon->refs == 0) > + qemuMonitorFree(mon); > + > + return mon->refs; > +} > + > > static int > qemuMonitorOpenUnix(const char *monitor) > @@ -346,8 +362,10 @@ static void > qemuMonitorIO(int watch, int fd, int events, void *opaque) > { > qemuMonitorPtr mon = opaque; > int quit = 0, failed = 0; > + virDomainObjPtr vm; > > qemuMonitorLock(mon); > + qemuMonitorRef(mon); > VIR_DEBUG("Monitor %p I/O on watch > %d fd %d events %d", mon, watch, fd, events); > > if (mon->fd != fd || > mon->watch != watch) { > @@ -409,22 +427,20 @@ qemuMonitorIO(int watch, int fd, int > events, void *opaque) { > if (failed || quit) { > /* Make sure anyone > waiting wakes up now */ > > virCondSignal(&mon->notify); > - mon->eofcb = 1; > > qemuMonitorUnlock(mon); > > VIR_DEBUG("Triggering EOF callback error? > %d", failed); > mon->eofCB(mon, > mon->vm, failed); > > > qemuMonitorLock(mon); > - if (mon->closed) { > - > qemuMonitorUnlock(mon); > - > VIR_DEBUG("Delayed free of monitor %p", mon); > - > qemuMonitorFree(mon, 1); > - } else { > - > qemuMonitorUnlock(mon); > - } > - } else { > - qemuMonitorUnlock(mon); > } > + > + /* Safe 'vm' to an intermediate ptr, since > 'mon' may be > + * freed after the Unref call */ > + vm = mon->vm; > + virDomainObjLock(vm); > + if (qemuMonitorUnref(mon) > 0) > + qemuMonitorUnlock(mon); > + virDomainObjUnlock(vm); > } > > > @@ -453,6 +469,7 @@ qemuMonitorOpen(virDomainObjPtr vm, > return NULL; > } > mon->fd = -1; > + mon->refs = 1; > mon->vm = vm; > mon->eofCB = eofCB; > qemuMonitorLock(mon); > @@ -512,10 +529,14 @@ cleanup: > } > > > -void qemuMonitorClose(qemuMonitorPtr mon) > +int qemuMonitorClose(qemuMonitorPtr mon) > { > + int refs; > + > if (!mon) > - return; > + return 0; > + > + VIR_DEBUG("mon=%p", mon); > > qemuMonitorLock(mon); > if (!mon->closed) { > @@ -523,18 +544,17 @@ void qemuMonitorClose(qemuMonitorPtr > mon) > > virEventRemoveHandle(mon->watch); > if (mon->fd != > -1) > > close(mon->fd); > - /* NB: don't reset fd / > watch fields, since active > - * callback may > still want them */ > + /* NB: ordinarily one might > immediately set mon->watch to -1 > + * and mon->fd to > -1, but there may be a callback active > + * that is still > relying on these fields being valid. So > + * we merely close > them, but not clear their values and > + * use this explicit > 'closed' flag to track this state */ > mon->closed = 1; > } > > - if (mon->eofcb) { > - VIR_DEBUG("Mark monitor to be > deleted %p", mon); > + if ((refs = qemuMonitorUnref(mon)) > 0) > > qemuMonitorUnlock(mon); > - } else { > - VIR_DEBUG("Delete monitor now > %p", mon); > - qemuMonitorFree(mon, 0); > - } > + return refs; > } > > > @@ -552,7 +572,6 @@ int qemuMonitorSend(qemuMonitorPtr > mon, > > if (mon->eofcb) { > msg->lastErrno = > EIO; > - qemuMonitorUnlock(mon); > return -1; > } > > diff --git a/src/qemu/qemu_monitor.h > b/src/qemu/qemu_monitor.h > index 71688cb..27b43b7 100644 > --- a/src/qemu/qemu_monitor.h > +++ b/src/qemu/qemu_monitor.h > @@ -78,11 +78,14 @@ typedef int > (*qemuMonitorDiskSecretLookup)(qemuMonitorPtr mon, > qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm, > > > qemuMonitorEOFNotify eofCB); > > -void qemuMonitorClose(qemuMonitorPtr mon); > +int qemuMonitorClose(qemuMonitorPtr mon); > > void qemuMonitorLock(qemuMonitorPtr mon); > void qemuMonitorUnlock(qemuMonitorPtr mon); > > +int qemuMonitorRef(qemuMonitorPtr mon); > +int qemuMonitorUnref(qemuMonitorPtr mon); > + > void qemuMonitorRegisterDiskSecretLookup(qemuMonitorPtr > mon, > > > > qemuMonitorDiskSecretLookup secretCB); > > -- > 1.6.5.2 > > -- > Libvir-list mailing list > Libvir-list@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/libvir-list > -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list