Hi Daniel, I'm giving it some heavy testing and seems to be rock solid! Thanks a lot for Your effort! I'm going to add few tens of guests to torture libvirt with multiple simultaneous accesses even more... regards nik On Mon, Dec 07, 2009 at 02:18:44PM +0000, Daniel P. Berrange wrote: > On Thu, Dec 03, 2009 at 11:12:40AM +0000, Daniel P. Berrange wrote: > > On Thu, Dec 03, 2009 at 12:47:02AM +0100, Matthias Bolte wrote: > > > 2009/11/26 Daniel P. Berrange <berrange@xxxxxxxxxx>: > > > > If QEMU shuts down while we're in the middle of processing a > > > > monitor command, the monitor will be freed, and upon cleaning > > > > up we attempt to do qemuMonitorUnlock(priv->mon) when priv->mon > > > > is NULL. > > > > > > > > To address this we introduce proper reference counting into > > > > the qemuMonitorPtr object, and hold an extra reference whenever > > > > executing a command. > > > > > > > > * src/qemu/qemu_driver.c: Hold a reference on the monitor while > > > > executing commands, and only NULL-ify the priv->mon field when > > > > the last reference is released > > > > * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference > > > > counting to handle safe deletion of monitor objects > > > > > > The locking pattern below results in destroying a locked mutex. It > > > this intended? > > > > No, that's a bug, the qemuMonitorUnref() call itself should have > > called unlock if the ref count hit 0, before destroying it. > > > > > qemuMonitorLock(mon); > > > [...] > > > if (qemuMonitorUnref(mon) > 0) > > > qemuMonitorUnlock(mon); > > > > > > Well, this patch makes the TCK deadlock for me, seems to be a lock > > > ordering issue combined with a race condition; it doesn't happen every > > > run. I don't understand all details of the locking and refcounting > > > scheme of the QEMU monitor yet, it's quite complex and gets even more > > > complex. > > > > Yes, that problem shown by valgrind will indeed deadlock. I'll fix > > this. The domain object lock must never be acquired if the thread > > holds the monitor lock already. We must have strict ordering from > > top to bottom (driver -> domain object -> qemu monitor) > > > This is the patch I'm proposing instead. It removes the ref count adjust > of virDomainObjPtr from qemuMonitor, since that requires that you have > the bad lock ordering. Instead that extra ref count is added/removed by > the qemu driver code. This means the qemuMonitor never has to touch any > locks on virDomainObjPtr which complies with our strict top->bottom > ordering requirement. > > > commit c2b32f964b0eed861dea11e5037993e3a3c3fb3d > Author: Daniel P. Berrange <berrange@xxxxxxxxxx> > Date: Thu Nov 26 13:29:29 2009 +0000 > > Fix crash when deleting monitor while a command is in progress > > If QEMU shuts down while we're in the middle of processing a > monitor command, the monitor will be freed, and upon cleaning > up we attempt to do qemuMonitorUnlock(priv->mon) when priv->mon > is NULL. > > To address this we introduce proper reference counting into > the qemuMonitorPtr object, and hold an extra reference whenever > executing a command. > > * src/qemu/qemu_driver.c: Hold a reference on the monitor while > executing commands, and only NULL-ify the priv->mon field when > the last reference is released > * src/qemu/qemu_monitor.h, src/qemu/qemu_monitor.c: Add reference > counting to handle safe deletion of monitor objects > > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index 43d20ea..fd47dc0 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -272,6 +272,7 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj) > qemuDomainObjPrivatePtr priv = obj->privateData; > > qemuMonitorLock(priv->mon); > + qemuMonitorRef(priv->mon); > virDomainObjUnlock(obj); > } > > @@ -283,9 +284,16 @@ static void qemuDomainObjEnterMonitor(virDomainObjPtr obj) > static void qemuDomainObjExitMonitor(virDomainObjPtr obj) > { > qemuDomainObjPrivatePtr priv = obj->privateData; > + int refs; > > + refs = qemuMonitorUnref(priv->mon); > qemuMonitorUnlock(priv->mon); > virDomainObjLock(obj); > + > + if (refs == 0) { > + virDomainObjUnref(obj); > + priv->mon = NULL; > + } > } > > > @@ -302,6 +310,7 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir > qemuDomainObjPrivatePtr priv = obj->privateData; > > qemuMonitorLock(priv->mon); > + qemuMonitorRef(priv->mon); > virDomainObjUnlock(obj); > qemuDriverUnlock(driver); > } > @@ -315,10 +324,17 @@ static void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, vir > static void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, virDomainObjPtr obj) > { > qemuDomainObjPrivatePtr priv = obj->privateData; > + int refs; > > + refs = qemuMonitorUnref(priv->mon); > qemuMonitorUnlock(priv->mon); > qemuDriverLock(driver); > virDomainObjLock(obj); > + > + if (refs == 0) { > + virDomainObjUnref(obj); > + priv->mon = NULL; > + } > } > > > @@ -653,6 +669,10 @@ qemuConnectMonitor(virDomainObjPtr vm) > { > qemuDomainObjPrivatePtr priv = vm->privateData; > > + /* Hold an extra reference because we can't allow 'vm' to be > + * deleted while the monitor is active */ > + virDomainObjRef(vm); > + > if ((priv->mon = qemuMonitorOpen(vm, qemuHandleMonitorEOF)) == NULL) { > VIR_ERROR(_("Failed to connect monitor for %s\n"), vm->def->name); > return -1; > @@ -2400,8 +2420,9 @@ static void qemudShutdownVMDaemon(virConnectPtr conn, > _("Failed to send SIGTERM to %s (%d)"), > vm->def->name, vm->pid); > > - if (priv->mon) { > - qemuMonitorClose(priv->mon); > + if (priv->mon && > + qemuMonitorClose(priv->mon) == 0) { > + virDomainObjUnref(vm); > priv->mon = NULL; > } > > diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c > index f0ef81b..a2e10bc 100644 > --- a/src/qemu/qemu_monitor.c > +++ b/src/qemu/qemu_monitor.c > @@ -42,7 +42,7 @@ struct _qemuMonitor { > virMutex lock; > virCond notify; > > - virDomainObjPtr dom; > + int refs; > > int fd; > int watch; > @@ -67,12 +67,14 @@ struct _qemuMonitor { > * the next monitor msg */ > int lastErrno; > > - /* If the monitor callback is currently active */ > + /* If the monitor EOF callback is currently active (stops more commands being run) */ > unsigned eofcb: 1; > - /* If the monitor callback should free the closed monitor */ > + /* If the monitor is in process of shutting down */ > unsigned closed: 1; > + > }; > > + > void qemuMonitorLock(qemuMonitorPtr mon) > { > virMutexLock(&mon->lock); > @@ -84,21 +86,33 @@ void qemuMonitorUnlock(qemuMonitorPtr mon) > } > > > -static void qemuMonitorFree(qemuMonitorPtr mon, int lockDomain) > +static void qemuMonitorFree(qemuMonitorPtr mon) > { > - VIR_DEBUG("mon=%p, lockDomain=%d", mon, lockDomain); > - if (mon->vm) { > - if (lockDomain) > - virDomainObjLock(mon->vm); > - if (!virDomainObjUnref(mon->vm) && lockDomain) > - virDomainObjUnlock(mon->vm); > - } > + VIR_DEBUG("mon=%p", mon); > if (virCondDestroy(&mon->notify) < 0) > {} > virMutexDestroy(&mon->lock); > VIR_FREE(mon); > } > > +int qemuMonitorRef(qemuMonitorPtr mon) > +{ > + mon->refs++; > + return mon->refs; > +} > + > +int qemuMonitorUnref(qemuMonitorPtr mon) > +{ > + mon->refs--; > + > + if (mon->refs == 0) { > + qemuMonitorUnlock(mon); > + qemuMonitorFree(mon); > + } > + > + return mon->refs; > +} > + > > static int > qemuMonitorOpenUnix(const char *monitor) > @@ -348,6 +362,7 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) { > int quit = 0, failed = 0; > > qemuMonitorLock(mon); > + qemuMonitorRef(mon); > VIR_DEBUG("Monitor %p I/O on watch %d fd %d events %d", mon, watch, fd, events); > > if (mon->fd != fd || mon->watch != watch) { > @@ -407,23 +422,17 @@ qemuMonitorIO(int watch, int fd, int events, void *opaque) { > * but is this safe ? I think it is, because the callback > * will try to acquire the virDomainObjPtr mutex next */ > if (failed || quit) { > + qemuMonitorEOFNotify eofCB = mon->eofCB; > + virDomainObjPtr vm = mon->vm; > /* Make sure anyone waiting wakes up now */ > virCondSignal(&mon->notify); > - mon->eofcb = 1; > - qemuMonitorUnlock(mon); > - VIR_DEBUG("Triggering EOF callback error? %d", failed); > - mon->eofCB(mon, mon->vm, failed); > - > - qemuMonitorLock(mon); > - if (mon->closed) { > + if (qemuMonitorUnref(mon) > 0) > qemuMonitorUnlock(mon); > - VIR_DEBUG("Delayed free of monitor %p", mon); > - qemuMonitorFree(mon, 1); > - } else { > - qemuMonitorUnlock(mon); > - } > + VIR_DEBUG("Triggering EOF callback error? %d", failed); > + (eofCB)(mon, vm, failed); > } else { > - qemuMonitorUnlock(mon); > + if (qemuMonitorUnref(mon) > 0) > + qemuMonitorUnlock(mon); > } > } > > @@ -453,10 +462,10 @@ qemuMonitorOpen(virDomainObjPtr vm, > return NULL; > } > mon->fd = -1; > + mon->refs = 1; > mon->vm = vm; > mon->eofCB = eofCB; > qemuMonitorLock(mon); > - virDomainObjRef(vm); > > switch (vm->monitor_chr->type) { > case VIR_DOMAIN_CHR_TYPE_UNIX: > @@ -512,10 +521,14 @@ cleanup: > } > > > -void qemuMonitorClose(qemuMonitorPtr mon) > +int qemuMonitorClose(qemuMonitorPtr mon) > { > + int refs; > + > if (!mon) > - return; > + return 0; > + > + VIR_DEBUG("mon=%p", mon); > > qemuMonitorLock(mon); > if (!mon->closed) { > @@ -523,18 +536,17 @@ void qemuMonitorClose(qemuMonitorPtr mon) > virEventRemoveHandle(mon->watch); > if (mon->fd != -1) > close(mon->fd); > - /* NB: don't reset fd / watch fields, since active > - * callback may still want them */ > + /* NB: ordinarily one might immediately set mon->watch to -1 > + * and mon->fd to -1, but there may be a callback active > + * that is still relying on these fields being valid. So > + * we merely close them, but not clear their values and > + * use this explicit 'closed' flag to track this state */ > mon->closed = 1; > } > > - if (mon->eofcb) { > - VIR_DEBUG("Mark monitor to be deleted %p", mon); > + if ((refs = qemuMonitorUnref(mon)) > 0) > qemuMonitorUnlock(mon); > - } else { > - VIR_DEBUG("Delete monitor now %p", mon); > - qemuMonitorFree(mon, 0); > - } > + return refs; > } > > > @@ -552,7 +564,6 @@ int qemuMonitorSend(qemuMonitorPtr mon, > > if (mon->eofcb) { > msg->lastErrno = EIO; > - qemuMonitorUnlock(mon); > return -1; > } > > diff --git a/src/qemu/qemu_monitor.h b/src/qemu/qemu_monitor.h > index 71688cb..27b43b7 100644 > --- a/src/qemu/qemu_monitor.h > +++ b/src/qemu/qemu_monitor.h > @@ -78,11 +78,14 @@ typedef int (*qemuMonitorDiskSecretLookup)(qemuMonitorPtr mon, > qemuMonitorPtr qemuMonitorOpen(virDomainObjPtr vm, > qemuMonitorEOFNotify eofCB); > > -void qemuMonitorClose(qemuMonitorPtr mon); > +int qemuMonitorClose(qemuMonitorPtr mon); > > void qemuMonitorLock(qemuMonitorPtr mon); > void qemuMonitorUnlock(qemuMonitorPtr mon); > > +int qemuMonitorRef(qemuMonitorPtr mon); > +int qemuMonitorUnref(qemuMonitorPtr mon); > + > void qemuMonitorRegisterDiskSecretLookup(qemuMonitorPtr mon, > qemuMonitorDiskSecretLookup secretCB); > > > -- > |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| > |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| > |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| > |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| > > -- > Libvir-list mailing list > Libvir-list@xxxxxxxxxx > https://www.redhat.com/mailman/listinfo/libvir-list > -- ------------------------------------- Nikola CIPRICH LinuxBox.cz, s.r.o. 28. rijna 168, 709 01 Ostrava tel.: +420 596 603 142 fax: +420 596 621 273 mobil: +420 777 093 799 www.linuxbox.cz mobil servis: +420 737 238 656 email servis: servis@xxxxxxxxxxx ------------------------------------- -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list