On Thu, Apr 07, 2011 at 10:38:34AM +0100, Daniel P. Berrange wrote: > On Wed, Apr 06, 2011 at 03:19:55PM +0800, Hu Tao wrote: > > --- > > src/qemu/qemu_domain.c | 30 ++----------- > > src/qemu/qemu_migration.c | 2 - > > src/qemu/qemu_monitor.c | 109 ++++++++++++++++++++++++-------------------- > > src/qemu/qemu_monitor.h | 4 +- > > src/qemu/qemu_process.c | 32 +++++++------- > > 5 files changed, 81 insertions(+), 96 deletions(-) > > > > diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c > > index 3a3c953..d11dc5f 100644 > > --- a/src/qemu/qemu_domain.c > > +++ b/src/qemu/qemu_domain.c > > @@ -560,9 +560,8 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj) > > { > > qemuDomainObjPrivatePtr priv = obj->privateData; > > > > - qemuMonitorLock(priv->mon); > > - qemuMonitorRef(priv->mon); > > virDomainObjUnlock(obj); > > + qemuMonitorLock(priv->mon); > > } > > > > > > @@ -573,18 +572,9 @@ void qemuDomainObjEnterMonitor(virDomainObjPtr obj) > > void qemuDomainObjExitMonitor(virDomainObjPtr obj) > > { > > qemuDomainObjPrivatePtr priv = obj->privateData; > > - int refs; > > - > > - refs = qemuMonitorUnref(priv->mon); > > - > > - if (refs > 0) > > - qemuMonitorUnlock(priv->mon); > > > > + qemuMonitorUnlock(priv->mon); > > virDomainObjLock(obj); > > - > > - if (refs == 0) { > > - priv->mon = NULL; > > - } > > } > > > > > > @@ -601,10 +591,8 @@ void qemuDomainObjEnterMonitorWithDriver(struct qemud_driver *driver, > > { > > qemuDomainObjPrivatePtr priv = obj->privateData; > > > > - qemuMonitorLock(priv->mon); > > - qemuMonitorRef(priv->mon); > > virDomainObjUnlock(obj); > > - qemuDriverUnlock(driver); > > + qemuMonitorLock(priv->mon); > > } > > > > > > @@ -617,19 +605,9 @@ void qemuDomainObjExitMonitorWithDriver(struct qemud_driver *driver, > > virDomainObjPtr obj) > > { > > qemuDomainObjPrivatePtr priv = obj->privateData; > > - int refs; > > - > > - refs = qemuMonitorUnref(priv->mon); > > - > > - if (refs > 0) > > - qemuMonitorUnlock(priv->mon); > > > > - qemuDriverLock(driver); > > + qemuMonitorUnlock(priv->mon); > > virDomainObjLock(obj); > > - > > - if (refs == 0) { > > - priv->mon = NULL; > > - } > > } > > This means that the 'driver' lock is now whenever any QEMU > monitor command is runing, which blocks the entire driver. qemuDomainObjEnterMonitorWithDriver is now called without holding qemu driver lock. > > > > > void qemuDomainObjEnterRemoteWithDriver(struct qemud_driver *driver, > > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > > index 462e6be..6af2e24 100644 > > --- a/src/qemu/qemu_migration.c > > +++ b/src/qemu/qemu_migration.c > > @@ -224,11 +224,9 @@ qemuMigrationWaitForCompletion(struct qemud_driver *driver, virDomainObjPtr vm) > > } > > > > virDomainObjUnlock(vm); > > - qemuDriverUnlock(driver); > > > > nanosleep(&ts, NULL); > > > > - qemuDriverLock(driver); > > virDomainObjLock(vm); > > } > > Holding the 'driver' lock while sleeping blocks the entire > QEMU driver. Now qemuMigrationWaitForCompletion should be called without holding qemu driver lock. > > > diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c > > index 244b22a..4b9087f 100644 > > --- a/src/qemu/qemu_process.c > > +++ b/src/qemu/qemu_process.c > > @@ -107,7 +107,6 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, > > > > VIR_DEBUG("Received EOF on %p '%s'", vm, vm->def->name); > > > > - qemuDriverLock(driver); > > virDomainObjLock(vm); > > > > if (!virDomainObjIsActive(vm)) { > > @@ -133,15 +132,17 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, > > qemuProcessStop(driver, vm, 0); > > qemuAuditDomainStop(vm, hasError ? "failed" : "shutdown"); > > > > - if (!vm->persistent) > > + if (!vm->persistent) { > > + qemuDriverLock(driver); > > virDomainRemoveInactive(&driver->domains, vm); > > - else > > - virDomainObjUnlock(vm); > > + qemuDriverUnlock(driver); > > + } > > + > > + virDomainObjUnlock(vm); > > > > if (event) { > > qemuDomainEventQueue(driver, event); > > } > > - qemuDriverUnlock(driver); > > } > > This violates the lock ordering rules. The 'driver' lock *must* be > obtained *before* any 'vm' lock is held. Excepting for introducing virObject for reference-counting, this series also simplifies the usage of lock: if you want to read/write qemu driver data, it is enough to first acquire qemu driver lock only; if you want to read/write virDomainObj data, it is enough to first acquire virDomainObj lock only; same for others. And we'd better to avoid acquiring two locks at the same time. So yes, the code here is problematic, it should be ideally like this: virDomainObjLock(vm); if (!vm->persistent) { lock_hashtable(doms); /* hashtable's own lock to protect itself */ virDomainRemoveInactive(doms, vm); unlock_hashtable(doms); } virDomainObjUnlock(vm); But it lacks hashtable lock, how about change the code like this: virDomainObjLock(vm); persistent = vm->persistent; virDomainObjUnlock(vm); /* chances that others change vm->persistent and we remove vm mistakenly :( */ if (!persistent) { qemuDriverLock(driver); virDomainRemoveInactive(doms, vm); qemuDriverUnlock(driver); } Or is there a better way? > > Now we have some places in the code which do > > lock(vm) > lock(driver) > > and other places which do > > lock(driver) > lock(vm) > > so 2 threads can trivially deadlock waiting for each other > > Daniel > -- > |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| > |: http://libvirt.org -o- http://virt-manager.org :| > |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| > |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list