On Thu, Apr 07, 2011 at 10:33:03AM +0100, Daniel P. Berrange wrote: > On Wed, Apr 06, 2011 at 03:19:51PM +0800, Hu Tao wrote: > > This patch also eliminates a dead-lock bug in > > qemuDomainObjBeginJobWithDriver: if virCondWaitUntil() timeouts, the > > thread tries to acquire qemu driver lock while holding virDomainObj > > lock. > > --- > > src/conf/domain_conf.c | 56 ++++---- > > src/conf/domain_conf.h | 6 +- > > src/openvz/openvz_conf.c | 8 +- > > src/qemu/qemu_domain.c | 32 ++--- > > src/qemu/qemu_domain.h | 2 +- > > src/qemu/qemu_driver.c | 304 ++++++++++++++++++++------------------------- > > src/qemu/qemu_migration.c | 45 +++---- > > src/qemu/qemu_process.c | 33 ++--- > > src/vmware/vmware_conf.c | 2 +- > > 9 files changed, 215 insertions(+), 273 deletions(-) > > > > diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c > > index 90a1317..fc76a00 100644 > > --- a/src/conf/domain_conf.c > > +++ b/src/conf/domain_conf.c > > @@ -47,6 +47,7 @@ > > #include "storage_file.h" > > #include "files.h" > > #include "bitmap.h" > > +#include "virobject.h" > > > > #define VIR_FROM_THIS VIR_FROM_DOMAIN > > > > @@ -395,9 +396,7 @@ static void > > virDomainObjListDataFree(void *payload, const void *name ATTRIBUTE_UNUSED) > > { > > virDomainObjPtr obj = payload; > > - virDomainObjLock(obj); > > - if (virDomainObjUnref(obj) > 0) > > - virDomainObjUnlock(obj); > > + virDomainObjUnref(obj); > > } > > > > int virDomainObjListInit(virDomainObjListPtr doms) > > @@ -437,7 +436,7 @@ virDomainObjPtr virDomainFindByID(const virDomainObjListPtr doms, > > virDomainObjPtr obj; > > obj = virHashSearch(doms->objs, virDomainObjListSearchID, &id); > > if (obj) > > - virDomainObjLock(obj); > > + virDomainObjRef(obj); > > return obj; > > } > > > > @@ -452,7 +451,7 @@ virDomainObjPtr virDomainFindByUUID(const virDomainObjListPtr doms, > > > > obj = virHashLookup(doms->objs, uuidstr); > > if (obj) > > - virDomainObjLock(obj); > > + virDomainObjRef(obj); > > return obj; > > } > > > > @@ -476,7 +475,7 @@ virDomainObjPtr virDomainFindByName(const virDomainObjListPtr doms, > > virDomainObjPtr obj; > > obj = virHashSearch(doms->objs, virDomainObjListSearchName, name); > > if (obj) > > - virDomainObjLock(obj); > > + virDomainObjRef(obj); > > return obj; > > } > > This is a major change in semantics, which makes pretty much > every single caller non-thread safe, unless the callers are > all changed todo virDomainObjLock immediately after calling > this. So I don't really see the point in this - it just means > more code duplication. If we do this: obj = virHashSearch(doms->objs, virDomainObjListSearchName, name); if (obj) virDomainObjLock(obj); return obj; And at the meantime another thread removes the same obj from doms->objs and frees it, than we are accessing a freed obj. lock doesn't help prevent object from being freed. > > > > > @@ -967,6 +966,12 @@ static void virDomainObjFree(virDomainObjPtr dom) > > { > > if (!dom) > > return; > > + virDomainObjUnref(dom); > > +} > > + > > +static void doDomainObjFree(virObjectPtr obj) > > +{ > > + virDomainObjPtr dom = (virDomainObjPtr)obj; > > > > VIR_DEBUG("obj=%p", dom); > > virDomainDefFree(dom->def); > > @@ -984,21 +989,13 @@ static void virDomainObjFree(virDomainObjPtr dom) > > > > void virDomainObjRef(virDomainObjPtr dom) > > { > > - dom->refs++; > > - VIR_DEBUG("obj=%p refs=%d", dom, dom->refs); > > + virObjectRef(&dom->obj); > > } > > > > > > -int virDomainObjUnref(virDomainObjPtr dom) > > +void virDomainObjUnref(virDomainObjPtr dom) > > { > > - dom->refs--; > > - VIR_DEBUG("obj=%p refs=%d", dom, dom->refs); > > - if (dom->refs == 0) { > > - virDomainObjUnlock(dom); > > - virDomainObjFree(dom); > > - return 0; > > - } > > - return dom->refs; > > + virObjectUnref(&dom->obj); > > } > > > > static virDomainObjPtr virDomainObjNew(virCapsPtr caps) > > @@ -1010,6 +1007,11 @@ static virDomainObjPtr virDomainObjNew(virCapsPtr caps) > > return NULL; > > } > > > > + if (virObjectInit(&domain->obj, doDomainObjFree)) { > > + VIR_FREE(domain); > > + return NULL; > > + } > > + > > if (caps->privateDataAllocFunc && > > !(domain->privateData = (caps->privateDataAllocFunc)())) { > > virReportOOMError(); > > @@ -1027,9 +1029,7 @@ static virDomainObjPtr virDomainObjNew(virCapsPtr caps) > > return NULL; > > } > > > > - virDomainObjLock(domain); > > domain->state = VIR_DOMAIN_SHUTOFF; > > - domain->refs = 1; > > > > virDomainSnapshotObjListInit(&domain->snapshots); > > > > @@ -1075,8 +1075,10 @@ virDomainObjPtr virDomainAssignDef(virCapsPtr caps, > > domain->def = def; > > > > virUUIDFormat(def->uuid, uuidstr); > > + virDomainObjRef(domain); > > if (virHashAddEntry(doms->objs, uuidstr, domain) < 0) { > > - VIR_FREE(domain); > > + virDomainObjUnref(domain); > > + virDomainObjFree(domain); > > return NULL; > > } > > Simiarly here, you're now requiring all callers to manually obtain > a lock. That is the desired result, lock right before do read/write and unlock it as soon as possible. > > > @@ -1239,39 +1243,57 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml, > > > > qemuDriverLock(driver); > > if (!(def = virDomainDefParseString(driver->caps, xml, > > - VIR_DOMAIN_XML_INACTIVE))) > > + VIR_DOMAIN_XML_INACTIVE))) { > > + qemuDriverUnlock(driver); > > goto cleanup; > > + } > > > > - if (virSecurityManagerVerify(driver->securityManager, def) < 0) > > + if (virSecurityManagerVerify(driver->securityManager, def) < 0) { > > + qemuDriverUnlock(driver); > > goto cleanup; > > + } > > > > - if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) > > + if (virDomainObjIsDuplicate(&driver->domains, def, 1) < 0) { > > + qemuDriverUnlock(driver); > > goto cleanup; > > + } > > > > - if (qemudCanonicalizeMachine(driver, def) < 0) > > + if (qemudCanonicalizeMachine(driver, def) < 0) { > > + qemuDriverUnlock(driver); > > goto cleanup; > > + } > > > > - if (qemuDomainAssignPCIAddresses(def) < 0) > > + if (qemuDomainAssignPCIAddresses(def) < 0) { > > + qemuDriverUnlock(driver); > > goto cleanup; > > + } > > > > if (!(vm = virDomainAssignDef(driver->caps, > > &driver->domains, > > - def, false))) > > + def, false))) { > > + qemuDriverUnlock(driver); > > goto cleanup; > > + } > > + > > + qemuDriverUnlock(driver); > > driver is now unlocked.... > > > > > def = NULL; > > > > - if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) > > + if (qemuDomainObjBeginJobWithDriver(driver, vm) < 0) { > > + virDomainObjUnref(vm); > > ...but this method *requires* driver to be locked. > > > goto cleanup; /* XXXX free the 'vm' we created ? */ > > + } > > > > if (qemuProcessStart(conn, driver, vm, NULL, > > (flags & VIR_DOMAIN_START_PAUSED) != 0, > > -1, NULL, VIR_VM_OP_CREATE) < 0) { > > qemuAuditDomainStart(vm, "booted", false); > > ...and this method writes to 'driver', so it is now unsafe. > > > - if (qemuDomainObjEndJob(vm) > 0) > > - virDomainRemoveInactive(&driver->domains, > > - vm); > > - vm = NULL; > > + qemuDomainObjEndJob(vm); > > + qemuDriverLock(driver); > > + virDomainRemoveInactive(&driver->domains, > > + vm); > > + qemuDriverUnlock(driver); > > + virDomainObjUnref(vm); > > goto cleanup; > > } > > > > @@ -1283,17 +1305,13 @@ static virDomainPtr qemudDomainCreate(virConnectPtr conn, const char *xml, > > dom = virGetDomain(conn, vm->def->name, vm->def->uuid); > > if (dom) dom->id = vm->def->id; > > > > - if (vm && > > - qemuDomainObjEndJob(vm) == 0) > > - vm = NULL; > > + qemuDomainObjEndJob(vm); > > + virDomainObjUnref(vm); > > > > cleanup: > > virDomainDefFree(def); > > - if (vm) > > - virDomainObjUnlock(vm); > > if (event) > > qemuDomainEventQueue(driver, event); > > - qemuDriverUnlock(driver); > > return dom; > > } > > > And all the usage of 'vm' in this method is now completely > unlocked and unsafe. > > > > > > @@ -1307,13 +1325,14 @@ static int qemudDomainSuspend(virDomainPtr dom) { > > > > qemuDriverLock(driver); > > vm = virDomainFindByUUID(&driver->domains, dom->uuid); > > + qemuDriverUnlock(driver); > > > > if (!vm) { > > char uuidstr[VIR_UUID_STRING_BUFLEN]; > > virUUIDFormat(dom->uuid, uuidstr); > > qemuReportError(VIR_ERR_NO_DOMAIN, > > _("no domain with matching uuid '%s'"), uuidstr); > > - goto cleanup; > > + return -1; > > } > > if (!virDomainObjIsActive(vm)) { > > qemuReportError(VIR_ERR_OPERATION_INVALID, > > @@ -1354,16 +1373,12 @@ static int qemudDomainSuspend(virDomainPtr dom) { > > } > > > > endjob: > > - if (qemuDomainObjEndJob(vm) == 0) > > - vm = NULL; > > + qemuDomainObjEndJob(vm); > > > > cleanup: > > - if (vm) > > - virDomainObjUnlock(vm); > > - > > + virDomainObjUnref(vm); > > if (event) > > qemuDomainEventQueue(driver, event); > > - qemuDriverUnlock(driver); > > return ret; > > } > > Also now completely unsafe since 'vm' is never locked while > making changes to it. Yes. Will improve qemudDomainSuspend. > > [cut rest of patch] > > I don't see how any of this patch is threadsafe now that virtually > no methods are acquiring the 'vm' lock. qemuDomainObjBeginJob does acquire vm lock. > > > Daniel > -- > |: http://berrange.com -o- http://www.flickr.com/photos/dberrange/ :| > |: http://libvirt.org -o- http://virt-manager.org :| > |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| > |: http://entangle-photo.org -o- http://live.gnome.org/gtk-vnc :| -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list