Please, disregard this patch. On 01.02.2016 12:21, Nikolay Shirokovskiy wrote: > A pretty nasty deadlock occurs while trying to rename a VM in parallel > with virDomainObjListNumOfDomains. > The short description of the problem is as follows: > > Thread #1: > > qemuDomainRename: > ------> aquires domain lock by qemuDomObjFromDomain > ---------> waits for domain list lock in any of the listed functions: > - virDomainObjListFindByName > - virDomainObjListRenameAddNew > - virDomainObjListRenameRemove > > Thread #2: > > virDomainObjListNumOfDomains: > ------> aquires domain list lock > ---------> waits for domain lock in virDomainObjListCount > > Introduce generic virDomainObjListRename function for renaming domains. > It aquires list lock in right order to avoid deadlock. Callback is used > to make driver specific domain updates. > --- > src/conf/virdomainobjlist.c | 98 +++++++++++++++++++----------- > src/conf/virdomainobjlist.h | 16 +++-- > src/libvirt_private.syms | 3 +- > src/qemu/qemu_driver.c | 142 ++++++++++++++++++++------------------------ > 4 files changed, 142 insertions(+), 117 deletions(-) > > diff --git a/src/conf/virdomainobjlist.c b/src/conf/virdomainobjlist.c > index 7568f93..6c0be62 100644 > --- a/src/conf/virdomainobjlist.c > +++ b/src/conf/virdomainobjlist.c > @@ -313,40 +313,6 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms, > } > > > -int > -virDomainObjListRenameAddNew(virDomainObjListPtr doms, > - virDomainObjPtr vm, > - const char *name) > -{ > - int ret = -1; > - virObjectLock(doms); > - > - /* Add new name into the hash table of domain names. */ > - if (virHashAddEntry(doms->objsName, name, vm) < 0) > - goto cleanup; > - > - /* Okay, this is crazy. virHashAddEntry() does not increment > - * the refcounter of @vm, but virHashRemoveEntry() does > - * decrement it. We need to work around it. */ > - virObjectRef(vm); > - > - ret = 0; > - cleanup: > - virObjectUnlock(doms); > - return ret; > -} > - > - > -int > -virDomainObjListRenameRemove(virDomainObjListPtr doms, const char *name) > -{ > - virObjectLock(doms); > - virHashRemoveEntry(doms->objsName, name); > - virObjectUnlock(doms); > - return 0; > -} > - > - > /* > * The caller must hold a lock on the driver owning 'doms', > * and must also have locked 'dom', to ensure no one else > @@ -372,6 +338,70 @@ void virDomainObjListRemove(virDomainObjListPtr doms, > } > > > +/* > + * The caller must hold a lock on dom. Callbacks should not sleep/wait othewise > + * operations on all domains will be blocked as the callback is called with > + * domains lock hold. Domain lock is dropped/reaquired during this operation > + * thus domain consistency must not rely on this lock solely. > + */ > + > +int > +virDomainObjListRename(virDomainObjListPtr doms, > + virDomainObjPtr dom, > + const char *new_name, > + unsigned int flags, > + virDomainObjListRenameCallback callback, > + void *opaque) > +{ > + int ret = -1; > + char *old_name = NULL; > + int rc; > + > + /* doms and dom locks must be attained in right order thus relock dom. */ > + /* dom reference is touched for the benefit of those callers that > + * hold a lock on dom but not refcount it. */ > + virObjectRef(dom); > + virObjectUnlock(dom); > + virObjectLock(doms); > + virObjectLock(dom); > + virObjectUnref(dom); > + > + if (STREQ(dom->def->name, new_name)) { > + virReportError(VIR_ERR_OPERATION_INVALID, "%s", > + _("Can't rename domain to itself")); > + goto cleanup; > + } > + > + if (virHashLookup(doms->objsName, new_name) != NULL) { > + virReportError(VIR_ERR_OPERATION_INVALID, > + _("domain with name '%s' already exists"), > + new_name); > + goto cleanup; > + } > + > + if (virHashAddEntry(doms->objsName, new_name, dom) < 0) > + goto cleanup; > + > + /* Okay, this is crazy. virHashAddEntry() does not increment > + * the refcounter of @dom, but virHashRemoveEntry() does > + * decrement it. We need to work around it. */ > + virObjectRef(dom); > + > + if (VIR_STRDUP(old_name, dom->def->name) < 0) > + goto cleanup; > + > + rc = callback(dom, new_name, flags, opaque); > + virHashRemoveEntry(doms->objsName, rc < 0 ? new_name : old_name); > + if (rc < 0) > + goto cleanup; > + > + ret = 0; > + cleanup: > + virObjectUnlock(doms); > + VIR_FREE(old_name); > + return ret; > +} > + > /* The caller must hold lock on 'doms' in addition to 'virDomainObjListRemove' > * requirements > * > diff --git a/src/conf/virdomainobjlist.h b/src/conf/virdomainobjlist.h > index f479598..c0f856c 100644 > --- a/src/conf/virdomainobjlist.h > +++ b/src/conf/virdomainobjlist.h > @@ -51,11 +51,17 @@ virDomainObjPtr virDomainObjListAdd(virDomainObjListPtr doms, > unsigned int flags, > virDomainDefPtr *oldDef); > > -int virDomainObjListRenameAddNew(virDomainObjListPtr doms, > - virDomainObjPtr vm, > - const char *name); > -int virDomainObjListRenameRemove(virDomainObjListPtr doms, > - const char *name); > +typedef int (*virDomainObjListRenameCallback)(virDomainObjPtr dom, > + const char *new_name, > + unsigned int flags, > + void *opaque); > +int virDomainObjListRename(virDomainObjListPtr doms, > + virDomainObjPtr dom, > + const char *new_name, > + unsigned int flags, > + virDomainObjListRenameCallback callback, > + void *opaque); > + > void virDomainObjListRemove(virDomainObjListPtr doms, > virDomainObjPtr dom); > void virDomainObjListRemoveLocked(virDomainObjListPtr doms, > diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms > index 3d0ec9d..3ef3468 100644 > --- a/src/libvirt_private.syms > +++ b/src/libvirt_private.syms > @@ -893,8 +893,7 @@ virDomainObjListNew; > virDomainObjListNumOfDomains; > virDomainObjListRemove; > virDomainObjListRemoveLocked; > -virDomainObjListRenameAddNew; > -virDomainObjListRenameRemove; > +virDomainObjListRename; > > > # cpu/cpu.h > diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c > index abcdbe6..c0e750d 100644 > --- a/src/qemu/qemu_driver.c > +++ b/src/qemu/qemu_driver.c > @@ -19976,14 +19976,14 @@ qemuDomainSetUserPassword(virDomainPtr dom, > } > > > -static int qemuDomainRename(virDomainPtr dom, > - const char *new_name, > - unsigned int flags) > +static int > +qemuDomainRenameCallback(virDomainObjPtr vm, > + const char *new_name, > + unsigned int flags, > + void *opaque) > { > - virQEMUDriverPtr driver = dom->conn->privateData; > + virQEMUDriverPtr driver = opaque; > virQEMUDriverConfigPtr cfg = NULL; > - virDomainObjPtr vm = NULL; > - virDomainObjPtr tmp_dom = NULL; > virObjectEventPtr event_new = NULL; > virObjectEventPtr event_old = NULL; > int ret = -1; > @@ -19993,73 +19993,16 @@ static int qemuDomainRename(virDomainPtr dom, > > virCheckFlags(0, ret); > > - if (!(vm = qemuDomObjFromDomain(dom))) > - goto cleanup; > - > - if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0) > - goto cleanup; > - > cfg = virQEMUDriverGetConfig(driver); > > - if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) > - goto cleanup; > - > - if (virDomainObjIsActive(vm)) { > - virReportError(VIR_ERR_OPERATION_INVALID, "%s", > - _("cannot rename active domain")); > - goto endjob; > - } > - > - if (!vm->persistent) { > - virReportError(VIR_ERR_OPERATION_INVALID, "%s", > - _("cannot rename a transient domain")); > - goto endjob; > - } > - > - if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) { > - virReportError(VIR_ERR_OPERATION_INVALID, > - "%s", _("domain has to be shutoff before renaming")); > - goto endjob; > - } > - > - if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) { > - virReportError(VIR_ERR_OPERATION_INVALID, > - "%s", _("cannot rename domain with snapshots")); > - goto endjob; > - } > - > - if (STREQ(vm->def->name, new_name)) { > - virReportError(VIR_ERR_OPERATION_INVALID, "%s", > - _("Can't rename domain to itself")); > - goto endjob; > - } > - > - /* > - * This is a rather racy check, but still better than reporting > - * internal error. And since new_name != name here, there's no > - * deadlock imminent. > - */ > - tmp_dom = virDomainObjListFindByName(driver->domains, new_name); > - if (tmp_dom) { > - virObjectUnlock(tmp_dom); > - virObjectUnref(tmp_dom); > - virReportError(VIR_ERR_OPERATION_INVALID, > - _("domain with name '%s' already exists"), > - new_name); > - goto endjob; > - } > - > if (VIR_STRDUP(new_dom_name, new_name) < 0) > - goto endjob; > + goto cleanup; > > if (!(old_dom_cfg_file = virDomainConfigFile(cfg->configDir, > vm->def->name))) { > - goto endjob; > + goto cleanup; > } > > - if (virDomainObjListRenameAddNew(driver->domains, vm, new_name) < 0) > - goto endjob; > - > event_old = virDomainEventLifecycleNewFromObj(vm, > VIR_DOMAIN_EVENT_UNDEFINED, > VIR_DOMAIN_EVENT_UNDEFINED_RENAMED); > @@ -20080,21 +20023,12 @@ static int qemuDomainRename(virDomainPtr dom, > goto rollback; > } > > - /* Remove old domain name from table. */ > - virDomainObjListRenameRemove(driver->domains, old_dom_name); > - > event_new = virDomainEventLifecycleNewFromObj(vm, > VIR_DOMAIN_EVENT_DEFINED, > VIR_DOMAIN_EVENT_DEFINED_RENAMED); > - > - /* Success, domain has been renamed. */ > ret = 0; > > - endjob: > - qemuDomainObjEndJob(driver, vm); > - > cleanup: > - virDomainObjEndAPI(&vm); > VIR_FREE(old_dom_cfg_file); > VIR_FREE(old_dom_name); > VIR_FREE(new_dom_name); > @@ -20109,9 +20043,65 @@ static int qemuDomainRename(virDomainPtr dom, > vm->def->name = old_dom_name; > old_dom_name = NULL; > } > + goto cleanup; > +} > > - virDomainObjListRenameRemove(driver->domains, new_name); > - goto endjob; > +static int qemuDomainRename(virDomainPtr dom, > + const char *new_name, > + unsigned int flags) > +{ > + virQEMUDriverPtr driver = dom->conn->privateData; > + virDomainObjPtr vm = NULL; > + int ret = -1; > + > + virCheckFlags(0, ret); > + > + if (!(vm = qemuDomObjFromDomain(dom))) > + goto cleanup; > + > + if (virDomainRenameEnsureACL(dom->conn, vm->def) < 0) > + goto cleanup; > + > + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) > + goto cleanup; > + > + if (virDomainObjIsActive(vm)) { > + virReportError(VIR_ERR_OPERATION_INVALID, "%s", > + _("cannot rename active domain")); > + goto endjob; > + } > + > + if (!vm->persistent) { > + virReportError(VIR_ERR_OPERATION_INVALID, "%s", > + _("cannot rename a transient domain")); > + goto endjob; > + } > + > + if (virDomainObjGetState(vm, NULL) != VIR_DOMAIN_SHUTOFF) { > + virReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("domain has to be shutoff before renaming")); > + goto endjob; > + } > + > + if (virDomainSnapshotObjListNum(vm->snapshots, NULL, 0) > 0) { > + virReportError(VIR_ERR_OPERATION_INVALID, > + "%s", _("cannot rename domain with snapshots")); > + goto endjob; > + } > + > + if (virDomainObjListRename(driver->domains, vm, new_name, flags, > + qemuDomainRenameCallback, driver) < 0) > + goto endjob; > + > + /* Success, domain has been renamed. */ > + ret = 0; > + > + endjob: > + qemuDomainObjEndJob(driver, vm); > + > + cleanup: > + virDomainObjEndAPI(&vm); > + return ret; > } > > static virHypervisorDriver qemuHypervisorDriver = { > -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list