Imagine two threads. Thread A is executing qemuProcessStop() and thread B is executing qemuDomainCreateXML(). To make things more interesting, the domain XML passed to qemuDomainCreateXML matches name + UUID of that the virDomainObj that qemuProcessStop() is currently working with. Here's what happens. 1) Thread A locks @vm, enters qemuProcessStop(). 2) Thread B parses given XML, calls virDomainObjListAdd() -> virDomainObjListAdd() -> virDomainObjListAddLocked() -> virDomainObjListFindByUUIDLocked(). Since there's a match on UUID, an virDomainObj object is returned and the thread proceeds to calling virObjectLock(). NB, it's the same object as in thread A. 3) Thread A sets vm->def->id = -1; this means that from this point on, virDomainObjIsActive() will return false. 4) Thread A calls qemuDomainObjStopWorker() which unlocks the @vm. 5) Thread B acquires the @vm lock and since virDomainObjIsActive() returns false, it proceeds to calling virDomainObjAssignDef() where vm->def is replaced. 6) Thread B then calls qemuProcessBeginJob() which unlocks the @vm temporarily. 7) Thread A, still in qemuDomainObjStopWorker() acquires @vm lock and proceeds with cleanup. 8) Thread A finds different definition than the one needing cleanup. In my testing I've seen stale pointers, e.g. vm->def->nets[0]->priv was NULL, which lead to a SIGSEGV as there's 'QEMU_DOMAIN_NETWORK_PRIVATE(net)->created' line when cleaning up nets. Your mileage may vary. Even if we did not crash, the plain fact that vm->def is changed in the middle of qemuProcessStop() means we might be cleaning up something else than intended. As a fix, I'm moving all lines that obviously touch vm->def before the domain object is unlocked. That left virHookCall(VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END) nearly next to virHookCall(VIR_HOOK_QEMU_OP_RELEASE, VIR_HOOK_SUBOP_END) which I figured is not something we want. So I've shuffled things a bit more. Fixes: 3865410e7f67ca4ec66e9a905e75f452762a97f0 Resolves: https://issues.redhat.com/browse/RHEL-49607 Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx> --- src/qemu/qemu_process.c | 122 ++++++++++++++++++++-------------------- 1 file changed, 62 insertions(+), 60 deletions(-) diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 25dfd04272..9ea6c678b8 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -8530,6 +8530,18 @@ void qemuProcessStop(virQEMUDriver *driver, VIR_QEMU_PROCESS_KILL_FORCE| VIR_QEMU_PROCESS_KILL_NOCHECK)); + vm->pid = 0; + + /* now that we know it's stopped call the hook if present */ + if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) { + g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0); + + /* we can't stop the operation even if the script raised an error */ + ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, + VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END, + NULL, xml, NULL)); + } + if (priv->agent) { g_clear_pointer(&priv->agent, qemuAgentClose); } @@ -8553,25 +8565,6 @@ void qemuProcessStop(virQEMUDriver *driver, qemuDBusStop(driver, vm); - /* Only after this point we can reset 'priv->beingDestroyed' so that - * there's no point at which the VM could be considered as alive between - * entering the destroy job and this point where the active "flag" is - * cleared. - */ - vm->def->id = -1; - priv->beingDestroyed = false; - - /* Wake up anything waiting on domain condition */ - virDomainObjBroadcast(vm); - - /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent - * deadlocks with the per-VM event loop thread. This MUST be done after - * marking the VM as dead */ - qemuDomainObjStopWorker(vm); - - if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback) - driver->inhibitCallback(false, driver->inhibitOpaque); - /* Clear network bandwidth */ virDomainClearNetBandwidth(vm->def); @@ -8588,18 +8581,6 @@ void qemuProcessStop(virQEMUDriver *driver, } } - virPortAllocatorRelease(priv->nbdPort); - priv->nbdPort = 0; - - if (priv->monConfig) { - if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX) - unlink(priv->monConfig->data.nix.path); - g_clear_pointer(&priv->monConfig, virObjectUnref); - } - - /* Remove the master key */ - qemuDomainMasterKeyRemove(priv); - ignore_value(virDomainChrDefForeach(vm->def, false, qemuProcessCleanupChardevDevice, @@ -8609,22 +8590,6 @@ void qemuProcessStop(virQEMUDriver *driver, /* Its namespace is also gone then. */ qemuDomainDestroyNamespace(driver, vm); - virFileDeleteTree(priv->libDir); - virFileDeleteTree(priv->channelTargetDir); - - /* Stop autodestroy in case guest is restarted */ - virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy); - - /* now that we know it's stopped call the hook if present */ - if (virHookPresent(VIR_HOOK_DRIVER_QEMU)) { - g_autofree char *xml = qemuDomainDefFormatXML(driver, NULL, vm->def, 0); - - /* we can't stop the operation even if the script raised an error */ - ignore_value(virHookCall(VIR_HOOK_DRIVER_QEMU, vm->def->name, - VIR_HOOK_QEMU_OP_STOPPED, VIR_HOOK_SUBOP_END, - NULL, xml, NULL)); - } - /* Reset Security Labels unless caller don't want us to */ if (!(flags & VIR_QEMU_PROCESS_STOP_NO_RELABEL)) qemuSecurityRestoreAllLabel(driver, vm, @@ -8672,8 +8637,6 @@ void qemuProcessStop(virQEMUDriver *driver, virResctrlAllocRemove(vm->def->resctrls[i]->alloc); } - qemuProcessRemoveDomainStatus(driver, vm); - /* Remove VNC and Spice ports from port reservation bitmap, but only if they were reserved by the driver (autoport=yes) */ @@ -8706,20 +8669,9 @@ void qemuProcessStop(virQEMUDriver *driver, } } - for (i = 0; i < vm->ndeprecations; i++) - g_free(vm->deprecations[i]); - g_clear_pointer(&vm->deprecations, g_free); - vm->ndeprecations = 0; - vm->taint = 0; - vm->pid = 0; - virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason); for (i = 0; i < vm->def->niothreadids; i++) vm->def->iothreadids[i]->thread_id = 0; - /* clean up a possible backup job */ - if (priv->backup) - qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED); - /* Do this explicitly after vm->pid is reset so that security drivers don't * try to enter the domain's namespace which is non-existent by now as qemu * is no longer running. */ @@ -8753,6 +8705,56 @@ void qemuProcessStop(virQEMUDriver *driver, qemuSecurityReleaseLabel(driver->securityManager, vm->def); + /* Only after this point we can reset 'priv->beingDestroyed' so that + * there's no point at which the VM could be considered as alive between + * entering the destroy job and this point where the active "flag" is + * cleared. + */ + vm->def->id = -1; + priv->beingDestroyed = false; + + /* Wake up anything waiting on domain condition */ + virDomainObjBroadcast(vm); + + /* IMPORTANT: qemuDomainObjStopWorker() unlocks @vm in order to prevent + * deadlocks with the per-VM event loop thread. This MUST be done after + * marking the VM as dead */ + qemuDomainObjStopWorker(vm); + + if (!!g_atomic_int_dec_and_test(&driver->nactive) && driver->inhibitCallback) + driver->inhibitCallback(false, driver->inhibitOpaque); + + virPortAllocatorRelease(priv->nbdPort); + priv->nbdPort = 0; + + if (priv->monConfig) { + if (priv->monConfig->type == VIR_DOMAIN_CHR_TYPE_UNIX) + unlink(priv->monConfig->data.nix.path); + g_clear_pointer(&priv->monConfig, virObjectUnref); + } + + /* Remove the master key */ + qemuDomainMasterKeyRemove(priv); + + virFileDeleteTree(priv->libDir); + virFileDeleteTree(priv->channelTargetDir); + + /* Stop autodestroy in case guest is restarted */ + virCloseCallbacksDomainRemove(vm, NULL, qemuProcessAutoDestroy); + + qemuProcessRemoveDomainStatus(driver, vm); + + for (i = 0; i < vm->ndeprecations; i++) + g_free(vm->deprecations[i]); + g_clear_pointer(&vm->deprecations, g_free); + vm->ndeprecations = 0; + vm->taint = 0; + virDomainObjSetState(vm, VIR_DOMAIN_SHUTOFF, reason); + + /* clean up a possible backup job */ + if (priv->backup) + qemuBackupJobTerminate(vm, VIR_DOMAIN_JOB_STATUS_CANCELED); + /* clear all private data entries which are no longer needed */ qemuDomainObjPrivateDataClear(priv); -- 2.44.2