When destroying a domain qemuDomainDestroy kills its qemu process and starts a new job, which means it unlocks the domain object and locks it again after some time. Although the object is usually unlocked for a pretty short time, chances are another thread processing an EOF event on qemu monitor is able to lock the object first and does all the cleanup by itself. This leads to wrong shutoff reason and lifecycle event detail and virDomainDestroy API incorrectly reporting failure to destroy an inactive domain. Reported by Charlie Smurthwaite. --- src/qemu/qemu_domain.h | 1 + src/qemu/qemu_driver.c | 10 ++++++++++ src/qemu/qemu_process.c | 22 +++++++++++++++------- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index e8bcab3..35f9440 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -110,6 +110,7 @@ struct _qemuDomainObjPrivate { bool monError; unsigned long long monStart; bool gotShutdown; + bool beingDestroyed; char *pidfile; int nvcpupids; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 10a289e..ceb9f47 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1678,6 +1678,7 @@ qemuDomainDestroyFlags(virDomainPtr dom, virDomainObjPtr vm; int ret = -1; virDomainEventPtr event = NULL; + qemuDomainObjPrivatePtr priv; virCheckFlags(0, -1); @@ -1691,6 +1692,8 @@ qemuDomainDestroyFlags(virDomainPtr dom, goto cleanup; } + priv = vm->privateData; + qemuDomainSetFakeReboot(driver, vm, false); /* Although qemuProcessStop does this already, there may @@ -1700,9 +1703,16 @@ qemuDomainDestroyFlags(virDomainPtr dom, */ qemuProcessKill(vm, false); + /* We need to prevent monitor EOF callback from doing our work (and sending + * misleading events) while the vm is unlocked inside BeginJob API + */ + priv->beingDestroyed = true; + if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0) goto cleanup; + priv->beingDestroyed = false; + if (!virDomainObjIsActive(vm)) { qemuReportError(VIR_ERR_OPERATION_INVALID, "%s", _("domain is not running")); diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index d4271d0..9123f4c 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -128,14 +128,18 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, qemuDriverLock(driver); virDomainObjLock(vm); + priv = vm->privateData; + + if (priv->beingDestroyed) { + VIR_DEBUG("Domain is being destroyed, EOF is expected"); + goto unlock; + } + if (!virDomainObjIsActive(vm)) { VIR_DEBUG("Domain %p is not active, ignoring EOF", vm); - virDomainObjUnlock(vm); - qemuDriverUnlock(driver); - return; + goto unlock; } - priv = vm->privateData; if (priv->monJSON && !priv->gotShutdown) { VIR_DEBUG("Monitor connection to '%s' closed without SHUTDOWN event; " "assuming the domain crashed", vm->def->name); @@ -150,11 +154,15 @@ qemuProcessHandleMonitorEOF(qemuMonitorPtr mon ATTRIBUTE_UNUSED, qemuProcessStop(driver, vm, 0, stopReason); virDomainAuditStop(vm, auditReason); - if (!vm->persistent) + if (!vm->persistent) { qemuDomainRemoveInactive(driver, vm); - else - virDomainObjUnlock(vm); + goto cleanup; + } +unlock: + virDomainObjUnlock(vm); + +cleanup: if (event) qemuDomainEventQueue(driver, event); qemuDriverUnlock(driver); -- 1.7.8 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list