Ever since we introduced fake reboot, we call qemuProcessKill as a reaction to the SHUTDOWN event. Unfortunately, qemu doesn't guarantee it flushed all internal buffers before sending SHUTDOWN, in which case killing the process forcibly may result in (virtual) disk corruption. By sending just SIGTERM without SIGKILL we give qemu time to flush all buffers and exit. Once qemu exits, we will see an EOF on the monitor connection and tear down the domain. In case qemu ignores SIGTERM or just hangs there, the process stays running but that's not any different from a possible hang anytime during the shutdown process so I think it's just fine. Also qemu (since 0.14 until it's fixed) has a bug in SIGTERM processing which causes it not to exit but instead send a new SHUTDOWN event and keep waiting. I think the best we can do is to ignore duplicate SHUTDOWN events to avoid a SHUTDOWN-SIGTERM loop and leave the domain in a paused state. --- src/qemu/qemu_driver.c | 2 +- src/qemu/qemu_process.c | 25 ++++++++++++++++++------- src/qemu/qemu_process.h | 2 +- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index d2626ff..9ff800f 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -1870,7 +1870,7 @@ qemuDomainDestroyFlags(virDomainPtr dom, * can kill the process even if a job is active. 
Killing * it now means the job will be released */ - qemuProcessKill(vm); + qemuProcessKill(vm, false); if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0) goto cleanup; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 24d1dc7..dbd697d 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -419,7 +419,7 @@ endjob: cleanup: if (vm) { if (ret == -1) - qemuProcessKill(vm); + qemuProcessKill(vm, false); if (virDomainObjUnref(vm) > 0) virDomainObjUnlock(vm); } @@ -437,6 +437,12 @@ qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED, VIR_DEBUG("vm=%p", vm); virDomainObjLock(vm); + if (priv->gotShutdown) { + VIR_DEBUG("Ignoring repeated SHUTDOWN event from domain %s", + vm->def->name); + goto cleanup; + } + priv->gotShutdown = true; if (priv->fakeReboot) { virDomainObjRef(vm); @@ -446,16 +452,17 @@ qemuProcessHandleShutdown(qemuMonitorPtr mon ATTRIBUTE_UNUSED, qemuProcessFakeReboot, vm) < 0) { VIR_ERROR(_("Failed to create reboot thread, killing domain")); - qemuProcessKill(vm); + qemuProcessKill(vm, true); if (virDomainObjUnref(vm) == 0) vm = NULL; } } else { - qemuProcessKill(vm); + qemuProcessKill(vm, true); } + +cleanup: if (vm) virDomainObjUnlock(vm); - return 0; } @@ -3183,10 +3190,11 @@ cleanup: } -void qemuProcessKill(virDomainObjPtr vm) +void qemuProcessKill(virDomainObjPtr vm, bool gracefully) { int i; - VIR_DEBUG("vm=%s pid=%d", vm->def->name, vm->pid); + VIR_DEBUG("vm=%s pid=%d gracefully=%d", + vm->def->name, vm->pid, gracefully); if (!virDomainObjIsActive(vm)) { VIR_DEBUG("VM '%s' not active", vm->def->name); @@ -3216,6 +3224,9 @@ void qemuProcessKill(virDomainObjPtr vm) break; } + if (i == 0 && gracefully) + break; + usleep(200 * 1000); } } @@ -3300,7 +3311,7 @@ void qemuProcessStop(struct qemud_driver *driver, } /* shut it off for sure */ - qemuProcessKill(vm); + qemuProcessKill(vm, false); /* Stop autodestroy in case guest is restarted */ qemuProcessAutoDestroyRemove(driver, vm); diff --git 
a/src/qemu/qemu_process.h b/src/qemu/qemu_process.h index 96ba3f3..ef422c4 100644 --- a/src/qemu/qemu_process.h +++ b/src/qemu/qemu_process.h @@ -68,7 +68,7 @@ int qemuProcessAttach(virConnectPtr conn, virDomainChrSourceDefPtr monConfig, bool monJSON); -void qemuProcessKill(virDomainObjPtr vm); +void qemuProcessKill(virDomainObjPtr vm, bool gracefully); int qemuProcessAutoDestroyInit(struct qemud_driver *driver); void qemuProcessAutoDestroyRun(struct qemud_driver *driver, -- 1.7.6.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list