https://bugzilla.redhat.com/show_bug.cgi?id=1448268

When migrating to a file (e.g. when doing 'virsh save file'), a
couple of things happen in the thread that is executing the API:

1) the domain obj is locked
2) iohelper is spawned as a separate process to handle all I/O
3) the thread waits for iohelper to finish
4) the domain obj is unlocked

The problem is that the wait in step 3 may take ages because
iohelper calls fdatasync(), and unfortunately we wait the whole
time with the domain locked. So if another thread wants to jump in
and, say, copy the domain name ('virsh list' for instance), it is
stuck.

The solution is to unlock the domain whenever we wait for I/O and
lock it back again once the I/O has finished.

Signed-off-by: Michal Privoznik <mprivozn@xxxxxxxxxx>
---

diff to v1:
- after locking the domain obj back, check whether the domain is
  still alive; since it was unlocked, it might have changed its
  state

 src/qemu/qemu_driver.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index e1a0dd553..6095a5ec5 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -3216,6 +3216,31 @@ qemuOpenFileAs(uid_t fallback_uid, gid_t fallback_gid,
     goto cleanup;
 }
 
+
+static int
+qemuFileWrapperFDClose(virDomainObjPtr vm,
+                       virFileWrapperFdPtr fd)
+{
+    int ret;
+
+    /* virFileWrapperFd uses iohelper to write data onto disk.
+     * However, iohelper calls fdatasync() which may take ages to
+     * finish. Therefore, we shouldn't be waiting with the domain
+     * object locked. */
+
+    virObjectUnlock(vm);
+    ret = virFileWrapperFdClose(fd);
+    virObjectLock(vm);
+    if (!virDomainObjIsActive(vm)) {
+        if (!virGetLastError())
+            virReportError(VIR_ERR_OPERATION_FAILED, "%s",
+                           _("domain is no longer running"));
+        ret = -1;
+    }
+    return ret;
+}
+
+
 /* Helper function to execute a migration to file with a correct save header
  * the caller needs to make sure that the processors are stopped and do all other
  * actions besides saving memory */
@@ -3276,7 +3301,7 @@ qemuDomainSaveMemory(virQEMUDriverPtr driver,
         goto cleanup;
     }
 
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+    if (qemuFileWrapperFDClose(vm, wrapperFd) < 0)
         goto cleanup;
 
     if ((fd = qemuOpenFile(driver, vm, path, O_WRONLY, NULL, NULL)) < 0 ||
@@ -3827,7 +3852,7 @@ doCoreDump(virQEMUDriverPtr driver,
                              path);
         goto cleanup;
     }
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+    if (qemuFileWrapperFDClose(vm, wrapperFd) < 0)
         goto cleanup;
 
     ret = 0;
@@ -6687,7 +6712,7 @@ qemuDomainRestoreFlags(virConnectPtr conn,
     ret = qemuDomainSaveImageStartVM(conn, driver, vm, &fd, data, path,
                                      false, QEMU_ASYNC_JOB_START);
 
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+    if (qemuFileWrapperFDClose(vm, wrapperFd) < 0)
         VIR_WARN("Failed to close %s", path);
 
     qemuProcessEndJob(driver, vm);
@@ -6962,7 +6987,7 @@ qemuDomainObjRestore(virConnectPtr conn,
     ret = qemuDomainSaveImageStartVM(conn, driver, vm, &fd, data, path,
                                      start_paused, asyncJob);
 
-    if (virFileWrapperFdClose(wrapperFd) < 0)
+    if (qemuFileWrapperFDClose(vm, wrapperFd) < 0)
         VIR_WARN("Failed to close %s", path);
 
 cleanup:
-- 
2.13.5
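
To make the locking pattern easier to see in isolation, here is a minimal
standalone sketch of the unlock-while-waiting idea the patch implements.
This is not libvirt code: domain_t, slow_close() and file_wrapper_close()
are hypothetical stand-ins for virDomainObjPtr, virFileWrapperFdClose()
and the new qemuFileWrapperFDClose() helper; it only illustrates dropping
the lock around a blocking call and re-checking state after re-acquiring
it. Build with e.g. "gcc -pthread sketch.c".

/* Minimal sketch of the unlock-while-waiting pattern; hypothetical
 * names, not libvirt code. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

typedef struct {
    pthread_mutex_t lock;   /* stands in for the domain object lock */
    bool active;            /* stands in for virDomainObjIsActive() */
} domain_t;

/* Stand-in for the blocking close: the real code may sit for a long
 * time in fdatasync() inside the iohelper. */
static int
slow_close(void)
{
    sleep(2);
    return 0;
}

/* Same shape as the patch's helper: drop the lock around the blocking
 * call, re-take it, then re-validate the state that may have changed
 * while the lock was not held. */
static int
file_wrapper_close(domain_t *dom)
{
    int ret;

    pthread_mutex_unlock(&dom->lock);   /* let other threads use the domain */
    ret = slow_close();                 /* potentially very slow I/O wait */
    pthread_mutex_lock(&dom->lock);     /* re-acquire before touching state */

    if (!dom->active) {
        /* the domain changed state while we were not holding the lock */
        fprintf(stderr, "domain is no longer running\n");
        ret = -1;
    }
    return ret;
}

int
main(void)
{
    domain_t dom = { PTHREAD_MUTEX_INITIALIZER, true };

    pthread_mutex_lock(&dom.lock);      /* caller holds the lock, as in the driver */
    int ret = file_wrapper_close(&dom);
    pthread_mutex_unlock(&dom.lock);

    printf("close returned %d\n", ret);
    return ret == 0 ? 0 : 1;
}

The important detail, mirrored from the patch, is the re-check after
re-taking the lock: once the lock has been dropped, anything previously
known about the domain's state may no longer hold.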