Once we called qemuDomainObjEnterRemote to talk to the destination daemon during a peer to peer migration, the vm lock is released and we only hold an async job. If the source domain dies at this point the monitor EOF callback is allowed to do its job and (among other things) clear all private data irrelevant for stopped domain. Thus when we call qemuDomainObjExitRemote, the domain may already be gone and we should avoid touching runtime private data (such as current job info). In other words after acquiring the lock in qemuDomainObjExitRemote, we need to check the domain is still alive. Unless we're doing offline migration. https://bugzilla.redhat.com/show_bug.cgi?id=1589730 Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx> --- src/qemu/qemu_domain.c | 14 +++++++++++- src/qemu/qemu_domain.h | 5 +++-- src/qemu/qemu_migration.c | 45 ++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index 9afe705929..a22c836985 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -7016,11 +7016,23 @@ void qemuDomainObjEnterRemote(virDomainObjPtr obj) virObjectUnlock(obj); } -void qemuDomainObjExitRemote(virDomainObjPtr obj) + +int +qemuDomainObjExitRemote(virDomainObjPtr obj, + bool checkActive) { virObjectLock(obj); VIR_DEBUG("Exited remote (vm=%p name=%s)", obj, obj->def->name); + + if (checkActive && !virDomainObjIsActive(obj)) { + virReportError(VIR_ERR_OPERATION_FAILED, + _("domain '%s' is not running"), + obj->def->name); + return -1; + } + + return 0; } diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index 0a9af756b8..30d186a921 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -584,8 +584,9 @@ void qemuDomainObjExitAgent(virDomainObjPtr obj, qemuAgentPtr agent) void qemuDomainObjEnterRemote(virDomainObjPtr obj) ATTRIBUTE_NONNULL(1); -void qemuDomainObjExitRemote(virDomainObjPtr obj) - ATTRIBUTE_NONNULL(1); +int qemuDomainObjExitRemote(virDomainObjPtr obj, + bool checkActive) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK; virDomainDefPtr qemuDomainDefCopy(virQEMUDriverPtr driver, virDomainDefPtr src, diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index c9aaa38029..cafab2f3e8 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -3920,13 +3920,15 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepareTunnel (dconn, st, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); ret = dconn->driver->domainMigratePrepare2 (dconn, &cookie, &cookielen, NULL, &uri_out, destflags, dname, resource, dom_xml); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, true) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1) @@ -3987,7 +3989,8 @@ qemuMigrationSrcPerformPeer2Peer2(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish2 (dconn, dname, cookie, cookielen, uri_out ? uri_out : dconnuri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + /* The domain is already gone at this point */ + ignore_value(qemuDomainObjExitRemote(vm, false)); if (cancelled && ddomain) VIR_ERROR(_("finish step ignored that migration was cancelled")); @@ -4052,6 +4055,7 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, int nparams = 0; int maxparams = 0; size_t i; + bool offline = !!(flags & VIR_MIGRATE_OFFLINE); VIR_DEBUG("driver=%p, sconn=%p, dconn=%p, dconnuri=%s, vm=%p, xmlin=%s, " "dname=%s, uri=%s, graphicsuri=%s, listenAddress=%s, " @@ -4145,7 +4149,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, (dconn, st, cookiein, cookieinlen, &cookieout, &cookieoutlen, destflags, dname, bandwidth, dom_xml); } - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } else { qemuDomainObjEnterRemote(vm); if (useParams) { @@ -4157,13 +4162,14 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, (dconn, cookiein, cookieinlen, &cookieout, &cookieoutlen, uri, &uri_out, destflags, dname, bandwidth, dom_xml); } - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } VIR_FREE(dom_xml); if (ret == -1) goto cleanup; - if (flags & VIR_MIGRATE_OFFLINE) { + if (offline) { VIR_DEBUG("Offline migration, skipping Perform phase"); VIR_FREE(cookieout); cookieoutlen = 0; @@ -4253,7 +4259,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish3Params (dconn, params, nparams, cookiein, cookieinlen, &cookieout, &cookieoutlen, destflags, cancelled); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } } else { dname = dname ? dname : vm->def->name; @@ -4261,7 +4268,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, ddomain = dconn->driver->domainMigrateFinish3 (dconn, dname, cookiein, cookieinlen, &cookieout, &cookieoutlen, dconnuri, uri, destflags, cancelled); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; } if (cancelled) { @@ -4399,6 +4407,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, virConnectPtr dconn = NULL; bool p2p; virErrorPtr orig_err = NULL; + bool offline = !!(flags & VIR_MIGRATE_OFFLINE); bool dstOffline = false; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); bool useParams; @@ -4439,7 +4448,9 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); dconn = virConnectOpenAuth(dconnuri, &virConnectAuthConfig, 0); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; + if (dconn == NULL) { virReportError(VIR_ERR_OPERATION_FAILED, _("Failed to connect to remote libvirt URI %s: %s"), @@ -4468,10 +4479,11 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, VIR_DRV_FEATURE_MIGRATION_V3); useParams = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_PARAMS); - if (flags & VIR_MIGRATE_OFFLINE) + if (offline) dstOffline = VIR_DRV_SUPPORTS_FEATURE(dconn->driver, dconn, VIR_DRV_FEATURE_MIGRATION_OFFLINE); - qemuDomainObjExitRemote(vm); + if (qemuDomainObjExitRemote(vm, !offline) < 0) + goto cleanup; if (!p2p) { virReportError(VIR_ERR_OPERATION_FAILED, "%s", @@ -4488,20 +4500,13 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, goto cleanup; } - if (flags & VIR_MIGRATE_OFFLINE && !dstOffline) { + if (offline && !dstOffline) { virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", _("offline migration is not supported by " "the destination host")); goto cleanup; } - /* domain may have been stopped while we were talking to remote daemon */ - if (!virDomainObjIsActive(vm) && !(flags & VIR_MIGRATE_OFFLINE)) { - virReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - /* Change protection is only required on the source side (us), and * only for v3 migration when begin and perform are separate jobs. * But peer-2-peer is already a single job, and we still want to @@ -4526,7 +4531,7 @@ qemuMigrationSrcPerformPeer2Peer(virQEMUDriverPtr driver, qemuDomainObjEnterRemote(vm); virConnectUnregisterCloseCallback(dconn, qemuMigrationSrcConnectionClosed); virObjectUnref(dconn); - qemuDomainObjExitRemote(vm); + ignore_value(qemuDomainObjExitRemote(vm, false)); if (orig_err) { virSetError(orig_err); virFreeError(orig_err); -- 2.18.0 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list