The destination daemon should not rely on the client or source daemon (depending on the type of migration) to call Finish when migration fails, because the client may crash before it can do so. The domain prepared for incoming migration is set to be destroyed (and the migration job cleaned up) when the connection with the client closes, but this is not enough. If the associated qemu process crashes after the Prepare step and the domain is cleaned up before the connection gets closed, autodestroy is not called for the domain and the migration job remains set. If the domain is defined on the destination host (i.e., it is not completely removed once destroyed), we keep the job set forever. To fix this, we register a cleanup callback which is responsible for cleaning up the migration-in job when a domain dies anywhere between the Prepare and Finish steps. Note that we can't blindly clean any job when spotting EOF on the monitor, since normally an API is running at that time. --- src/qemu/qemu_domain.c | 2 -- src/qemu/qemu_domain.h | 2 ++ src/qemu/qemu_migration.c | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c index a9469cf..41ffd6a 100644 --- a/src/qemu/qemu_domain.c +++ b/src/qemu/qemu_domain.c @@ -51,7 +51,6 @@ (VIR_DOMAIN_XML_SECURE | \ VIR_DOMAIN_XML_UPDATE_CPU) -VIR_ENUM_DECL(qemuDomainJob) VIR_ENUM_IMPL(qemuDomainJob, QEMU_JOB_LAST, "none", "query", @@ -64,7 +63,6 @@ VIR_ENUM_IMPL(qemuDomainJob, QEMU_JOB_LAST, "async nested", ); -VIR_ENUM_DECL(qemuDomainAsyncJob) VIR_ENUM_IMPL(qemuDomainAsyncJob, QEMU_ASYNC_JOB_LAST, "none", "migration out", diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h index af83c0e..d79ff1d 100644 --- a/src/qemu/qemu_domain.h +++ b/src/qemu/qemu_domain.h @@ -64,6 +64,7 @@ enum qemuDomainJob { QEMU_JOB_LAST }; +VIR_ENUM_DECL(qemuDomainJob) /* Async job consists of a series of jobs that may change state. 
Independent * jobs that do not change state (and possibly others if explicitly allowed by @@ -78,6 +79,7 @@ enum qemuDomainAsyncJob { QEMU_ASYNC_JOB_LAST }; +VIR_ENUM_DECL(qemuDomainAsyncJob) struct qemuDomainJobObj { virCond cond; /* Use to coordinate jobs */ diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 81b2d5b..4eb3bf4 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1107,6 +1107,23 @@ cleanup: /* Prepare is the first step, and it runs on the destination host. */ +static void +qemuMigrationPrepareCleanup(struct qemud_driver *driver, + virDomainObjPtr vm) +{ + qemuDomainObjPrivatePtr priv = vm->privateData; + + VIR_DEBUG("driver=%p, vm=%s, job=%s, asyncJob=%s", + driver, + vm->def->name, + qemuDomainJobTypeToString(priv->job.active), + qemuDomainAsyncJobTypeToString(priv->job.asyncJob)); + + if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_IN)) + return; + qemuDomainObjDiscardAsyncJob(driver, vm); +} + static int qemuMigrationPrepareAny(struct qemud_driver *driver, virConnectPtr dconn, @@ -1264,6 +1281,9 @@ qemuMigrationPrepareAny(struct qemud_driver *driver, VIR_WARN("Unable to encode migration cookie"); } + if (qemuDomainCleanupAdd(vm, qemuMigrationPrepareCleanup) < 0) + goto endjob; + virDomainAuditStart(vm, "migrated", true); event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STARTED, @@ -2703,6 +2723,8 @@ qemuMigrationFinish(struct qemud_driver *driver, v3proto ? QEMU_MIGRATION_PHASE_FINISH3 : QEMU_MIGRATION_PHASE_FINISH2); + qemuDomainCleanupRemove(vm, qemuMigrationPrepareCleanup); + if (flags & VIR_MIGRATE_PERSIST_DEST) cookie_flags |= QEMU_MIGRATION_COOKIE_PERSISTENT; -- 1.7.8.5 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list