There's no need to artificially pause a domain when post-copy fails. The virtual CPUs may continue running; only the guest tasks that decide to read a page which has not been migrated yet will get blocked. Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx> --- src/qemu/qemu_migration.c | 37 +++++++++++++++++++++++++++++++++---- src/qemu/qemu_migration.h | 6 ++++-- src/qemu/qemu_process.c | 8 ++++---- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index b735bdb391..a5c7a27124 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1577,14 +1577,19 @@ qemuMigrationSrcIsSafe(virDomainDef *def, void -qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver, - virDomainObj *vm) +qemuMigrationSrcPostcopyFailed(virDomainObj *vm) { + qemuDomainObjPrivate *priv = vm->privateData; + virQEMUDriver *driver = priv->driver; virDomainState state; int reason; state = virDomainObjGetState(vm, &reason); + VIR_DEBUG("%s/%s", + virDomainStateTypeToString(state), + virDomainStateReasonToString(state, reason)); + if (state != VIR_DOMAIN_PAUSED && state != VIR_DOMAIN_RUNNING) return; @@ -1608,6 +1613,30 @@ qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver, } +void +qemuMigrationDstPostcopyFailed(virDomainObj *vm) +{ + virDomainState state; + int reason; + + state = virDomainObjGetState(vm, &reason); + + VIR_DEBUG("%s/%s", + virDomainStateTypeToString(state), + virDomainStateReasonToString(state, reason)); + + if (state != VIR_DOMAIN_RUNNING || + reason == VIR_DOMAIN_RUNNING_POSTCOPY_FAILED) + return; + + VIR_WARN("Incoming migration of domain %s failed during post-copy; " + "leaving the domain running in a degraded mode", vm->def->name); + + virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, + VIR_DOMAIN_RUNNING_POSTCOPY_FAILED); +} + + static int qemuMigrationSrcWaitForSpice(virDomainObj *vm) { @@ -3470,7 +3499,7 @@ qemuMigrationSrcConfirmPhase(virQEMUDriver *driver, if (virDomainObjGetState(vm, &reason) 
== VIR_DOMAIN_PAUSED && reason == VIR_DOMAIN_PAUSED_POSTCOPY) - qemuMigrationAnyPostcopyFailed(driver, vm); + qemuMigrationSrcPostcopyFailed(vm); else qemuMigrationSrcRestoreDomainState(driver, vm); @@ -5847,7 +5876,7 @@ qemuMigrationDstFinish(virQEMUDriver *driver, VIR_DOMAIN_EVENT_STOPPED_FAILED); virObjectEventStateQueue(driver->domainEventState, event); } else { - qemuMigrationAnyPostcopyFailed(driver, vm); + qemuMigrationDstPostcopyFailed(vm); } } diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index a8afa66119..c4e4228282 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -251,8 +251,10 @@ qemuMigrationDstRun(virQEMUDriver *driver, virDomainAsyncJob asyncJob); void -qemuMigrationAnyPostcopyFailed(virQEMUDriver *driver, - virDomainObj *vm); +qemuMigrationSrcPostcopyFailed(virDomainObj *vm); + +void +qemuMigrationDstPostcopyFailed(virDomainObj *vm); int qemuMigrationSrcFetchMirrorStats(virQEMUDriver *driver, diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 1925559fad..a3192a7196 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3482,7 +3482,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver, * confirm success or failure yet; killing it seems safest unless * we already started guest CPUs or we were in post-copy mode */ if (postcopy) { - qemuMigrationAnyPostcopyFailed(driver, vm); + qemuMigrationDstPostcopyFailed(vm); } else if (state != VIR_DOMAIN_RUNNING) { VIR_DEBUG("Killing migrated domain %s", vm->def->name); return -1; @@ -3533,7 +3533,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, * post-copy mode */ if (postcopy) { - qemuMigrationAnyPostcopyFailed(driver, vm); + qemuMigrationSrcPostcopyFailed(vm); } else { VIR_DEBUG("Cancelling unfinished migration of domain %s", vm->def->name); @@ -3551,7 +3551,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, * post-copy mode we can use PAUSED_POSTCOPY_FAILED state for this */ if (postcopy) - 
qemuMigrationAnyPostcopyFailed(driver, vm); + qemuMigrationSrcPostcopyFailed(vm); break; case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED: @@ -3560,7 +3560,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, * as broken in that case */ if (postcopy) { - qemuMigrationAnyPostcopyFailed(driver, vm); + qemuMigrationSrcPostcopyFailed(vm); } else { VIR_DEBUG("Resuming domain %s after failed migration", vm->def->name); -- 2.35.1