When recovering from a failed post-copy migration, we need to go through all migration phases again, but don't need to repeat all the steps in each phase. Let's create a new set of migration phases dedicated to post-copy recovery so that we can easily distinguish between normal and recovery code. Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx> Reviewed-by: Peter Krempa <pkrempa@xxxxxxxxxx> Reviewed-by: Pavel Hrdina <phrdina@xxxxxxxxxx> --- Notes: Version 2: - additional comments src/qemu/qemu_migration.c | 20 +++++++++++++++++++- src/qemu/qemu_migration.h | 6 ++++++ src/qemu/qemu_process.c | 29 +++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 02827bd975..710aae3eb7 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -79,6 +79,12 @@ VIR_ENUM_IMPL(qemuMigrationJobPhase, "prepare", "finish2", "finish3", + "postcopy_failed", + "begin_resume", + "perform_resume", + "confirm_resume", + "prepare_resume", + "finish_resume", ); @@ -139,7 +145,8 @@ qemuMigrationJobSetPhase(virDomainObj *vm, { qemuDomainObjPrivate *priv = vm->privateData; - if (phase < priv->job.phase) { + if (phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED && + phase < priv->job.phase) { VIR_ERROR(_("migration protocol going backwards %s => %s"), qemuMigrationJobPhaseTypeToString(priv->job.phase), qemuMigrationJobPhaseTypeToString(phase)); @@ -2328,18 +2335,29 @@ qemuMigrationSrcCleanup(virDomainObj *vm, } break; + case QEMU_MIGRATION_PHASE_BEGIN_RESUME: + case QEMU_MIGRATION_PHASE_PERFORM_RESUME: + qemuMigrationSrcPostcopyFailed(vm); + qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); + qemuMigrationJobContinue(vm); + break; + case QEMU_MIGRATION_PHASE_PERFORM3: /* cannot be seen without an active migration API; unreachable */ case QEMU_MIGRATION_PHASE_CONFIRM3: case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED: + case QEMU_MIGRATION_PHASE_CONFIRM_RESUME: /* all done; unreachable */ case QEMU_MIGRATION_PHASE_PREPARE: case QEMU_MIGRATION_PHASE_FINISH2: case QEMU_MIGRATION_PHASE_FINISH3: + case QEMU_MIGRATION_PHASE_PREPARE_RESUME: + case QEMU_MIGRATION_PHASE_FINISH_RESUME: /* incoming migration; unreachable */ case QEMU_MIGRATION_PHASE_PERFORM2: /* single phase outgoing migration; unreachable */ case QEMU_MIGRATION_PHASE_NONE: + case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_LAST: /* unreachable */ ; diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 9351d6ac51..7eb0d4fe02 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -100,6 +100,12 @@ typedef enum { QEMU_MIGRATION_PHASE_PREPARE, QEMU_MIGRATION_PHASE_FINISH2, QEMU_MIGRATION_PHASE_FINISH3, + QEMU_MIGRATION_PHASE_POSTCOPY_FAILED, /* marker for resume phases */ + QEMU_MIGRATION_PHASE_BEGIN_RESUME, + QEMU_MIGRATION_PHASE_PERFORM_RESUME, + QEMU_MIGRATION_PHASE_CONFIRM_RESUME, + QEMU_MIGRATION_PHASE_PREPARE_RESUME, + QEMU_MIGRATION_PHASE_FINISH_RESUME, QEMU_MIGRATION_PHASE_LAST } qemuMigrationJobPhase; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 6dd643a38b..f752668b2f 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3507,6 +3507,10 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver, case QEMU_MIGRATION_PHASE_PERFORM3_DONE: case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED: case QEMU_MIGRATION_PHASE_CONFIRM3: + case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: + case QEMU_MIGRATION_PHASE_BEGIN_RESUME: + case QEMU_MIGRATION_PHASE_PERFORM_RESUME: + case QEMU_MIGRATION_PHASE_CONFIRM_RESUME: case QEMU_MIGRATION_PHASE_LAST: /* N/A for incoming migration */ break; @@ -3540,6 +3544,10 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver, return -1; } break; + + case QEMU_MIGRATION_PHASE_PREPARE_RESUME: + case QEMU_MIGRATION_PHASE_FINISH_RESUME: + return 1; } return 0; @@ -3548,7 +3556,8 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver, /* * Returns - * -1 on error, the domain will be killed, + * -1 the domain should be killed (either after a successful migration or + * on error), * 0 the domain should remain running with the migration job discarded, * 1 the daemon was restarted during post-copy phase */ @@ -3556,6 +3565,7 @@ static int qemuProcessRecoverMigrationOut(virQEMUDriver *driver, virDomainObj *vm, qemuDomainJobObj *job, + virDomainJobStatus migStatus, virDomainState state, int reason, unsigned int *stopFlags) @@ -3571,6 +3581,9 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, case QEMU_MIGRATION_PHASE_PREPARE: case QEMU_MIGRATION_PHASE_FINISH2: case QEMU_MIGRATION_PHASE_FINISH3: + case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: + case QEMU_MIGRATION_PHASE_PREPARE_RESUME: + case QEMU_MIGRATION_PHASE_FINISH_RESUME: case QEMU_MIGRATION_PHASE_LAST: /* N/A for outgoing migration */ break; @@ -3621,6 +3634,18 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, /* migration completed, we need to kill the domain here */ *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED; return -1; + + case QEMU_MIGRATION_PHASE_CONFIRM_RESUME: + if (migStatus == VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED) { + /* migration completed, we need to kill the domain here */ + *stopFlags |= VIR_QEMU_PROCESS_STOP_MIGRATED; + return -1; + } + return 1; + + case QEMU_MIGRATION_PHASE_BEGIN_RESUME: + case QEMU_MIGRATION_PHASE_PERFORM_RESUME: + return 1; } if (resume) { @@ -3659,7 +3684,7 @@ qemuProcessRecoverMigration(virQEMUDriver *driver, qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_NONE, &migStatus); if (job->asyncJob == VIR_ASYNC_JOB_MIGRATION_OUT) { - rc = qemuProcessRecoverMigrationOut(driver, vm, job, + rc = qemuProcessRecoverMigrationOut(driver, vm, job, migStatus, state, reason, stopFlags); } else { rc = qemuProcessRecoverMigrationIn(driver, vm, job, state); -- 2.35.1