This phase marks a migration protocol as broken in a post-copy phase. Libvirt is no longer actively watching the migration in this phase as the migration API that started the migration failed. This may either happen when post-copy migration really fails (QEMU enters postcopy-paused migration state) or when the migration still progresses between both QEMU processes, but libvirt lost control of it because the connection between libvirt daemons (in p2p migration) or a daemon and client (non-p2p migration) was closed. For example, when one of the daemons was restarted. Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx> Reviewed-by: Peter Krempa <pkrempa@xxxxxxxxxx> Reviewed-by: Pavel Hrdina <phrdina@xxxxxxxxxx> --- Notes: Version 2: - moved most of the last hunk to a separate patch src/qemu/qemu_migration.c | 15 +++++++++++---- src/qemu/qemu_process.c | 11 ++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 88702c94e4..302589b63c 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -2341,6 +2341,7 @@ qemuMigrationSrcCleanup(virDomainObj *vm, vm->def->name); if (virDomainObjIsPostcopy(vm, VIR_DOMAIN_JOB_OPERATION_MIGRATION_OUT)) { + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); qemuMigrationSrcPostcopyFailed(vm); qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); qemuMigrationJobContinue(vm); @@ -2352,8 +2353,10 @@ qemuMigrationSrcCleanup(virDomainObj *vm, } break; + case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_BEGIN_RESUME: case QEMU_MIGRATION_PHASE_PERFORM_RESUME: + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); qemuMigrationSrcPostcopyFailed(vm); qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); qemuMigrationJobContinue(vm); @@ -2374,7 +2377,6 @@ qemuMigrationSrcCleanup(virDomainObj *vm, case QEMU_MIGRATION_PHASE_PERFORM2: /* single phase outgoing migration; unreachable */ case QEMU_MIGRATION_PHASE_NONE: - case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_LAST: /* unreachable */ ; @@ -3744,6 +3746,7 @@ qemuMigrationSrcConfirm(virQEMUDriver *driver, flags, cancelled); if (virDomainObjIsFailedPostcopy(vm)) { + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); qemuMigrationJobContinue(vm); } else { @@ -5572,6 +5575,7 @@ qemuMigrationSrcPerformJob(virQEMUDriver *driver, virErrorPreserveLast(&orig_err); if (virDomainObjIsFailedPostcopy(vm)) { + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); qemuMigrationJobContinue(vm); } else { @@ -5664,6 +5668,8 @@ qemuMigrationSrcPerformPhase(virQEMUDriver *driver, jobPriv->migParams, priv->job.apiFlags); qemuMigrationJobFinish(vm); } else { + if (ret < 0) + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); qemuMigrationJobContinue(vm); } @@ -5903,7 +5909,7 @@ qemuMigrationDstComplete(virQEMUDriver *driver, /* Guest is successfully running, so cancel previous auto destroy. There's * nothing to remove when we are resuming post-copy migration. */ - if (!virDomainObjIsFailedPostcopy(vm)) + if (job->phase < QEMU_MIGRATION_PHASE_POSTCOPY_FAILED) qemuProcessAutoDestroyRemove(driver, vm); /* Remove completed stats for post-copy, everything but timing fields @@ -6170,6 +6176,7 @@ qemuMigrationDstFinishActive(virQEMUDriver *driver, } if (virDomainObjIsFailedPostcopy(vm)) { + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); qemuProcessAutoDestroyRemove(driver, vm); qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); *finishJob = false; @@ -6290,9 +6297,9 @@ qemuMigrationProcessUnattended(virQEMUDriver *driver, vm->def->name); if (job == VIR_ASYNC_JOB_MIGRATION_IN) - phase = QEMU_MIGRATION_PHASE_FINISH3; + phase = QEMU_MIGRATION_PHASE_FINISH_RESUME; else - phase = QEMU_MIGRATION_PHASE_CONFIRM3; + phase = QEMU_MIGRATION_PHASE_CONFIRM_RESUME; if (qemuMigrationJobStartPhase(vm, phase) < 0) return; diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index f752668b2f..8a98c03395 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -1555,9 +1555,12 @@ qemuProcessHandleMigrationStatus(qemuMonitor *mon G_GNUC_UNUSED, * watching it in any thread. Let's make sure the migration is properly * finished in case we get a "completed" event. */ - if (virDomainObjIsFailedPostcopy(vm) && priv->job.asyncOwner == 0) + if (virDomainObjIsPostcopy(vm, priv->job.current->operation) && + priv->job.phase == QEMU_MIGRATION_PHASE_POSTCOPY_FAILED && + priv->job.asyncOwner == 0) { qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_UNATTENDED_MIGRATION, priv->job.asyncJob, status, NULL); + } break; case QEMU_MONITOR_MIGRATION_STATUS_INACTIVE: @@ -3507,7 +3510,6 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver, case QEMU_MIGRATION_PHASE_PERFORM3_DONE: case QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED: case QEMU_MIGRATION_PHASE_CONFIRM3: - case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_BEGIN_RESUME: case QEMU_MIGRATION_PHASE_PERFORM_RESUME: case QEMU_MIGRATION_PHASE_CONFIRM_RESUME: @@ -3545,6 +3547,7 @@ qemuProcessRecoverMigrationIn(virQEMUDriver *driver, } break; + case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_PREPARE_RESUME: case QEMU_MIGRATION_PHASE_FINISH_RESUME: return 1; @@ -3581,7 +3584,6 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, case QEMU_MIGRATION_PHASE_PREPARE: case QEMU_MIGRATION_PHASE_FINISH2: case QEMU_MIGRATION_PHASE_FINISH3: - case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_PREPARE_RESUME: case QEMU_MIGRATION_PHASE_FINISH_RESUME: case QEMU_MIGRATION_PHASE_LAST: @@ -3643,6 +3645,7 @@ qemuProcessRecoverMigrationOut(virQEMUDriver *driver, } return 1; + case QEMU_MIGRATION_PHASE_POSTCOPY_FAILED: case QEMU_MIGRATION_PHASE_BEGIN_RESUME: case QEMU_MIGRATION_PHASE_PERFORM_RESUME: return 1; @@ -3694,6 +3697,8 @@ qemuProcessRecoverMigration(virQEMUDriver *driver, return -1; if (rc > 0) { + job->phase = QEMU_MIGRATION_PHASE_POSTCOPY_FAILED; + if (migStatus == VIR_DOMAIN_JOB_STATUS_POSTCOPY) { VIR_DEBUG("Post-copy migration of domain %s still running, it will be handled as unattended", vm->def->name); -- 2.35.1