On Tue, May 10, 2022 at 17:21:15 +0200, Jiri Denemark wrote: > Mostly we just need to check whether the domain is in a failed post-copy > migration that can be resumed. > > Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx> > --- > src/qemu/qemu_migration.c | 143 ++++++++++++++++++++++++++++++++++++++ > 1 file changed, 143 insertions(+) > > diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c > index c111dd8686..99b1d4b88b 100644 > --- a/src/qemu/qemu_migration.c > +++ b/src/qemu/qemu_migration.c > @@ -2717,6 +2717,143 @@ qemuMigrationSrcBeginPhase(virQEMUDriver *driver, > flags); > } > > + > +static bool > +qemuMigrationAnyCanResume(virDomainObj *vm, > + virDomainAsyncJob job, > + unsigned long flags, > + qemuMigrationJobPhase expectedPhase) > +{ > + qemuDomainObjPrivate *priv = vm->privateData; > + > + VIR_DEBUG("vm=%p, job=%s, flags=0x%lx, expectedPhase=%s", > + vm, virDomainAsyncJobTypeToString(job), flags, > + qemuDomainAsyncJobPhaseToString(VIR_ASYNC_JOB_MIGRATION_OUT, > + expectedPhase)); > + > + if (!(flags & VIR_MIGRATE_POSTCOPY)) { > + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", > + _("resuming failed post-copy migration requires " > + "post-copy to be enabled")); Error messages shall not have line breaks. Also ... requiring this flag seems to be a bit pointless and ... cosmetic at best. Since the previous migration would need to be started with the post-copy flag already which is checked below. > + return false; > + } > + > + /* This should never happen since POSTCOPY_RESUME is newer than > + * CHANGE_PROTECTION, but let's check it anyway in case we're talking to > + * a weired client. > + */ > + if (job == VIR_ASYNC_JOB_MIGRATION_OUT && > + expectedPhase < QEMU_MIGRATION_PHASE_PERFORM_RESUME && > + !(flags & VIR_MIGRATE_CHANGE_PROTECTION)) { > + virReportError(VIR_ERR_ARGUMENT_UNSUPPORTED, "%s", > + _("resuming failed post-copy migration requires " > + "change protection")); Same here. > + return NULL; > + } [...] > +static char * > +qemuMigrationSrcBeginResume(virQEMUDriver *driver, > + virDomainObj *vm, > + const char *xmlin, > + char **cookieout, > + int *cookieoutlen, > + unsigned long flags) > +{ > + virDomainJobStatus status; > + > + if (qemuMigrationAnyRefreshStatus(driver, vm, VIR_ASYNC_JOB_MIGRATION_OUT, > + &status) < 0) > + return NULL; > + > + if (status != VIR_DOMAIN_JOB_STATUS_POSTCOPY_PAUSED) { > + virReportError(VIR_ERR_OPERATION_INVALID, "%s", > + _("QEMU reports migration is still running")); > + return NULL; > + } > + > + return qemuMigrationSrcBeginXML(driver, vm, xmlin, > + cookieout, cookieoutlen, 0, NULL, 0, flags); Certain steps here around the non-shared-storage migration are redundant at this point and IMO should be skipped. > +} > + > + > +static char * > +qemuMigrationSrcBeginResumePhase(virConnectPtr conn, > + virQEMUDriver *driver, > + virDomainObj *vm, > + const char *xmlin, > + char **cookieout, > + int *cookieoutlen, > + unsigned long flags) > +{ > + g_autofree char *xml = NULL; > + > + VIR_DEBUG("vm=%p", vm); > + > + if (!qemuMigrationAnyCanResume(vm, VIR_ASYNC_JOB_MIGRATION_OUT, flags, > + QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)) > + return NULL; > + > + if (qemuMigrationJobStartPhase(vm, QEMU_MIGRATION_PHASE_BEGIN_RESUME) < 0) > + return NULL; > + > + virCloseCallbacksUnset(driver->closeCallbacks, vm, > + qemuMigrationSrcCleanup); > + qemuDomainCleanupRemove(vm, qemuProcessCleanupMigrationJob); Could you please add an explanation why we are removing the callbacks ... > + > + xml = qemuMigrationSrcBeginResume(driver, vm, xmlin, cookieout, cookieoutlen, flags); > + > + if (virCloseCallbacksSet(driver->closeCallbacks, vm, conn, > + qemuMigrationSrcCleanup) < 0) > + g_clear_pointer(&xml, g_free); > + > + if (!xml) > + ignore_value(qemuMigrationJobSetPhase(vm, QEMU_MIGRATION_PHASE_POSTCOPY_FAILED)); > + > + qemuDomainCleanupAdd(vm, qemuProcessCleanupMigrationJob); Just to add them here? > + qemuMigrationJobContinue(vm); > + return g_steal_pointer(&xml); > +}