Make MIGRATION_OUT use the new helper methods. This also introduces new protection for the v3 migration process: the migration job is held from Begin to Confirm to prevent changes to a domain during migration (especially between the Begin and Perform phases). This change is applied automatically to p2p and tunneled migrations. For normal migration, it requires support from the client. In other words, if an old (pre-0.9.4) client starts a normal migration of a domain, the domain will not be protected against changes between the Begin and Perform steps. --- include/libvirt/libvirt.h.in | 3 + src/libvirt.c | 27 ++++- src/libvirt_internal.h | 6 + src/qemu/qemu_driver.c | 61 +++++++++- src/qemu/qemu_migration.c | 272 ++++++++++++++++++++++++++++++------------ src/qemu/qemu_migration.h | 3 +- 6 files changed, 285 insertions(+), 87 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 607b5bc..18cc521 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -674,6 +674,9 @@ typedef enum { VIR_MIGRATE_NON_SHARED_DISK = (1 << 6), /* migration with non-shared storage with full disk copy */ VIR_MIGRATE_NON_SHARED_INC = (1 << 7), /* migration with non-shared storage with incremental copy */ /* (same base image shared between source and destination) */ + VIR_MIGRATE_CHANGE_PROTECTION = (1 << 8), /* protect for changing domain configuration through the + * whole migration process; this will be used automatically + * if both parties support it */ } virDomainMigrateFlags; diff --git a/src/libvirt.c b/src/libvirt.c index 39e2041..446a47b 100644 --- a/src/libvirt.c +++ b/src/libvirt.c @@ -3763,7 +3763,9 @@ virDomainMigrateVersion3(virDomainPtr domain, int ret; virDomainInfo info; virErrorPtr orig_err = NULL; - int cancelled; + int cancelled = 1; + unsigned long protection = 0; + VIR_DOMAIN_DEBUG(domain, "dconn=%p xmlin=%s, flags=%lx, " "dname=%s, uri=%s, bandwidth=%lu", dconn, NULLSTR(xmlin), flags, @@ -3779,10 +3781,14 @@ 
virDomainMigrateVersion3(virDomainPtr domain, return NULL; } + if (VIR_DRV_SUPPORTS_FEATURE(domain->conn->driver, domain->conn, + VIR_DRV_FEATURE_MIGRATE_CHANGE_PROTECTION)) + protection = VIR_MIGRATE_CHANGE_PROTECTION; + VIR_DEBUG("Begin3 %p", domain->conn); dom_xml = domain->conn->driver->domainMigrateBegin3 (domain, xmlin, &cookieout, &cookieoutlen, - flags, dname, bandwidth); + flags | protection, dname, bandwidth); if (!dom_xml) goto done; @@ -3800,14 +3806,22 @@ virDomainMigrateVersion3(virDomainPtr domain, (dconn, cookiein, cookieinlen, &cookieout, &cookieoutlen, uri, &uri_out, flags, dname, bandwidth, dom_xml); VIR_FREE (dom_xml); - if (ret == -1) - goto done; + if (ret == -1) { + if (protection) { + /* Begin already started a migration job so we need to cancel it by + * calling Confirm while making sure it doesn't overwrite the error + */ + orig_err = virSaveLastError(); + goto confirm; + } else { + goto done; + } + } if (uri == NULL && uri_out == NULL) { virLibConnError(VIR_ERR_INTERNAL_ERROR, _("domainMigratePrepare3 did not set uri")); virDispatchError(domain->conn); - cancelled = 1; goto finish; } if (uri_out) @@ -3828,7 +3842,7 @@ virDomainMigrateVersion3(virDomainPtr domain, ret = domain->conn->driver->domainMigratePerform3 (domain, NULL, cookiein, cookieinlen, &cookieout, &cookieoutlen, NULL, - uri, flags, dname, bandwidth); + uri, flags | protection, dname, bandwidth); /* Perform failed. Make sure Finish doesn't overwrite the error */ if (ret < 0) @@ -3873,6 +3887,7 @@ finish: if (!orig_err) orig_err = virSaveLastError(); +confirm: /* * If cancelled, then src VM will be restarted, else * it will be killed diff --git a/src/libvirt_internal.h b/src/libvirt_internal.h index 83c25fc..6e44341 100644 --- a/src/libvirt_internal.h +++ b/src/libvirt_internal.h @@ -73,6 +73,12 @@ enum { * domainMigrateConfirm3. 
*/ VIR_DRV_FEATURE_MIGRATION_V3 = 6, + + /* + * Driver supports protecting the whole V3-style migration against changes + * to domain configuration, i.e., starting from Begin3 and not Perform3. + */ + VIR_DRV_FEATURE_MIGRATE_CHANGE_PROTECTION = 7, }; diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index 8870e33..b378cb7 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -889,6 +889,7 @@ qemudSupportsFeature (virConnectPtr conn ATTRIBUTE_UNUSED, int feature) case VIR_DRV_FEATURE_MIGRATION_V2: case VIR_DRV_FEATURE_MIGRATION_V3: case VIR_DRV_FEATURE_MIGRATION_P2P: + case VIR_DRV_FEATURE_MIGRATE_CHANGE_PROTECTION: return 1; default: return 0; @@ -6869,12 +6870,56 @@ qemuDomainMigrateBegin3(virDomainPtr domain, goto cleanup; } - xml = qemuMigrationBegin(driver, vm, xmlin, - cookieout, cookieoutlen); + if ((flags & VIR_MIGRATE_CHANGE_PROTECTION)) { + if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) + goto cleanup; + } else { + if (qemuDomainObjBeginJob(driver, vm, QEMU_JOB_MODIFY) < 0) + goto cleanup; + } + + if (!virDomainObjIsActive(vm)) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("domain is not running")); + goto endjob; + } + + if (!(xml = qemuMigrationBegin(driver, vm, xmlin, + cookieout, cookieoutlen))) + goto endjob; + + if ((flags & VIR_MIGRATE_CHANGE_PROTECTION)) { + /* We keep the job active across API calls until the confirm() call. + * This prevents any other APIs being invoked while migration is taking + * place. 
+ */ + if (qemuMigrationJobContinue(vm) == 0) { + vm = NULL; + qemuReportError(VIR_ERR_OPERATION_FAILED, + "%s", _("domain disappeared")); + VIR_FREE(xml); + if (cookieout) + VIR_FREE(*cookieout); + } + } else { + goto endjob; + } cleanup: + if (vm) + virDomainObjUnlock(vm); qemuDriverUnlock(driver); return xml; + +endjob: + if ((flags & VIR_MIGRATE_CHANGE_PROTECTION)) { + if (qemuMigrationJobFinish(driver, vm) == 0) + vm = NULL; + } else { + if (qemuDomainObjEndJob(driver, vm) == 0) + vm = NULL; + } + goto cleanup; } static int @@ -7056,6 +7101,7 @@ qemuDomainMigrateConfirm3(virDomainPtr domain, struct qemud_driver *driver = domain->conn->privateData; virDomainObjPtr vm; int ret = -1; + enum qemuMigrationJobPhase phase; virCheckFlags(QEMU_MIGRATION_FLAGS, -1); @@ -7069,14 +7115,21 @@ qemuDomainMigrateConfirm3(virDomainPtr domain, goto cleanup; } - if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_MODIFY) < 0) + if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_OUT)) goto cleanup; + if (cancelled) + phase = QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED; + else + phase = QEMU_MIGRATION_PHASE_CONFIRM3; + + qemuMigrationJobStartPhase(driver, vm, phase); + ret = qemuMigrationConfirm(driver, domain->conn, vm, cookiein, cookieinlen, flags, cancelled); - if (qemuDomainObjEndJob(driver, vm) == 0) { + if (qemuMigrationJobFinish(driver, vm) == 0) { vm = NULL; } else if (!virDomainObjIsActive(vm) && (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE))) { diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 6e7117b..3eeb67f 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1009,6 +1009,7 @@ qemuDomainMigrateGraphicsRelocate(struct qemud_driver *driver, } +/* The caller is supposed to lock the vm and start a migration job. 
*/ char *qemuMigrationBegin(struct qemud_driver *driver, virDomainObjPtr vm, const char *xmlin, @@ -1018,14 +1019,17 @@ char *qemuMigrationBegin(struct qemud_driver *driver, char *rv = NULL; qemuMigrationCookiePtr mig = NULL; virDomainDefPtr def = NULL; + qemuDomainObjPrivatePtr priv = vm->privateData; + VIR_DEBUG("driver=%p, vm=%p, xmlin=%s, cookieout=%p, cookieoutlen=%p", driver, vm, NULLSTR(xmlin), cookieout, cookieoutlen); - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("domain is not running")); - goto cleanup; - } + /* Only set the phase if we are inside QEMU_ASYNC_JOB_MIGRATION_OUT. + * Otherwise we will start the async job later in the perform phase losing + * change protection. + */ + if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_BEGIN3); if (qemuProcessAutoDestroyActive(driver, vm)) { qemuReportError(VIR_ERR_OPERATION_INVALID, @@ -1063,7 +1067,6 @@ char *qemuMigrationBegin(struct qemud_driver *driver, } cleanup: - virDomainObjUnlock(vm); qemuMigrationCookieFree(mig); virDomainDefFree(def); return rv; @@ -1904,6 +1907,7 @@ static int doPeer2PeerMigrate2(struct qemud_driver *driver, * until the migration is complete. */ VIR_DEBUG("Perform %p", sconn); + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM2); if (flags & VIR_MIGRATE_TUNNELLED) ret = doTunnelMigrate(driver, vm, st, NULL, 0, NULL, NULL, @@ -2038,6 +2042,7 @@ static int doPeer2PeerMigrate3(struct qemud_driver *driver, * confirm migration completion. */ VIR_DEBUG("Perform3 %p uri=%s uri_out=%s", sconn, uri, uri_out); + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3); VIR_FREE(cookiein); cookiein = cookieout; cookieinlen = cookieoutlen; @@ -2055,8 +2060,12 @@ static int doPeer2PeerMigrate3(struct qemud_driver *driver, flags, dname, resource); /* Perform failed. 
Make sure Finish doesn't overwrite the error */ - if (ret < 0) + if (ret < 0) { orig_err = virSaveLastError(); + } else { + qemuMigrationJobSetPhase(driver, vm, + QEMU_MIGRATION_PHASE_PERFORM3_DONE); + } /* If Perform returns < 0, then we need to cancel the VM * startup on the destination @@ -2213,35 +2222,32 @@ cleanup: } -int qemuMigrationPerform(struct qemud_driver *driver, - virConnectPtr conn, - virDomainObjPtr vm, - const char *xmlin, - const char *dconnuri, - const char *uri, - const char *cookiein, - int cookieinlen, - char **cookieout, - int *cookieoutlen, - unsigned long flags, - const char *dname, - unsigned long resource, - bool v3proto) +/* + * This implements perform part of the migration protocol when migration job + * does not need to be active across several APIs, i.e., peer2peer migration or + * perform phase of v2 non-peer2peer migration. + */ +static int +qemuMigrationPerformJob(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *xmlin, + const char *dconnuri, + const char *uri, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource, + bool v3proto) { virDomainEventPtr event = NULL; int ret = -1; int resume = 0; - qemuDomainObjPrivatePtr priv = vm->privateData; - VIR_DEBUG("driver=%p, conn=%p, vm=%p, xmlin=%s, dconnuri=%s, " - "uri=%s, cookiein=%s, cookieinlen=%d, cookieout=%p, " - "cookieoutlen=%p, flags=%lx, dname=%s, resource=%lu, v3proto=%d", - driver, conn, vm, NULLSTR(xmlin), NULLSTR(dconnuri), - NULLSTR(uri), NULLSTR(cookiein), cookieinlen, - cookieout, cookieoutlen, flags, NULLSTR(dname), - resource, v3proto); - if (qemuDomainObjBeginAsyncJobWithDriver(driver, vm, - QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) + if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) goto cleanup; if (!virDomainObjIsActive(vm)) { @@ -2256,52 +2262,33 @@ int qemuMigrationPerform(struct qemud_driver *driver, goto 
endjob; } - memset(&priv->job.info, 0, sizeof(priv->job.info)); - priv->job.info.type = VIR_DOMAIN_JOB_UNBOUNDED; - resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING; if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { - if (cookieinlen) { - qemuReportError(VIR_ERR_OPERATION_INVALID, - "%s", _("received unexpected cookie with P2P migration")); - goto endjob; - } - - if (doPeer2PeerMigrate(driver, conn, vm, xmlin, - dconnuri, uri, flags, dname, - resource, &v3proto) < 0) - /* doPeer2PeerMigrate already set the error, so just get out */ - goto endjob; + ret = doPeer2PeerMigrate(driver, conn, vm, xmlin, + dconnuri, uri, flags, dname, + resource, &v3proto); } else { - if (dconnuri) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, - "%s", _("Unexpected dconnuri parameter with non-peer2peer migration")); - goto endjob; - } - if (doNativeMigrate(driver, vm, uri, cookiein, cookieinlen, - cookieout, cookieoutlen, - flags, dname, resource) < 0) - goto endjob; + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM2); + ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, dname, resource); } + if (ret < 0) + goto endjob; /* * In v3 protocol, the source VM is not killed off until the * confirm step. 
*/ - if (v3proto) { - resume = 0; - } else { + if (!v3proto) { qemuProcessStop(driver, vm, 1, VIR_DOMAIN_SHUTOFF_MIGRATED); virDomainAuditStop(vm, "migrated"); - resume = 0; - event = virDomainEventNewFromObj(vm, VIR_DOMAIN_EVENT_STOPPED, VIR_DOMAIN_EVENT_STOPPED_MIGRATED); } - - ret = 0; + resume = 0; endjob: if (resume && virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { @@ -2320,16 +2307,95 @@ endjob: VIR_DOMAIN_EVENT_RESUMED, VIR_DOMAIN_EVENT_RESUMED_MIGRATED); } - if (vm) { - if (qemuDomainObjEndAsyncJob(driver, vm) == 0) { - vm = NULL; - } else if (!virDomainObjIsActive(vm) && - (!vm->persistent || (flags & VIR_MIGRATE_UNDEFINE_SOURCE))) { - if (flags & VIR_MIGRATE_UNDEFINE_SOURCE) - virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); - virDomainRemoveInactive(&driver->domains, vm); - vm = NULL; + + if (qemuMigrationJobFinish(driver, vm) == 0) { + vm = NULL; + } else if (!virDomainObjIsActive(vm) && + (!vm->persistent || + (ret == 0 && (flags & VIR_MIGRATE_UNDEFINE_SOURCE)))) { + if (flags & VIR_MIGRATE_UNDEFINE_SOURCE) + virDomainDeleteConfig(driver->configDir, driver->autostartDir, vm); + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; + } + +cleanup: + if (vm) + virDomainObjUnlock(vm); + if (event) + qemuDomainEventQueue(driver, event); + return ret; +} + +/* + * This implements perform phase of v3 migration protocol. + */ +static int +qemuMigrationPerformPhase(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *uri, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource) +{ + virDomainEventPtr event = NULL; + int ret = -1; + bool resume; + int refs; + + /* If we didn't start the job in the begin phase, start it now. 
*/ + if (!(flags & VIR_MIGRATE_CHANGE_PROTECTION)) { + if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) + goto cleanup; + } else if (!qemuMigrationJobIsActive(vm, QEMU_ASYNC_JOB_MIGRATION_OUT)) { + goto cleanup; + } + + qemuMigrationJobStartPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3); + + resume = virDomainObjGetState(vm, NULL) == VIR_DOMAIN_RUNNING; + ret = doNativeMigrate(driver, vm, uri, cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, dname, resource); + + if (ret < 0 && resume && + virDomainObjGetState(vm, NULL) == VIR_DOMAIN_PAUSED) { + /* we got here through some sort of failure; start the domain again */ + if (qemuProcessStartCPUs(driver, vm, conn, + VIR_DOMAIN_RUNNING_MIGRATION_CANCELED) < 0) { + /* Hm, we already know we are in error here. We don't want to + * overwrite the previous error, though, so we just throw something + * to the logs and hope for the best + */ + VIR_ERROR(_("Failed to resume guest %s after failure"), + vm->def->name); } + + event = virDomainEventNewFromObj(vm, + VIR_DOMAIN_EVENT_RESUMED, + VIR_DOMAIN_EVENT_RESUMED_MIGRATED); + } + + if (ret < 0) + goto endjob; + + qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_PERFORM3_DONE); + +endjob: + if (ret < 0) + refs = qemuMigrationJobFinish(driver, vm); + else + refs = qemuMigrationJobContinue(vm); + if (refs == 0) { + vm = NULL; + } else if (!virDomainObjIsActive(vm) && !vm->persistent) { + virDomainRemoveInactive(&driver->domains, vm); + vm = NULL; } cleanup: @@ -2340,6 +2406,61 @@ cleanup: return ret; } +int +qemuMigrationPerform(struct qemud_driver *driver, + virConnectPtr conn, + virDomainObjPtr vm, + const char *xmlin, + const char *dconnuri, + const char *uri, + const char *cookiein, + int cookieinlen, + char **cookieout, + int *cookieoutlen, + unsigned long flags, + const char *dname, + unsigned long resource, + bool v3proto) +{ + VIR_DEBUG("driver=%p, conn=%p, vm=%p, xmlin=%s, dconnuri=%s, " + "uri=%s, cookiein=%s, cookieinlen=%d, 
cookieout=%p, " + "cookieoutlen=%p, flags=%lx, dname=%s, resource=%lu, v3proto=%d", + driver, conn, vm, NULLSTR(xmlin), NULLSTR(dconnuri), + NULLSTR(uri), NULLSTR(cookiein), cookieinlen, + cookieout, cookieoutlen, flags, NULLSTR(dname), + resource, v3proto); + + if ((flags & (VIR_MIGRATE_TUNNELLED | VIR_MIGRATE_PEER2PEER))) { + if (cookieinlen) { + qemuReportError(VIR_ERR_OPERATION_INVALID, + "%s", _("received unexpected cookie with P2P migration")); + return -1; + } + + return qemuMigrationPerformJob(driver, conn, vm, xmlin, dconnuri, uri, + cookiein, cookieinlen, cookieout, + cookieoutlen, flags, dname, resource, + v3proto); + } else { + if (dconnuri) { + qemuReportError(VIR_ERR_INTERNAL_ERROR, + "%s", _("Unexpected dconnuri parameter with non-peer2peer migration")); + return -1; + } + + if (v3proto) { + return qemuMigrationPerformPhase(driver, conn, vm, uri, + cookiein, cookieinlen, + cookieout, cookieoutlen, + flags, dname, resource); + } else { + return qemuMigrationPerformJob(driver, conn, vm, xmlin, dconnuri, + uri, cookiein, cookieinlen, + cookieout, cookieoutlen, flags, + dname, resource, v3proto); + } + } +} #if WITH_MACVTAP static void @@ -2573,15 +2694,14 @@ int qemuMigrationConfirm(struct qemud_driver *driver, virCheckFlags(QEMU_MIGRATION_FLAGS, -1); + qemuMigrationJobSetPhase(driver, vm, + retcode == 0 + ? QEMU_MIGRATION_PHASE_CONFIRM3 + : QEMU_MIGRATION_PHASE_CONFIRM3_CANCELLED); + if (!(mig = qemuMigrationEatCookie(driver, vm, cookiein, cookieinlen, 0))) return -1; - if (!virDomainObjIsActive(vm)) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("guest unexpectedly quit")); - goto cleanup; - } - /* Did the migration go as planned? 
If yes, kill off the * domain object, but if no, resume CPUs */ diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 005e415..9e88271 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -34,7 +34,8 @@ VIR_MIGRATE_UNDEFINE_SOURCE | \ VIR_MIGRATE_PAUSED | \ VIR_MIGRATE_NON_SHARED_DISK | \ - VIR_MIGRATE_NON_SHARED_INC) + VIR_MIGRATE_NON_SHARED_INC | \ + VIR_MIGRATE_CHANGE_PROTECTION) enum qemuMigrationJobPhase { QEMU_MIGRATION_PHASE_NONE = 0, -- 1.7.6 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list