Paolo Bonzini pointed out that it's actually possible to migrate a qemu instance that was paused due to an I/O error, and that it will be able to work on the destination if the storage is accessible there. This patch introduces the flag VIR_MIGRATE_ABORT_ON_ERROR, which cancels the migration if an I/O error happens while it is being performed; without this flag, the migration is allowed to proceed despite such an error. This flag can possibly be used for other error reasons that may be introduced in the future. --- include/libvirt/libvirt.h.in | 1 + src/qemu/qemu_driver.c | 4 ++-- src/qemu/qemu_migration.c | 23 ++++++++++++++--------- src/qemu/qemu_migration.h | 6 ++++-- tools/virsh-domain.c | 7 +++++++ tools/virsh.pod | 6 ++++-- 6 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/libvirt/libvirt.h.in b/include/libvirt/libvirt.h.in index 574b970..bd75e1d 100644 --- a/include/libvirt/libvirt.h.in +++ b/include/libvirt/libvirt.h.in @@ -1188,6 +1188,7 @@ typedef enum { VIR_MIGRATE_UNSAFE = (1 << 9), /* force migration even if it is considered unsafe */ VIR_MIGRATE_OFFLINE = (1 << 10), /* offline migrate */ VIR_MIGRATE_COMPRESSED = (1 << 11), /* compress data during migration */ + VIR_MIGRATE_ABORT_ON_ERROR = (1 << 12), /* abort migration on I/O errors happened during migration */ } virDomainMigrateFlags; /* Domain migration. 
*/ diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index c886378..4b23bc3 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -2819,7 +2819,7 @@ qemuDomainSaveInternal(virQEMUDriverPtr driver, virDomainPtr dom, if (!(caps = virQEMUDriverGetCapabilities(driver, false))) goto cleanup; - if (!qemuMigrationIsAllowed(driver, vm, vm->def, false)) + if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false)) goto cleanup; if (qemuDomainObjBeginAsyncJob(driver, vm, @@ -11663,7 +11663,7 @@ qemuDomainSnapshotCreateActiveExternal(virConnectPtr conn, /* do the memory snapshot if necessary */ if (memory) { /* check if migration is possible */ - if (!qemuMigrationIsAllowed(driver, vm, vm->def, false)) + if (!qemuMigrationIsAllowed(driver, vm, vm->def, false, false)) goto endjob; /* allow the migration job to be cancelled or the domain to be paused */ diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 48e0d44..281467b 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -1420,7 +1420,7 @@ cleanup: * the fact that older servers did not do checks on the source. 
*/ bool qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm, - virDomainDefPtr def, bool remote) + virDomainDefPtr def, bool remote, bool abort_on_error) { int nsnapshots; int pauseReason; @@ -1448,7 +1448,8 @@ qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm, } /* cancel migration if disk I/O error is emitted while migrating */ - if (virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED && + if (abort_on_error && + virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED && pauseReason == VIR_DOMAIN_PAUSED_IOERROR) { virReportError(VIR_ERR_OPERATION_INVALID, "%s", _("cannot migrate domain with I/O error")); @@ -1692,7 +1693,7 @@ qemuMigrationUpdateJobStatus(virQEMUDriverPtr driver, static int qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, enum qemuDomainAsyncJob asyncJob, - virConnectPtr dconn) + virConnectPtr dconn, bool abort_on_error) { qemuDomainObjPrivatePtr priv = vm->privateData; const char *job; @@ -1719,7 +1720,7 @@ qemuMigrationWaitForCompletion(virQEMUDriverPtr driver, virDomainObjPtr vm, struct timespec ts = { .tv_sec = 0, .tv_nsec = 50 * 1000 * 1000ull }; /* cancel migration if disk I/O error is emitted while migrating */ - if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT && + if (abort_on_error && virDomainObjGetState(vm, &pauseReason) == VIR_DOMAIN_PAUSED && pauseReason == VIR_DOMAIN_PAUSED_IOERROR) goto cancel; @@ -1920,6 +1921,7 @@ char *qemuMigrationBegin(virQEMUDriverPtr driver, qemuDomainObjPrivatePtr priv = vm->privateData; virCapsPtr caps = NULL; unsigned int cookieFlags = QEMU_MIGRATION_COOKIE_LOCKSTATE; + bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR); VIR_DEBUG("driver=%p, vm=%p, xmlin=%s, dname=%s," " cookieout=%p, cookieoutlen=%p, flags=%lx", @@ -1936,7 +1938,7 @@ char *qemuMigrationBegin(virQEMUDriverPtr driver, if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) qemuMigrationJobSetPhase(driver, vm, QEMU_MIGRATION_PHASE_BEGIN3); - if 
(!qemuMigrationIsAllowed(driver, vm, NULL, true)) + if (!qemuMigrationIsAllowed(driver, vm, NULL, true, abort_on_error)) goto cleanup; if (!(flags & VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def)) @@ -2052,6 +2054,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, virCapsPtr caps = NULL; const char *listenAddr = NULL; char *migrateFrom = NULL; + bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR); if (virTimeMillisNow(&now) < 0) return -1; @@ -2081,7 +2084,7 @@ qemuMigrationPrepareAny(virQEMUDriverPtr driver, if (!(caps = virQEMUDriverGetCapabilities(driver, false))) goto cleanup; - if (!qemuMigrationIsAllowed(driver, NULL, *def, true)) + if (!qemuMigrationIsAllowed(driver, NULL, *def, true, abort_on_error)) goto cleanup; /* Let migration hook filter domain XML */ @@ -2777,6 +2780,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, unsigned long migrate_speed = resource ? resource : priv->migMaxBandwidth; virErrorPtr orig_err = NULL; unsigned int cookieFlags = 0; + bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR); VIR_DEBUG("driver=%p, vm=%p, cookiein=%s, cookieinlen=%d, " "cookieout=%p, cookieoutlen=%p, flags=%lx, resource=%lu, " @@ -2929,7 +2933,7 @@ qemuMigrationRun(virQEMUDriverPtr driver, if (qemuMigrationWaitForCompletion(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT, - dconn) < 0) + dconn, abort_on_error) < 0) goto cleanup; /* When migration completed, QEMU will have paused the @@ -3610,6 +3614,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver, int resume = 0; virErrorPtr orig_err = NULL; virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver); + bool abort_on_error = !!(flags & VIR_MIGRATE_ABORT_ON_ERROR); if (qemuMigrationJobStart(driver, vm, QEMU_ASYNC_JOB_MIGRATION_OUT) < 0) goto cleanup; @@ -3620,7 +3625,7 @@ qemuMigrationPerformJob(virQEMUDriverPtr driver, goto endjob; } - if (!qemuMigrationIsAllowed(driver, vm, NULL, true)) + if (!qemuMigrationIsAllowed(driver, vm, NULL, true, abort_on_error)) goto endjob; if (!(flags & 
VIR_MIGRATE_UNSAFE) && !qemuMigrationIsSafe(vm->def)) @@ -4316,7 +4321,7 @@ qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm, if (rc < 0) goto cleanup; - rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob, NULL); + rc = qemuMigrationWaitForCompletion(driver, vm, asyncJob, NULL, false); if (rc < 0) goto cleanup; diff --git a/src/qemu/qemu_migration.h b/src/qemu/qemu_migration.h index 22b04b4..5b21ca2 100644 --- a/src/qemu/qemu_migration.h +++ b/src/qemu/qemu_migration.h @@ -38,7 +38,8 @@ VIR_MIGRATE_CHANGE_PROTECTION | \ VIR_MIGRATE_UNSAFE | \ VIR_MIGRATE_OFFLINE | \ - VIR_MIGRATE_COMPRESSED) + VIR_MIGRATE_COMPRESSED | \ + VIR_MIGRATE_ABORT_ON_ERROR) enum qemuMigrationJobPhase { QEMU_MIGRATION_PHASE_NONE = 0, @@ -147,7 +148,8 @@ int qemuMigrationConfirm(virQEMUDriverPtr driver, int retcode); bool qemuMigrationIsAllowed(virQEMUDriverPtr driver, virDomainObjPtr vm, - virDomainDefPtr def, bool remote); + virDomainDefPtr def, bool remote, + bool abort_on_error); int qemuMigrationToFile(virQEMUDriverPtr driver, virDomainObjPtr vm, int fd, off_t offset, const char *path, diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index 955e882..7f9b9e3 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -8306,6 +8306,10 @@ static const vshCmdOptDef opts_migrate[] = { .type = VSH_OT_BOOL, .help = N_("compress repeated pages during live migration") }, + {.name = "abort-on-error", + .type = VSH_OT_BOOL, + .help = N_("abort on soft errors during migration") + }, {.name = "domain", .type = VSH_OT_DATA, .flags = VSH_OFLAG_REQ, @@ -8399,6 +8403,9 @@ doMigrate(void *opaque) flags |= VIR_MIGRATE_OFFLINE; } + if (vshCommandOptBool(cmd, "abort-on-error")) + flags |= VIR_MIGRATE_ABORT_ON_ERROR; + if (xmlfile && virFileReadAll(xmlfile, 8192, &xml) < 0) { vshError(ctl, _("file '%s' doesn't exist"), xmlfile); diff --git a/tools/virsh.pod b/tools/virsh.pod index 405b4d2..21367d4 100644 --- a/tools/virsh.pod +++ b/tools/virsh.pod @@ -1043,7 +1043,8 @@ stats. 
=item B<migrate> [I<--live>] [I<--offline>] [I<--direct>] [I<--p2p> [I<--tunnelled>]] [I<--persistent>] [I<--undefinesource>] [I<--suspend>] [I<--copy-storage-all>] [I<--copy-storage-inc>] [I<--change-protection>] [I<--unsafe>] [I<--verbose>] -[I<--compressed>] I<domain> I<desturi> [I<migrateuri>] [I<dname>] +[I<--compressed>] [I<--abort-on-error>] +I<domain> I<desturi> [I<migrateuri>] [I<dname>] [I<--timeout> B<seconds>] [I<--xml> B<file>] Migrate domain to another host. Add I<--live> for live migration; <--p2p> @@ -1066,7 +1067,8 @@ is implicitly enabled when supported by the hypervisor, but can be explicitly used to reject the migration if the hypervisor lacks change protection support. I<--verbose> displays the progress of migration. I<--compressed> activates compression of memory pages that have to be transferred repeatedly -during live migration. +during live migration. I<--abort-on-error> cancels the migration if a soft +error (for example I/O error) happens during the migration. B<Note>: Individual hypervisors usually do not support all possible types of migration. For example, QEMU does not support direct migration. -- 1.8.2.1 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list