[PATCH 2/2] qemu: Avoid false failure when resuming post-copy migration

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Depending on timing between QEMU and libvirt, an attempt to resume a
failed post-copy migration could immediately report a failure in the
post-copy phase again even though the migration actually resumed and is
progressing just fine.

This is caused by QEMU reporting the original migration state (i.e.,
postcopy-paused) until migration is successfully resumed and QEMU
switches to postcopy-active. QEMU 9.1 introduced a new
postcopy-recover-setup migration state, which is entered immediately
after requesting migration to be resumed. With this state, we can
reliably wait for the migration to either continue or fail without
being confused by the old state.

https://issues.redhat.com/browse/RHEL-22166

Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx>
---
 src/qemu/qemu_migration.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
index 4f02a9a053..7f905f8584 100644
--- a/src/qemu/qemu_migration.c
+++ b/src/qemu/qemu_migration.c
@@ -1962,6 +1962,7 @@ enum qemuMigrationCompletedFlags {
     QEMU_MIGRATION_COMPLETED_CHECK_STORAGE  = (1 << 1),
     QEMU_MIGRATION_COMPLETED_POSTCOPY       = (1 << 2),
     QEMU_MIGRATION_COMPLETED_PRE_SWITCHOVER = (1 << 3),
+    QEMU_MIRGATION_COMPLETED_RECOVERY       = (1 << 4),
 };
 
 
@@ -2023,6 +2024,16 @@ qemuMigrationAnyCompleted(virDomainObj *vm,
         return 1;
     }
 
+    /* When QEMU is new enough to enter postcopy-recover-setup state during
+     * post-copy recovery, the source waits for the recovery to start
+     * before letting the destination wait for migration to complete.
+     */
+    if (flags & QEMU_MIRGATION_COMPLETED_RECOVERY &&
+        jobData->status == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
+        VIR_DEBUG("Post-copy recovery active");
+        return 1;
+    }
+
     if (jobData->status == VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED)
         return 1;
     else
@@ -5115,6 +5126,7 @@ qemuMigrationSrcResume(virDomainObj *vm,
                        char **cookieout,
                        int *cookieoutlen,
                        qemuMigrationSpec *spec,
+                       virConnectPtr dconn,
                        unsigned int flags)
 {
     qemuDomainObjPrivate *priv = vm->privateData;
@@ -5145,6 +5157,17 @@ qemuMigrationSrcResume(virDomainObj *vm,
     if (rc < 0)
         return -1;
 
+    /* Wait for postcopy recovery to start (or fail) if QEMU is new enough to
+     * support postcopy-recover-setup migration state. */
+    if (priv->migrationRecoverSetup) {
+        VIR_DEBUG("Waiting for post-copy recovery to start");
+        if (qemuMigrationSrcWaitForCompletion(vm, VIR_ASYNC_JOB_MIGRATION_OUT, dconn,
+                                              QEMU_MIRGATION_COMPLETED_RECOVERY) < 0)
+            return -1;
+    } else {
+        VIR_WARN("QEMU is too old, we may report a failure in post-copy phase even though the migration may be running just fine");
+    }
+
     if (qemuMigrationCookieFormat(mig, driver, vm,
                                   QEMU_MIGRATION_SOURCE,
                                   cookieout, cookieoutlen,
@@ -5249,7 +5272,7 @@ qemuMigrationSrcPerformNative(virQEMUDriver *driver,
 
     if (flags & VIR_MIGRATE_POSTCOPY_RESUME) {
         ret = qemuMigrationSrcResume(vm, migParams, cookiein, cookieinlen,
-                                     cookieout, cookieoutlen, &spec, flags);
+                                     cookieout, cookieoutlen, &spec, dconn, flags);
     } else {
         ret = qemuMigrationSrcRun(driver, vm, xmlin, persist_xml, cookiein, cookieinlen,
                                   cookieout, cookieoutlen, flags, resource,
-- 
2.45.2




[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]

  Powered by Linux