>From e28cb2a03a670e4c0e7641f68f9d9f3accb00ae0 Mon Sep 17 00:00:00 2001 From: Yilu Lin <linyilu@xxxxxxxxxx> Date: Tue, 4 Aug 2020 02:42:00 -0400 Subject: [PATCH] fix vm schizencephaly when heartbeat stoped Signed-off-by: Yilu Lin <linyilu@xxxxxxxxxx> If keepalive messages lost in finish step, vm maybe schizencephaly. Shutdown src vm for protection. --- src/qemu/qemu_migration.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c index 2c7bf34..b8296ba 100644 --- a/src/qemu/qemu_migration.c +++ b/src/qemu/qemu_migration.c @@ -4136,6 +4136,8 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, int cookieoutlen = 0; int ret = -1; virErrorPtr orig_err = NULL; + virErrorPtr finish_err = NULL; + bool living = true; bool cancelled = true; virStreamPtr st = NULL; unsigned long destflags; @@ -4394,7 +4396,16 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, * The lock manager plugins should take care of * safety in this scenario. */ - cancelled = ddomain == NULL; + if (!cancelled && !ddomain) + finish_err = virSaveLastError(); + + if (finish_err && finish_err->message && + strstr(finish_err->message, "received hangup / error event on socket")) { + living = false; + VIR_ERROR(_("keepalive messages lost in finish step, shutdown src vm for protection")); + } else { + cancelled = ddomain == NULL; + } /* If finish3 set an error, and we don't have an earlier * one we need to preserve it in case confirm3 overwrites @@ -4427,10 +4438,17 @@ qemuMigrationSrcPerformPeer2Peer3(virQEMUDriverPtr driver, virObjectUnref(ddomain); ret = 0; } else { - ret = -1; + if (!living) + ret = 0; + else + ret = -1; } virObjectUnref(st); + if (finish_err) { + virSetError(finish_err); + virFreeError(finish_err); + } virErrorRestore(&orig_err); VIR_FREE(uri_out); -- 2.19.1