Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=822839 For non-p2p migration, if the network goes down during migration, the virsh client will hang for a fairly long time. This patch adds keepalive to virsh to monitor the network connection to the remote libvirtd, aborting the migration job about 30 seconds after the connection is lost. --- tools/virsh.c | 29 +++++++++++++++++++++++------ 1 files changed, 23 insertions(+), 6 deletions(-) diff --git a/tools/virsh.c b/tools/virsh.c index 5226bd8..9099328 100644 --- a/tools/virsh.c +++ b/tools/virsh.c @@ -414,13 +414,14 @@ typedef struct __vshCtrlData { vshControl *ctl; const vshCmd *cmd; int writefd; + virConnectPtr dconn; } vshCtrlData; typedef void (*jobWatchTimeoutFunc) (vshControl *ctl, virDomainPtr dom, void *opaque); static bool -vshWatchJob(vshControl *ctl, +vshWatchJob(vshCtrlData *data, virDomainPtr dom, bool verbose, int pipe_fd, @@ -3277,6 +3278,7 @@ cmdSave(vshControl *ctl, const vshCmd *cmd) data.ctl = ctl; data.cmd = cmd; data.writefd = p[1]; + data.dconn = NULL; if (virThreadCreate(&workerThread, true, @@ -3284,7 +3286,7 @@ cmdSave(vshControl *ctl, const vshCmd *cmd) &data) < 0) goto cleanup; - ret = vshWatchJob(ctl, dom, verbose, p[0], 0, NULL, NULL, _("Save")); + ret = vshWatchJob(&data, dom, verbose, p[0], 0, NULL, NULL, _("Save")); virThreadJoin(&workerThread); @@ -3584,6 +3586,7 @@ cmdManagedSave(vshControl *ctl, const vshCmd *cmd) data.ctl = ctl; data.cmd = cmd; data.writefd = p[1]; + data.dconn = NULL; if (virThreadCreate(&workerThread, true, @@ -3591,7 +3594,7 @@ cmdManagedSave(vshControl *ctl, const vshCmd *cmd) &data) < 0) goto cleanup; - ret = vshWatchJob(ctl, dom, verbose, p[0], 0, + ret = vshWatchJob(&data, dom, verbose, p[0], 0, NULL, NULL, _("Managedsave")); virThreadJoin(&workerThread); @@ -4062,6 +4065,7 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) data.ctl = ctl; data.cmd = cmd; data.writefd = p[1]; + data.dconn = NULL; if (virThreadCreate(&workerThread, true, @@ 
-4069,7 +4073,7 @@ cmdDump(vshControl *ctl, const vshCmd *cmd) &data) < 0) goto cleanup; - ret = vshWatchJob(ctl, dom, verbose, p[0], 0, NULL, NULL, _("Dump")); + ret = vshWatchJob(&data, dom, verbose, p[0], 0, NULL, NULL, _("Dump")); virThreadJoin(&workerThread); @@ -7189,6 +7193,10 @@ doMigrate (void *opaque) dconn = virConnectOpenAuth (desturi, virConnectAuthPtrDefault, 0); if (!dconn) goto out; + data->dconn = dconn; + if (virConnectSetKeepAlive(dconn, 5, 5) < 0) + vshDebug(ctl, VSH_ERR_WARNING, "migrate: Failed to start keepalive\n"); + ddom = virDomainMigrate2(dom, dconn, xml, flags, dname, migrateuri, 0); if (ddom) { virDomainFree(ddom); @@ -7244,7 +7252,7 @@ vshMigrationTimeout(vshControl *ctl, } static bool -vshWatchJob(vshControl *ctl, +vshWatchJob(vshCtrlData *data, virDomainPtr dom, bool verbose, int pipe_fd, @@ -7262,6 +7270,7 @@ vshWatchJob(vshControl *ctl, char retchar; bool functionReturn = false; sigset_t sigmask, oldsigmask; + vshControl *ctl = data->ctl; sigemptyset(&sigmask); sigaddset(&sigmask, SIGINT); @@ -7305,6 +7314,13 @@ repoll: goto cleanup; } + if (data->dconn && virConnectIsAlive(data->dconn) <= 0) { + virDomainAbortJob(dom); + vshError(ctl, "%s", + _("Lost connection to destination host")); + goto cleanup; + } + GETTIMEOFDAY(&curr); if (timeout && (((int)(curr.tv_sec - start.tv_sec) * 1000 + (int)(curr.tv_usec - start.tv_usec) / 1000) > @@ -7378,13 +7394,14 @@ cmdMigrate(vshControl *ctl, const vshCmd *cmd) data.ctl = ctl; data.cmd = cmd; data.writefd = p[1]; + data.dconn = NULL; if (virThreadCreate(&workerThread, true, doMigrate, &data) < 0) goto cleanup; - functionReturn = vshWatchJob(ctl, dom, verbose, p[0], timeout, + functionReturn = vshWatchJob(&data, dom, verbose, p[0], timeout, vshMigrationTimeout, NULL, _("Migration")); virThreadJoin(&workerThread); -- 1.7.7.5 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list