When starting a migration with --timeout, we create a thread to call the migration API and in parallel set up a timer for the timeout. The description of --timeout says: "run action specified by --timeout-* option (suspend by default) if live migration exceeds timeout", which is not really the way this feature was implemented. Before live migration starts we first need to contact the source to get the domain definition and send it to the destination where a new QEMU process has to be started. This can take some (unpredictably long) time while the timeout timer is already running. If a very short timeout is set (which doesn't really make sense, but it's allowed), we may even end up taking the timeout action before the actual migration had a chance to start. With this patch the timeout is started only after we get a non-zero dataTotal from virDomainGetJobInfo, which means the migration (of either storage or memory) really started. https://issues.redhat.com/browse/RHEL-41264 Signed-off-by: Jiri Denemark <jdenemar@xxxxxxxxxx> --- tools/virsh-domain.c | 55 ++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/tools/virsh-domain.c b/tools/virsh-domain.c index e4923284af..546db955a9 100644 --- a/tools/virsh-domain.c +++ b/tools/virsh-domain.c @@ -4237,7 +4237,10 @@ typedef void (*jobWatchTimeoutFunc)(vshControl *ctl, virDomainPtr dom, struct virshWatchData { vshControl *ctl; virDomainPtr dom; + GMainContext *context; jobWatchTimeoutFunc timeout_func; + int timeout_secs; + GSource *timeout_src; void *opaque; const char *label; GIOChannel *stdin_ioc; @@ -4259,6 +4262,20 @@ virshWatchTimeout(gpointer opaque) } +static void +virshWatchSetTimeout(struct virshWatchData *data) +{ + vshDebug(data->ctl, VSH_ERR_DEBUG, + "watchJob: setting timeout of %d secs\n", data->timeout_secs); + + data->timeout_src = g_timeout_source_new_seconds(data->timeout_secs); + g_source_set_callback(data->timeout_src, + virshWatchTimeout, + data, 
NULL); + g_source_attach(data->timeout_src, data->context); +} + + static gboolean virshWatchProgress(gpointer opaque) { @@ -4290,10 +4307,17 @@ virshWatchProgress(gpointer opaque) jobinfo.type == VIR_DOMAIN_JOB_UNBOUNDED)) { vshTTYDisableInterrupt(data->ctl); data->jobStarted = true; + vshDebug(data->ctl, VSH_ERR_DEBUG, + "watchJob: job started\n"); + } - if (!data->verbose) { + if (data->jobStarted) { + if (data->timeout_secs > 0 && !data->timeout_src) { + if (jobinfo.dataTotal > 0) + virshWatchSetTimeout(data); + } else if (!data->verbose) { vshDebug(data->ctl, VSH_ERR_DEBUG, - "watchJob: job started, disabling callback\n"); + "watchJob: disabling callback\n"); return G_SOURCE_REMOVE; } } @@ -4356,13 +4380,15 @@ virshWatchJob(vshControl *ctl, struct sigaction sig_action; struct sigaction old_sig_action; #endif /* !WIN32 */ - g_autoptr(GSource) timeout_src = NULL; g_autoptr(GSource) progress_src = NULL; g_autoptr(GSource) stdin_src = NULL; struct virshWatchData data = { .ctl = ctl, .dom = dom, + .context = g_main_loop_get_context(eventLoop), .timeout_func = timeout_func, + .timeout_secs = timeout_secs, + .timeout_src = NULL, .opaque = opaque, .label = label, .stdin_ioc = NULL, @@ -4391,27 +4417,14 @@ virshWatchJob(vshControl *ctl, g_source_set_callback(stdin_src, (GSourceFunc)virshWatchInterrupt, &data, NULL); - g_source_attach(stdin_src, - g_main_loop_get_context(eventLoop)); - } - - if (timeout_secs) { - vshDebug(ctl, VSH_ERR_DEBUG, - "watchJob: setting timeout of %d secs\n", timeout_secs); - timeout_src = g_timeout_source_new_seconds(timeout_secs); - g_source_set_callback(timeout_src, - virshWatchTimeout, - &data, NULL); - g_source_attach(timeout_src, - g_main_loop_get_context(eventLoop)); + g_source_attach(stdin_src, data.context); } progress_src = g_timeout_source_new(500); g_source_set_callback(progress_src, virshWatchProgress, &data, NULL); - g_source_attach(progress_src, - g_main_loop_get_context(eventLoop)); + g_source_attach(progress_src, data.context); 
g_main_loop_run(eventLoop); @@ -4420,8 +4433,10 @@ virshWatchJob(vshControl *ctl, if (*job_err == 0 && verbose) /* print [100 %] */ virshPrintJobProgress(label, 0, 1); - if (timeout_src) - g_source_destroy(timeout_src); + if (data.timeout_src) { + g_source_destroy(data.timeout_src); + g_source_unref(data.timeout_src); + } g_source_destroy(progress_src); if (stdin_src) g_source_destroy(stdin_src); -- 2.47.0