If the QEMU process has been stopped (kill -STOP/gdb), or the QEMU process has live-locked itself, then we will never get a reply from the monitor. We should not wait forever in this case, but instead time out after a reasonable amount of time. NB: if the host has high CPU load, or a single monitor command intentionally takes a long time, then this will cause bogus failures. In the case of high CPU load, arguably the guest should have been migrated elsewhere, since you can't effectively manage guests on a host if QEMU is taking > 30 seconds to reply to simple commands. Since we use background migration, there should not be any commands which take significant time to execute any more. * src/qemu/qemu_monitor.c: Timeout waiting for reply after 30 seconds --- src/qemu/qemu_monitor.c | 21 ++++++++++++++++++--- 1 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c index 89a3f64..d9b6600 100644 --- a/src/qemu/qemu_monitor.c +++ b/src/qemu/qemu_monitor.c @@ -781,10 +781,19 @@ char *qemuMonitorNextCommandID(qemuMonitorPtr mon) } +/* Give up waiting for reply after 30 seconds */ +#define QEMU_MONITOR_WAIT_TIME (1000ull * 30) + int qemuMonitorSend(qemuMonitorPtr mon, qemuMonitorMessagePtr msg) { int ret = -1; + unsigned long long now; + unsigned long long then; + + if (virTimeMs(&now) < 0) + return -1; + then = now + QEMU_MONITOR_WAIT_TIME; /* Check whether qemu quited unexpectedly */ if (mon->lastError.code != VIR_ERR_OK) { @@ -798,9 +807,15 @@ int qemuMonitorSend(qemuMonitorPtr mon, qemuMonitorUpdateWatch(mon); while (!mon->msg->finished) { - if (virCondWait(&mon->notify, &mon->lock) < 0) { - qemuReportError(VIR_ERR_INTERNAL_ERROR, "%s", - _("Unable to wait on monitor condition")); + if (virCondWaitUntil(&mon->notify, &mon->lock, then) < 0) { + if (errno == ETIMEDOUT) + qemuReportError(VIR_ERR_OPERATION_TIMEOUT, + "%s", _("no reply received from qemu")); + else + virReportSystemError(errno, + "%s", _("cannot wait on 
monitor condition")); + /* Ensure no further monitor commands can be run */ + virCopyLastError(&mon->lastError); goto cleanup; } } -- 1.7.4.4 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list