Hello Guys,
When the qemu process hangs, virsh gets stuck on the hung guest and
cannot proceed. This can be reproduced with the following steps:
1. set up a virtual guest with qemu-kvm, and start it
2. stop the qemu process with the following command:
kill -STOP `ps aux | grep qemu | grep -v grep | awk '{print $2}'`
3. run the following command:
virsh list
I think we can resolve this problem by adding a timeout to the qemu
monitor, i.e. by using virCondWaitUntil instead of virCondWait in
qemuMonitorSend. What is your opinion?
Thanks!
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
index fca8590..65d8de9 100644
--- a/src/qemu/qemu_monitor.c
+++ b/src/qemu/qemu_monitor.c
@@ -25,6 +25,7 @@
#include <poll.h>
#include <sys/un.h>
+#include <sys/time.h>
#include <unistd.h>
#include <fcntl.h>
@@ -691,11 +692,14 @@ int qemuMonitorClose(qemuMonitorPtr mon)
return refs;
}
+#define QEMU_JOB_WAIT_TIME (1000ull * 30)
int qemuMonitorSend(qemuMonitorPtr mon,
qemuMonitorMessagePtr msg)
{
int ret = -1;
+ struct timeval now;
+ unsigned long long then;
if (mon->eofcb) {
msg->lastErrno = EIO;
@@ -706,7 +710,14 @@ int qemuMonitorSend(qemuMonitorPtr mon,
qemuMonitorUpdateWatch(mon);
while (!mon->msg->finished) {
- if (virCondWait(&mon->notify, &mon->lock) < 0)
+ if (gettimeofday(&now, NULL) < 0) {
+ virReportSystemError(errno, "%s",
+ _("cannot get time of day"));
+ return -1;
+ }
+ then = (now.tv_sec * 1000ull) + (now.tv_usec / 1000);
+ then += QEMU_JOB_WAIT_TIME;
+ if (virCondWaitUntil(&mon->notify, &mon->lock, then) < 0)
goto cleanup;
}
--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list