A number of bugs conspired together to cause some nasty problems when a QEMU vm failed to start - vm->monitor was not initialized to -1, so when a VM failed to start the vm->monitor was just '0', and thus we closed FD 0 (libvirtd's stdin) - The next client to connect got FD 0 as its socket - The first bug struck again, causing the client to be closed even though libvirt thought it was still open - libvirtd now polled on FD=0, which gave back POLLNVAL because it was closed - event.c was not looking for POLLNVAL so it spun at 100% cpu when this happened, instead of invoking the callback with an error code - virsh was not cleaning up the priv->waitDispatch call upon I/O errors, so virsh then hung when doing virConnectClose This patch does 3 things: - Treats POLLNVAL as VIR_EVENT_HANDLE_ERROR, so the callback gets to see the error & de-registers the client from the event loop - Adds the missing initialization of vm->monitor - Fixes remote_internal.c handling of I/O errors Daniel Index: qemud/event.c =================================================================== RCS file: /data/cvs/libvirt/qemud/event.c,v retrieving revision 1.17 diff -u -p -u -p -r1.17 event.c --- qemud/event.c 22 Dec 2008 12:55:47 -0000 1.17 +++ qemud/event.c 5 Feb 2009 17:12:51 -0000 @@ -653,6 +653,8 @@ virPollEventToEventHandleType(int events ret |= VIR_EVENT_HANDLE_WRITABLE; if(events & POLLERR) ret |= VIR_EVENT_HANDLE_ERROR; + if(events & POLLNVAL) /* Treat NVAL as error, since libvirt doesn't distinguish */ + ret |= VIR_EVENT_HANDLE_ERROR; if(events & POLLHUP) ret |= VIR_EVENT_HANDLE_HANGUP; return ret; Index: src/domain_conf.c =================================================================== RCS file: /data/cvs/libvirt/src/domain_conf.c,v retrieving revision 1.64 diff -u -p -u -p -r1.64 domain_conf.c --- src/domain_conf.c 30 Jan 2009 21:52:22 -0000 1.64 +++ src/domain_conf.c 5 Feb 2009 17:12:51 -0000 @@ -504,6 +504,7 @@ virDomainObjPtr virDomainAssignDef(virCo domain->state = VIR_DOMAIN_SHUTOFF; 
domain->def = def; domain->monitor_watch = -1; + domain->monitor = -1; if (VIR_REALLOC_N(doms->objs, doms->count + 1) < 0) { virReportOOMError(conn); Index: src/remote_internal.c =================================================================== RCS file: /data/cvs/libvirt/src/remote_internal.c,v retrieving revision 1.136 diff -u -p -u -p -r1.136 remote_internal.c --- src/remote_internal.c 3 Feb 2009 13:08:07 -0000 1.136 +++ src/remote_internal.c 5 Feb 2009 17:12:52 -0000 @@ -6192,17 +6192,17 @@ processCalls(virConnectPtr conn, continue; virReportSystemError(in_open ? NULL : conn, errno, "%s", _("poll on socket failed")); - return -1; + goto error; } if (fds[0].revents & POLLOUT) { if (processCallSend(conn, priv, in_open) < 0) - return -1; + goto error; } if (fds[0].revents & POLLIN) { if (processCallRecv(conn, priv, in_open) < 0) - return -1; + goto error; } /* Iterate through waiting threads and if @@ -6253,9 +6253,21 @@ processCalls(virConnectPtr conn, if (fds[0].revents & (POLLHUP | POLLERR)) { errorf(in_open ? NULL : conn, VIR_ERR_INTERNAL_ERROR, "%s", _("received hangup / error event on socket")); - return -1; + goto error; } } + + +error: + priv->waitDispatch = thiscall->next; + DEBUG("Giving up the buck due to I/O error %d %p %p", thiscall->proc_nr, thiscall, priv->waitDispatch); + /* See if someone else is still waiting + * and if so, then pass the buck ! */ + if (priv->waitDispatch) { + DEBUG("Passing the buck to %d %p", priv->waitDispatch->proc_nr, priv->waitDispatch); + virCondSignal(&priv->waitDispatch->cond); + } + return -1; } /* -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list