On Fri, Feb 06, 2009 at 04:48:09PM +0000, Daniel P. Berrange wrote: > On Fri, Feb 06, 2009 at 05:27:47PM +0100, Guido G?nther wrote: > > On Fri, Feb 06, 2009 at 10:33:51AM +0100, Farkas Levente wrote: > > > if ret is None:raise libvirtError('virDomainCreateLinux() failed', > > > conn=self) > > > libvirtError: internal error unable to start guest: > > I'm currently working around this with: > > > > diff --git a/src/qemu_driver.c b/src/qemu_driver.c > > index 09f69bf..b2f2b47 100644 > > --- a/src/qemu_driver.c > > +++ b/src/qemu_driver.c > > @@ -674,8 +674,6 @@ qemudReadMonitorOutput(virConnectPtr conn, > > _("Failure while reading %s startup output"), what); > > return -1; > > } > > - } else if (ret == 0) { > > - return 0; > > } else { > > got += ret; > > buf[got] = '\0'; > > > > Didn't find the time to debug this properly yet. > > That will cause libvirtd to spin 100% cpu forever, if a guest fails > to start up. eg disk to mis-configured disk > > The core problem here, is that ret == 0 has 2 possible implications > > - QEMU has exited, and no more data will be written > - QEMU is still starting up, and we have read all the data > written so far, but more may arrive soon. > > The current code there is correct for the first scenario, but even > removing it, is not entirely correct for the 2nd scenario. If we > hit ret == 0, and QEMU is still running, we shouldn't spin 100% > CPU in read - we should poll() to wait for more data. > > As a quick fix though, we could probably detect whether QEMU has exited > by doing 'kill(vm->pid, 0)' and check for errno == ESRCH - indicates > that the process no longer exists. > > eg, > > } else if (ret == 0) { > if (kill(vm->pid, 0) == -1) { > if (errno == ERSCH) > return 0; > else > return -1; > } else { > continue; /* should really go into poll() at this point */ > } > } else { The problem is that we're using this function for two purposes: To read from a logfile where poll()'ing on POLLIN always returns "data readable" which leaves us spinning (and EOF might get in our way) and also using it on the monitor PTY itself where it works as expected. Why not simply introduce qemudReadLogOutput? This at least allows us to usleep while waiting for the log file to fill up with data. I couldn't make libvirtd spin with this patch anymore. O.k. to apply? Cheers, -- Guido P.S.: I also changed the timeouts to be in seconds.
>From 6655474aed6e601a6c2e16e7020589f51f58893b Mon Sep 17 00:00:00 2001 From: =?utf-8?q?Guido=20G=C3=BCnther?= <agx@xxxxxxxxxxx> Date: Sat, 7 Feb 2009 17:16:39 +0100 Subject: [PATCH] usleep to wait for domain logfile to fill up --- src/qemu_driver.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 57 insertions(+), 8 deletions(-) diff --git a/src/qemu_driver.c b/src/qemu_driver.c index 47ca6c7..09be3fb 100644 --- a/src/qemu_driver.c +++ b/src/qemu_driver.c @@ -601,6 +601,7 @@ qemudReadMonitorOutput(virConnectPtr conn, { int got = 0; buf[0] = '\0'; + timeout *= 1000; /* poll wants milli seconds */ /* Consume & discard the initial greeting */ while (got < (buflen-1)) { @@ -662,6 +663,56 @@ qemudReadMonitorOutput(virConnectPtr conn, } + +/* + * Returns -1 for error, 0 on success + */ +static int +qemudReadLogOutput(virConnectPtr conn, + virDomainObjPtr vm, + int fd, + char *buf, + int buflen, + qemudHandlerMonitorOutput func, + const char *what, + int timeout) +{ + int got = 0; + int ret; + int retries = timeout*10; + buf[0] = '\0'; + + while (retries) { + while((ret = read(fd, buf+got, buflen-got-1)) > 0) { + got += ret; + buf[got] = '\0'; + if ((buflen-got-1) == 0) { + qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("Out of space while reading %s log output"), what); + return -1; + } + } + + if (ret < 0 && errno != EINTR) { + virReportSystemError(conn, errno, + _("Failure while reading %s log output"), + what); + return -1; + } + + ret = func(conn, vm, buf, fd); + if (ret <= 0) + return ret; + + usleep(100*1000); + retries--; + } + if (retries == 0) + qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("Timed out while reading %s log output"), what); + return -1; +} + static int qemudCheckMonitorPrompt(virConnectPtr conn ATTRIBUTE_UNUSED, virDomainObjPtr vm, @@ -706,7 +757,7 @@ static int qemudOpenMonitor(virConnectPtr conn, vm, monfd, buf, sizeof(buf), qemudCheckMonitorPrompt, - "monitor", 10000) <= 0) + "monitor", 10) <= 0) ret = -1; else ret = 0; @@ -738,6 +789,7 @@ static int qemudOpenMonitor(virConnectPtr conn, return ret; } +/* Returns -1 for error, 0 success, 1 continue reading */ static int qemudExtractMonitorPath(virConnectPtr conn, const char *haystack, size_t *offset, @@ -841,21 +893,18 @@ static int qemudWaitForMonitor(virConnectPtr conn, < 0) return -1; - ret = qemudReadMonitorOutput(conn, vm, logfd, buf, sizeof(buf), - qemudFindCharDevicePTYs, - "console", 3000); + ret = qemudReadLogOutput(conn, vm, logfd, buf, sizeof(buf), + qemudFindCharDevicePTYs, + "console", 3); if (close(logfd) < 0) { char ebuf[1024]; qemudLog(QEMUD_WARN, _("Unable to close logfile: %s\n"), virStrerror(errno, ebuf, sizeof ebuf)); } - if (ret == 1) /* Success */ + if (ret == 0) /* success */ return 0; - if (ret == -1) - return -1; - /* Unexpected end of file - inform user of QEMU log data */ qemudReportError(conn, NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("unable to start guest: %s"), buf); -- 1.6.0.6
-- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list