On Wed, Sep 28, 2011 at 03:11:10PM +0100, Daniel P. Berrange wrote: > From: "Daniel P. Berrange" <berrange@xxxxxxxxxx> > > The LXC controller and container attempt to run in lockstep > using a series of handshakes. There was a flaw in the container > side though, causing it todo partial setup work, before waiting > for the initial controller handshake. > > This in turn meant some of the controller setup could fail, > and obscure the real error from the container setup. Moving > the lxcContainerWaitForContinue call to be the first thing > the container does solves that. > > The controller still, however, pollutes the log with a message > about the container handshake failing, when the container > shuts down during startup. > > To deal with this requires special handling of the EOF condition > on the controllers lxcContainerWaitForContinue() call. > > * src/lxc/lxc_container.c: Wait for continue message from > controller before doing anything at all > * src/lxc/lxc_controller.c: Don't pollute log with error > message if EOF was returned from lxcContainerWaitForContinue. > * src/lxc/lxc_driver.c: Handle EOF from controller handshake > --- > src/lxc/lxc_container.c | 38 ++++++++++++++++++++++++++------------ > src/lxc/lxc_controller.c | 31 ++++++++++++++++++++++++++++--- > src/lxc/lxc_driver.c | 2 +- > 3 files changed, 55 insertions(+), 16 deletions(-) > > diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c > index 787df9a..ee5ca9f 100644 > --- a/src/lxc/lxc_container.c > +++ b/src/lxc/lxc_container.c > @@ -214,7 +214,7 @@ error_out: > * parent process. It will send this message on the socket pair stored in > * the vm structure once it has completed the post clone container setup. > * > - * Returns 0 on success or -1 in case of error > + * Returns 1 on success, 0 on EOF, or -1 in case of error > */ > int lxcContainerWaitForContinue(int control) > { > @@ -222,12 +222,18 @@ int lxcContainerWaitForContinue(int control) > int readLen; > > readLen = saferead(control, &msg, sizeof(msg)); > - if (readLen != sizeof(msg) || > - msg != LXC_CONTINUE_MSG) { > + if (readLen < 0) > + return -1; > + > + if (readLen != sizeof(msg)) > + return 0; > + > + if (msg != LXC_CONTINUE_MSG) { > + errno = EINVAL; > return -1; > } > > - return 0; > + return 1; > } > > > @@ -1036,6 +1042,20 @@ static int lxcContainerChild( void *data ) > char *ttyPath = NULL; > virDomainFSDefPtr root; > virCommandPtr cmd = NULL; > + int rc; > + > + /* Wait for interface devices to show up */ > + if ((rc = lxcContainerWaitForContinue(argv->monitor)) <= 0) { > + if (rc < 0) > + virReportSystemError(errno, "%s", > + _("Failed to read the container continue message")); > + else > + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("Controller unexpectedly quit")); > + goto cleanup; > + } > + > + VIR_DEBUG("Received container continue message"); > > if (NULL == vmDef) { > lxcError(VIR_ERR_INTERNAL_ERROR, > @@ -1079,14 +1099,6 @@ static int lxcContainerChild( void *data ) > goto cleanup; > } > > - /* Wait for interface devices to show up */ > - if (lxcContainerWaitForContinue(argv->monitor) < 0) { > - virReportSystemError(errno, "%s", > - _("Failed to read the container continue message")); > - goto cleanup; > - } > - VIR_DEBUG("Received container continue message"); > - > /* rename and enable interfaces */ > if (lxcContainerRenameAndEnableInterfaces(argv->nveths, > argv->veths) < 0) { > @@ -1120,6 +1132,8 @@ cleanup: > } > > virCommandFree(cmd); > + if (ret < 0) > + virDispatchError(NULL); > return ret; > } > > diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c > index 52d382e..8c3a54f 100644 > --- a/src/lxc/lxc_controller.c > +++ b/src/lxc/lxc_controller.c > @@ -780,6 +780,9 @@ static int lxcSetPersonality(virDomainDefPtr def) > # define MS_SLAVE (1<<19) > #endif > > +/* > + * Returns -1 on error, 0 if container quit unexpected, 1 on success > + */ > static int > lxcControllerRun(virDomainDefPtr def, > unsigned int nveths, > @@ -801,6 +804,7 @@ lxcControllerRun(virDomainDefPtr def, > size_t nloopDevs = 0; > int *loopDevs = NULL; > size_t i; > + int ret; > > if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) { > virReportSystemError(errno, "%s", > @@ -935,12 +939,25 @@ lxcControllerRun(virDomainDefPtr def, > goto cleanup; > } > > - if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) { > + if ((ret = lxcContainerWaitForContinue(containerhandshake[0])) < 0) { > virReportSystemError(errno, "%s", > _("error receiving signal from container")); > goto cleanup; > } > > + if (ret == 0) { > + rc = 0; > + /* We're not raising an error, since the container will have > + * done that already and we don't want to confuse the driver > + * when it fetches the error message from logs > + */ > +#if 0 > + lxcError(VIR_ERR_INTERNAL_ERROR, "%s", > + _("container quit during startup handshake")); > +#endif > + goto cleanup; > + } > + > /* Now the container is fully setup... */ > > /* ...we can close the loop devices... */ > @@ -959,7 +976,10 @@ lxcControllerRun(virDomainDefPtr def, > } > VIR_FORCE_CLOSE(handshakefd); > > - rc = lxcControllerMain(monitor, client, appPty, containerPty, container); > + if (lxcControllerMain(monitor, client, appPty, containerPty, container) != 0) > + goto cleanup; > + > + rc = 1; > > cleanup: > VIR_FREE(devptmx); > @@ -1186,5 +1206,10 @@ cleanup: > unlink(sockpath); > VIR_FREE(sockpath); > > - return rc ? EXIT_FAILURE : EXIT_SUCCESS; > + /* If rc == 0, we have an error, but the lxc container > + * startup will have printed it already.*/ > + if (rc < 0) > + virDispatchError(NULL); > + > + return rc <=0 ? EXIT_FAILURE : EXIT_SUCCESS; > } > diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c > index 4b62600..759e3a9 100644 > --- a/src/lxc/lxc_driver.c > +++ b/src/lxc/lxc_driver.c > @@ -1622,7 +1622,7 @@ static int lxcVmStart(virConnectPtr conn, > vm->def->id = vm->pid; > virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason); > > - if (lxcContainerWaitForContinue(handshakefds[0]) < 0) { > + if (lxcContainerWaitForContinue(handshakefds[0]) <= 0) { > char out[1024]; > > if (!(lxcReadLogOutput(vm, logfile, pos, out, 1024) < 0)) { ACK, Daniel -- Daniel Veillard | libxml Gnome XML XSLT toolkit http://xmlsoft.org/ daniel@xxxxxxxxxxxx | Rpmfind RPM search engine http://rpmfind.net/ http://veillard.com/ | virtualization library http://libvirt.org/ -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list