Re: [PATCH] Improve LXC startup error messages

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Sep 28, 2011 at 03:11:10PM +0100, Daniel P. Berrange wrote:
> From: "Daniel P. Berrange" <berrange@xxxxxxxxxx>
> 
> The LXC controller and container attempt to run in lockstep
> using a series of handshakes. There was a flaw in the container
> side though, causing it todo partial setup work, before waiting
> for the initial controller handshake.
> 
> This in turn meant some of the controller setup could fail,
> and obscure the real error from the container setup. Moving
> the lxcContainerWaitForContinue call to be the first thing
> the container does solves that.
> 
> The controller still, however, pollutes the log with a message
> about the container handshake failing, when the container
> shuts down during startup.
> 
> To deal with this requires special handling of the EOF condition
> on the controllers lxcContainerWaitForContinue() call.
> 
> * src/lxc/lxc_container.c: Wait for continue message from
>   controller before doing anything at all
> * src/lxc/lxc_controller.c: Don't pollute log with error
>   message if EOF was returned from lxcContainerWaitForContinue.
> * src/lxc/lxc_driver.c: Handle EOF from controller handshake
> ---
>  src/lxc/lxc_container.c  |   38 ++++++++++++++++++++++++++------------
>  src/lxc/lxc_controller.c |   31 ++++++++++++++++++++++++++++---
>  src/lxc/lxc_driver.c     |    2 +-
>  3 files changed, 55 insertions(+), 16 deletions(-)
> 
> diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c
> index 787df9a..ee5ca9f 100644
> --- a/src/lxc/lxc_container.c
> +++ b/src/lxc/lxc_container.c
> @@ -214,7 +214,7 @@ error_out:
>   * parent process.  It will send this message on the socket pair stored in
>   * the vm structure once it has completed the post clone container setup.
>   *
> - * Returns 0 on success or -1 in case of error
> + * Returns 1 on success, 0 on EOF, or -1 in case of error
>   */
>  int lxcContainerWaitForContinue(int control)
>  {
> @@ -222,12 +222,18 @@ int lxcContainerWaitForContinue(int control)
>      int readLen;
>  
>      readLen = saferead(control, &msg, sizeof(msg));
> -    if (readLen != sizeof(msg) ||
> -        msg != LXC_CONTINUE_MSG) {
> +    if (readLen < 0)
> +        return -1;
> +
> +    if (readLen != sizeof(msg))
> +        return 0;
> +
> +    if (msg != LXC_CONTINUE_MSG) {
> +        errno = EINVAL;
>          return -1;
>      }
>  
> -    return 0;
> +    return 1;
>  }
>  
>  
> @@ -1036,6 +1042,20 @@ static int lxcContainerChild( void *data )
>      char *ttyPath = NULL;
>      virDomainFSDefPtr root;
>      virCommandPtr cmd = NULL;
> +    int rc;
> +
> +    /* Wait for interface devices to show up */
> +    if ((rc = lxcContainerWaitForContinue(argv->monitor)) <= 0) {
> +        if (rc < 0)
> +            virReportSystemError(errno, "%s",
> +                                 _("Failed to read the container continue message"));
> +        else
> +            lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                     _("Controller unexpectedly quit"));
> +        goto cleanup;
> +    }
> +
> +    VIR_DEBUG("Received container continue message");
>  
>      if (NULL == vmDef) {
>          lxcError(VIR_ERR_INTERNAL_ERROR,
> @@ -1079,14 +1099,6 @@ static int lxcContainerChild( void *data )
>          goto cleanup;
>      }
>  
> -    /* Wait for interface devices to show up */
> -    if (lxcContainerWaitForContinue(argv->monitor) < 0) {
> -        virReportSystemError(errno, "%s",
> -                             _("Failed to read the container continue message"));
> -        goto cleanup;
> -    }
> -    VIR_DEBUG("Received container continue message");
> -
>      /* rename and enable interfaces */
>      if (lxcContainerRenameAndEnableInterfaces(argv->nveths,
>                                                argv->veths) < 0) {
> @@ -1120,6 +1132,8 @@ cleanup:
>      }
>  
>      virCommandFree(cmd);
> +    if (ret < 0)
> +        virDispatchError(NULL);
>      return ret;
>  }
>  
> diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
> index 52d382e..8c3a54f 100644
> --- a/src/lxc/lxc_controller.c
> +++ b/src/lxc/lxc_controller.c
> @@ -780,6 +780,9 @@ static int lxcSetPersonality(virDomainDefPtr def)
>  # define MS_SLAVE              (1<<19)
>  #endif
>  
> +/*
> + * Returns -1 on error, 0 if container quit unexpected, 1 on success
> + */
>  static int
>  lxcControllerRun(virDomainDefPtr def,
>                   unsigned int nveths,
> @@ -801,6 +804,7 @@ lxcControllerRun(virDomainDefPtr def,
>      size_t nloopDevs = 0;
>      int *loopDevs = NULL;
>      size_t i;
> +    int ret;
>  
>      if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
>          virReportSystemError(errno, "%s",
> @@ -935,12 +939,25 @@ lxcControllerRun(virDomainDefPtr def,
>          goto cleanup;
>      }
>  
> -    if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) {
> +    if ((ret = lxcContainerWaitForContinue(containerhandshake[0])) < 0) {
>          virReportSystemError(errno, "%s",
>                               _("error receiving signal from container"));
>          goto cleanup;
>      }
>  
> +    if (ret == 0) {
> +        rc = 0;
> +        /* We're not raising an error, since the container will have
> +         * done that already and we don't want to confuse the driver
> +         * when it fetches the error message from logs
> +         */
> +#if 0
> +        lxcError(VIR_ERR_INTERNAL_ERROR, "%s",
> +                 _("container quit during startup handshake"));
> +#endif
> +        goto cleanup;
> +    }
> +
>      /* Now the container is fully setup... */
>  
>      /* ...we can close the loop devices... */
> @@ -959,7 +976,10 @@ lxcControllerRun(virDomainDefPtr def,
>      }
>      VIR_FORCE_CLOSE(handshakefd);
>  
> -    rc = lxcControllerMain(monitor, client, appPty, containerPty, container);
> +    if (lxcControllerMain(monitor, client, appPty, containerPty, container) != 0)
> +        goto cleanup;
> +
> +    rc = 1;
>  
>  cleanup:
>      VIR_FREE(devptmx);
> @@ -1186,5 +1206,10 @@ cleanup:
>          unlink(sockpath);
>      VIR_FREE(sockpath);
>  
> -    return rc ? EXIT_FAILURE : EXIT_SUCCESS;
> +    /* If rc == 0, we have an error, but the lxc container
> +     * startup will have printed it already.*/
> +    if (rc < 0)
> +        virDispatchError(NULL);
> +
> +    return rc <=0 ? EXIT_FAILURE : EXIT_SUCCESS;
>  }
> diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
> index 4b62600..759e3a9 100644
> --- a/src/lxc/lxc_driver.c
> +++ b/src/lxc/lxc_driver.c
> @@ -1622,7 +1622,7 @@ static int lxcVmStart(virConnectPtr conn,
>      vm->def->id = vm->pid;
>      virDomainObjSetState(vm, VIR_DOMAIN_RUNNING, reason);
>  
> -    if (lxcContainerWaitForContinue(handshakefds[0]) < 0) {
> +    if (lxcContainerWaitForContinue(handshakefds[0]) <= 0) {
>          char out[1024];
>  
>          if (!(lxcReadLogOutput(vm, logfile, pos, out, 1024) < 0)) {

  ACK,

Daniel

-- 
Daniel Veillard      | libxml Gnome XML XSLT toolkit  http://xmlsoft.org/
daniel@xxxxxxxxxxxx  | Rpmfind RPM search engine http://rpmfind.net/
http://veillard.com/ | virtualization library  http://libvirt.org/

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list


[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]