Re: [PATCH] lxc: start domain

Daniel Veillard <veillard@xxxxxxxxxx> · Thu, 27 Mar 2008 09:05:41 -0400

On Wed, Mar 26, 2008 at 11:20:59PM -0700, Dave Leskovec wrote:
> This is a repost of patch four in the last series I posted.  It contains the
> start container support.  I've made some changes corresponding to Dan B's patch
> moving the lxc driver under libvirtd.  I removed the isolation forks and cleaned
> up the status handling and PID storing.

  A general comment on the patch: I prefer to see every function with
a header comment. It's a bit painful, but it helps to grasp the context when an
error arises and someone who doesn't know the code tries to sort it out. That's
not a blocker for commits, but it is better for long-term maintenance :-)

[...]
> +/* Functions */
> +static int lxcExecContainerInit(lxc_vm_def_t *vmDef)
> +{
> +    int rc = -1;
> +    char* execString;
> +    int execStringLen = strlen(vmDef->init) + 1 + 5;
> +
> +    if(NULL == (execString = calloc(execStringLen, sizeof(char)))) {

   An error should really be reported here.

> +        goto error_out;
> +    }
> +
> +    strcpy(execString, "exec ");
> +    strcat(execString, vmDef->init);

 Hum, it seems there is an off by one allocation error, you don't allocate
the space for the terminating 0

> +    execl("/bin/sh", "sh", "-c", execString, (char*)NULL);
> +    DEBUG("execl failed: %s", strerror(errno));
> +
> +error_out:
> +    exit(rc);
> +}
[...]

> +static int lxcTtyForward(int fd1, int fd2, int *loopFlag, int pollmsecs)
> +{
> +    int rc = -1;
> +    int i;
> +    char buf[2];
> +    struct pollfd fds[2];
> +    int numFds = 0;
> +
> +    if (0 <= fd1) {
> +        fds[numFds].fd = fd1;
> +        fds[numFds].events = POLLIN;
> +        ++numFds;
> +    }
> +
> +    if (0 <= fd2) {
> +        fds[numFds].fd = fd2;
> +        fds[numFds].events = POLLIN;
> +        ++numFds;
> +    }
> +
> +    if (0 == numFds) {
> +        DEBUG0("No fds to monitor, return");
> +        goto cleanup;
> +    }
> +
> +    while (!(*loopFlag)) {
> +        if ((rc = poll(fds, numFds, pollmsecs)) <= 0) {
> +            if(*loopFlag) {
> +                goto cleanup;
> +            }
> +
> +            if ((0 == rc) || (errno == EINTR) || (errno == EAGAIN)) {
> +                continue;
> +            }
> +
> +            DEBUG("poll returned error: %s", strerror(errno));
> +            goto cleanup;
> +        }
> +
> +        for (i = 0; i < numFds; ++i) {
> +            if (!fds[i].revents) {
> +                continue;
> +            }
> +
> +            if (fds[i].revents & POLLIN) {
> +                saferead(fds[i].fd, buf, 1);
> +                if (1 < numFds) {
> +                    safewrite(fds[i ^ 1].fd, buf, 1);
> +                }
> +

  So if only one fd is given, we discard all data read, and if two fds are
given, all data from one is forwarded to the other, right?

> +static int exitChildLoop;
> +static void lxcExecChildHandler(int sig ATTRIBUTE_UNUSED,
> +                                siginfo_t *signalInfo,
> +                                void *context ATTRIBUTE_UNUSED)
> +{
> +    DEBUG("lxcExecChildHandler signal from %d\n", signalInfo->si_pid);
> +
> +    if (signalInfo->si_pid == initPid) {
> +        exitChildLoop = 1;
> +    } else {
> +        waitpid(signalInfo->si_pid, NULL, WNOHANG);
> +    }
> +
> +}
> +
> +static int lxcExecWithTty(lxc_vm_t *vm)
> +{
> +    int rc = -1;
> +    lxc_vm_def_t *vmDef = vm->def;
> +    int ttymaster = -1;
> +    int ttyslave = -1;
> +    struct sigaction sigAction;
> +    sigset_t sigMask;
> +    int childStatus;
> +
> +    if (lxcSetupContainerTty(&ttymaster, &ttyslave) < 0) {
> +        goto exit_with_error;
> +    }
> +
> +    sigAction.sa_sigaction = lxcExecChildHandler;
> +    sigfillset(&sigMask);
> +    sigAction.sa_mask = sigMask;
> +    sigAction.sa_flags = SA_SIGINFO;
> +    if (0 != sigaction(SIGCHLD, &sigAction, NULL)) {
> +        DEBUG("sigaction failed: %s\n", strerror(errno));
> +        goto exit_with_error;
> +    }
> +
> +    exitChildLoop = 0;
> +    if ((initPid = fork()) == 0) {
> +        if(lxcSetContainerStdio(ttyslave) < 0) {
> +            exitChildLoop = 1;
> +            goto exit_with_error;
> +        }
> +
> +        lxcExecContainerInit(vmDef);
> +        /* this function will not return.  if it fails, it will exit */
> +    }
> +
> +    close(ttyslave);
> +    lxcTtyForward(ttymaster, vm->parentTty,
> +                  &exitChildLoop, 100);
> +
> +    DEBUG("child waiting on pid %d", initPid);
> +    waitpid(initPid, &childStatus, 0);
> +    rc = WEXITSTATUS(childStatus);
> +    DEBUG("container exited with rc: %d", rc);
> +
> +exit_with_error:
> +    exit(rc);
> +}

  So this is forked for each container created, right ?

[...]
> Index: b/src/lxc_container.h
ok

> Index: b/src/lxc_driver.c
> ===================================================================
> --- a/src/lxc_driver.c	2008-03-21 11:46:11.000000000 -0700
> +++ b/src/lxc_driver.c	2008-03-24 16:46:48.000000000 -0700
> @@ -25,14 +25,17 @@
[...]
> +static int lxcStartContainer(virConnectPtr conn,
> +                             lxc_driver_t* driver,
> +                             lxc_vm_t *vm)
> +{
> +    int rc = -1;
> +    int flags;
> +    int stacksize = getpagesize() * 4;
> +    void *stack, *stacktop;
> +
> +    /* allocate a stack for the container */
> +    stack = malloc(stacksize);
> +    if (!stack) {
> +        lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
> +                 _("unable to allocate container stack"));
> +        goto error_exit;
> +    }
> +    stacktop = (char*)stack + stacksize;
> +
> +    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
> +
> +    vm->pid = clone(lxcChild, stacktop, flags, (void *)vm);
> +
> +    DEBUG("clone() returned, %d", vm->pid);
> +
> +    if (vm->pid < 0) {
> +        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
> +                 _("clone() failed, %s"), strerror(errno));
> +        goto error_exit;
> +    }
> +
> +    vm->def->id = vm->pid;
> +    lxcSaveConfig(NULL, driver, vm, vm->def);

  We should make sure at some point that there is some kind of locking
mechanism when creating those config files, no?

> +    rc = 0;
> +
> +error_exit:
> +    return rc;
> +}
[...]
> +static int lxcVmStart(virConnectPtr conn,
> +                      lxc_driver_t * driver,
> +                      lxc_vm_t * vm)
> +{
> +    int rc = -1;
> +    lxc_vm_def_t *vmDef = vm->def;
> +
> +    /* open tty for the container */
> +    if(lxcSetupTtyTunnel(conn, vmDef, &vm->parentTty) < 0) {
> +        goto cleanup;
> +    }
> +
> +    rc = lxcStartContainer(conn, driver, vm);
> +
> +    if(rc == 0) {
> +        vm->state = VIR_DOMAIN_RUNNING;
> +        driver->ninactivevms--;
> +        driver->nactivevms++;
> +    }
> +
> +cleanup:
> +    return rc;
> +}
[...]

  Hum, now that config names are saved based on domain names, wouldn't
it be better to use a name-based lookup? Less precise, but more direct.

[...]
>  
>  static int lxcStartup(void)
>  {
> @@ -459,7 +666,7 @@
>      NULL, /* getCapabilities */
>      lxcListDomains, /* listDomains */
>      lxcNumDomains, /* numOfDomains */
> -    NULL/*lxcDomainCreateLinux*/, /* domainCreateLinux */
> +    lxcDomainCreateAndStart, /* domainCreateLinux */
>      lxcDomainLookupByID, /* domainLookupByID */
>      lxcDomainLookupByUUID, /* domainLookupByUUID */
>      lxcDomainLookupByName, /* domainLookupByName */
> @@ -483,7 +690,7 @@
>      lxcDomainDumpXML, /* domainDumpXML */
>      lxcListDefinedDomains, /* listDefinedDomains */
>      lxcNumDefinedDomains, /* numOfDefinedDomains */
> -    NULL, /* domainCreate */
> +    lxcDomainStart, /* domainCreate */
>      lxcDomainDefine, /* domainDefineXML */
>      lxcDomainUndefine, /* domainUndefine */
>      NULL, /* domainAttachDevice */

  Looks fine overall. I wonder if 1 fork/clone per container can't be
reduced to a single process doing the fd processing for all the containers
running. But that's probably harder, more fragile and for little gain.

Daniel

-- 
Red Hat Virtualization group http://redhat.com/virtualization/
Daniel Veillard      | virtualization library  http://libvirt.org/
veillard@xxxxxxxxxx  | libxml GNOME XML XSLT toolkit  http://xmlsoft.org/
http://veillard.com/ | Rpmfind RPM search engine  http://rpmfind.net/

--
Libvir-list mailing list
Libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list