[PATCH] lxc: start domain

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a repost of patch four from the last series I posted.  It adds
support for starting a container.  I've made some changes to match Dan B's patch
moving the lxc driver under libvirtd: I removed the isolation forks and cleaned
up the status handling and PID storage.

Thanks!

-- 
Best Regards,
Dave Leskovec
IBM Linux Technology Center
Open Virtualization

---
 src/Makefile.am     |    1 
 src/lxc_container.c |  324 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lxc_container.h |   44 +++++++
 src/lxc_driver.c    |  211 +++++++++++++++++++++++++++++++++
 4 files changed, 578 insertions(+), 2 deletions(-)

Index: b/src/lxc_container.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ b/src/lxc_container.c	2008-03-24 16:28:47.000000000 -0700
@@ -0,0 +1,324 @@
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_container.c: functions run inside the container
+ *
+ * Authors:
+ *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <config.h>
+
+#ifdef WITH_LXC
+
+#include <fcntl.h>
+#include <limits.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "lxc_container.h"
+#include "lxc_conf.h"
+#include "util.h"
+
+#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
+#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
+
+/* Functions */
+/* Replace this process with "/bin/sh -c 'exec <init>'", where <init> is
+ * vmDef->init.  Never returns: exits with status -1 when the command buffer
+ * cannot be allocated or the exec itself fails. */
+static int lxcExecContainerInit(lxc_vm_def_t *vmDef)
+{
+    static const char prefix[] = "exec ";
+    /* sizeof(prefix) already counts the terminating NUL */
+    int cmdLen = strlen(vmDef->init) + sizeof(prefix);
+    char *cmd = calloc(cmdLen, sizeof(char));
+
+    if (NULL != cmd) {
+        strcpy(cmd, prefix);
+        strcat(cmd, vmDef->init);
+        execl("/bin/sh", "sh", "-c", cmd, (char*)NULL);
+        /* only reached when execl() itself failed */
+        DEBUG("execl failed: %s", strerror(errno));
+    }
+    exit(-1);
+}
+
+/* Allocate the container's pseudo terminal pair.  On success returns 0 with
+ * *ttymaster and *ttyslave holding the open master and slave fds.  On failure
+ * returns -1 with anything opened here closed and both outputs set to -1. */
+static int lxcSetupContainerTty(int *ttymaster, int *ttyslave)
+{
+    int rc = -1;
+    int err;
+    char ttyName[PATH_MAX];
+
+    /* do not rely on the caller having pre-set the outputs */
+    *ttyslave = -1;
+    *ttymaster = posix_openpt(O_RDWR|O_NOCTTY);
+    if (*ttymaster < 0) {
+        DEBUG("posix_openpt() failed: %s", strerror(errno));
+        goto cleanup;
+    }
+    if (unlockpt(*ttymaster) < 0) {
+        DEBUG("unlockpt() failed: %s", strerror(errno));
+        goto cleanup;
+    }
+    /* ptsname_r() reports failure through its return value */
+    if (0 != (err = ptsname_r(*ttymaster, ttyName, sizeof(ttyName)))) {
+        DEBUG("ptsname_r() failed: %s", strerror(err));
+        goto cleanup;
+    }
+    *ttyslave = open(ttyName, O_RDWR|O_NOCTTY);
+    if (*ttyslave < 0) {
+        DEBUG("open(%s) failed: %s", ttyName, strerror(errno));
+        goto cleanup;
+    }
+
+    rc = 0;
+
+cleanup:
+    /* the slave is opened last, so only the master can need closing here */
+    if ((0 != rc) && (-1 != *ttymaster)) {
+        close(*ttymaster);
+        *ttymaster = -1;
+    }
+    return rc;
+}
+
+/* Make ttyfd the controlling terminal and stdin/stdout/stderr of the calling
+ * process, after moving it into a new session.  ttyfd is closed before
+ * returning unless it is itself one of the stdio descriptors.
+ * Returns 0 on success, -1 on failure. */
+static int lxcSetContainerStdio(int ttyfd)
+{
+    static const char *const stdioNames[] = { "stdin", "stdout", "stderr" };
+    int rc = -1;
+    int fd;
+
+    if (setsid() < 0) {
+        DEBUG("setsid failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
+        DEBUG("ioctl(TIOCSCTTY) failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    /* dup2() closes the target fd itself; closing 0-2 by hand first would
+     * destroy ttyfd if the tty happened to be opened on one of them */
+    for (fd = 0; fd <= 2; ++fd) {
+        if (dup2(ttyfd, fd) < 0) {
+            DEBUG("dup2(%s) failed: %s", stdioNames[fd], strerror(errno));
+            goto cleanup;
+        }
+    }
+    rc = 0;
+
+cleanup:
+    /* never close a descriptor that is now part of stdio */
+    if (ttyfd > 2) {
+        close(ttyfd);
+    }
+
+    return rc;
+}
+
+/* Shuttle single bytes between fd1 and fd2 until *loopFlag becomes non-zero.
+ * A negative fd is skipped; with only one valid fd its input is read and
+ * discarded.  pollmsecs is the poll() timeout and so bounds how long a
+ * flag change can go unnoticed.
+ * Returns 0 once the flag was seen set, -1 when there is nothing to monitor
+ * or poll() fails with an unexpected error. */
+static int lxcTtyForward(int fd1, int fd2, int *loopFlag, int pollmsecs)
+{
+    int rc = -1;
+    int i, gotInput, gotOther;
+    char buf[2];
+    struct pollfd fds[2];
+    int numFds = 0;
+
+    if (0 <= fd1) {
+        fds[numFds].fd = fd1;
+        fds[numFds].events = POLLIN;
+        ++numFds;
+    }
+    if (0 <= fd2) {
+        fds[numFds].fd = fd2;
+        fds[numFds].events = POLLIN;
+        ++numFds;
+    }
+    if (0 == numFds) {
+        DEBUG0("No fds to monitor, return");
+        goto cleanup;
+    }
+
+    while (!(*loopFlag)) {
+        if (poll(fds, numFds, pollmsecs) < 0) {
+            /* interrupted (e.g. by SIGCHLD): let the loop test the flag */
+            if ((errno == EINTR) || (errno == EAGAIN)) {
+                continue;
+            }
+            DEBUG("poll returned error: %s", strerror(errno));
+            goto cleanup;
+        }
+        gotInput = gotOther = 0;
+        for (i = 0; i < numFds; ++i) {
+            if ((fds[i].revents & POLLIN) &&
+                (1 == saferead(fds[i].fd, buf, 1))) {
+                /* forward only a byte that was really read */
+                gotInput = 1;
+                if (1 < numFds) {
+                    safewrite(fds[i ^ 1].fd, buf, 1);
+                }
+            } else if (fds[i].revents) {
+                gotOther = 1;
+            }
+        }
+        /* an fd stuck at EOF/hangup/error makes poll() return at once;
+         * pause instead of spinning while there is nothing to forward */
+        if (gotOther && !gotInput) {
+            poll(NULL, 0, pollmsecs);
+        }
+    }
+    rc = 0;
+cleanup:
+    return rc;
+}
+
+static pid_t initPid;       /* pid of the forked container init */
+static int exitChildLoop;   /* set to 1 by the handler once init has exited */
+/* SIGCHLD handler: flags init's exit and reaps any other child at once. */
+static void lxcExecChildHandler(int sig ATTRIBUTE_UNUSED, siginfo_t *signalInfo,
+                                void *context ATTRIBUTE_UNUSED)
+{
+    int savedErrno = errno; /* the interrupted code tests errno after poll() */
+    DEBUG("lxcExecChildHandler signal from %d\n", signalInfo->si_pid);
+    if (signalInfo->si_pid == initPid) {
+        exitChildLoop = 1;
+    } else {
+        waitpid(signalInfo->si_pid, NULL, WNOHANG);
+    }
+    errno = savedErrno;
+}
+
+/* Allocate the console pty, fork the container init onto its slave side and
+ * relay bytes between the master and vm->parentTty until init exits.  Never
+ * returns: exits with init's exit status, or -1 if setup or the wait fails. */
+static int lxcExecWithTty(lxc_vm_t *vm)
+{
+    int rc = -1, childStatus;
+    int ttymaster = -1, ttyslave = -1;
+    struct sigaction sigAction;
+
+    if (lxcSetupContainerTty(&ttymaster, &ttyslave) < 0) {
+        goto exit_with_error;
+    }
+    sigAction.sa_sigaction = lxcExecChildHandler;
+    sigfillset(&sigAction.sa_mask);
+    sigAction.sa_flags = SA_SIGINFO;
+    if (0 != sigaction(SIGCHLD, &sigAction, NULL)) {
+        DEBUG("sigaction failed: %s\n", strerror(errno));
+        goto exit_with_error;
+    }
+
+    exitChildLoop = 0;
+    initPid = fork();
+    if (initPid < 0) {
+        DEBUG("fork failed: %s", strerror(errno));
+        goto exit_with_error;
+    }
+    if (0 == initPid) {
+        close(ttymaster);   /* init only needs the slave side */
+        if (lxcSetContainerStdio(ttyslave) >= 0) {
+            lxcExecContainerInit(vm->def);   /* exits rather than returns */
+        }
+        goto exit_with_error;
+    }
+
+    close(ttyslave);
+    lxcTtyForward(ttymaster, vm->parentTty, &exitChildLoop, 100);
+    DEBUG("child waiting on pid %d", initPid);
+    if ((waitpid(initPid, &childStatus, 0) == initPid) &&
+        WIFEXITED(childStatus)) {
+        rc = WEXITSTATUS(childStatus);
+    }
+    DEBUG("container exited with rc: %d", rc);
+
+exit_with_error:
+    exit(rc);
+}
+
+/*
+ * Entry point of the container process; argv is the lxc_vm_t to start.
+ * Performs the definition's bind mounts, mounts /proc and hands over to
+ * lxcExecWithTty().  Returns only on failure, with a non-zero value.
+ */
+int lxcChild( void *argv )
+{
+    int rc = -1;
+    lxc_vm_t *vm = (lxc_vm_t *)argv;
+    lxc_vm_def_t *vmDef;
+    lxc_mount_t *curMount;
+
+    /* validate vm itself before it is dereferenced */
+    if ((NULL == vm) || (NULL == (vmDef = vm->def))) {
+        DEBUG0("lxcChild() passed invalid vm definition");
+        goto cleanup;
+    }
+
+    /* handle the bind mounts first before doing anything else that may */
+    /* then access those mounted dirs */
+    for (curMount = vmDef->mounts; curMount; curMount = curMount->next) {
+        rc = mount(curMount->source, curMount->target, NULL, MS_BIND, NULL);
+        if (0 != rc) {
+            DEBUG("failed to mount for container: %s", strerror(errno));
+            goto cleanup;
+        }
+    }
+
+    /* mount /proc */
+    rc = mount("lxcproc", "/proc", "proc", 0, NULL);
+    if (0 != rc) {
+        DEBUG("failed to mount /proc for container: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    rc = lxcExecWithTty(vm);
+    /* this function will only return if an error occurred */
+
+cleanup:
+    return rc;
+}
+
+#endif /* WITH_LXC */
+
+/*
+ * Local variables:
+ *  indent-tabs-mode: nil
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 4
+ * End:
+ */
+
Index: b/src/lxc_container.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ b/src/lxc_container.h	2008-03-21 11:57:27.000000000 -0700
@@ -0,0 +1,44 @@
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_container.h: header file for fcns run inside container
+ *
+ * Authors:
+ *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef LXC_CONTAINER_H
+#define LXC_CONTAINER_H
+
+#ifdef WITH_LXC
+
+/* Entry point for the container process; argv is cast to lxc_vm_t * */
+int lxcChild( void *argv );
+
+#endif /* WITH_LXC */
+
+#endif /* LXC_CONTAINER_H */
+
+/*
+ * Local variables:
+ *  indent-tabs-mode: nil
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 4
+ * End:
+ */
+
Index: b/src/lxc_driver.c
===================================================================
--- a/src/lxc_driver.c	2008-03-21 11:46:11.000000000 -0700
+++ b/src/lxc_driver.c	2008-03-24 16:46:48.000000000 -0700
@@ -25,14 +25,17 @@
 
 #ifdef WITH_LXC
 
+#include <fcntl.h>
 #include <sched.h>
 #include <sys/utsname.h>
 #include <string.h>
 #include <sys/types.h>
+#include <termios.h>
 #include <unistd.h>
 #include <wait.h>
 
 #include "lxc_conf.h"
+#include "lxc_container.h"
 #include "lxc_driver.h"
 #include "driver.h"
 #include "internal.h"
@@ -375,6 +378,210 @@
     return lxcGenerateXML(dom->conn, driver, vm, vm->def);
 }
 
+/* Clone the container process for vm, running lxcChild() in new pid, mount,
+ * uts, user and ipc namespaces.  On success the child's pid is stored in
+ * vm->pid and vm->def->id and the configuration is saved.
+ * Returns 0 on success, -1 (after reporting an error) on failure. */
+static int lxcStartContainer(virConnectPtr conn,
+                             lxc_driver_t* driver,
+                             lxc_vm_t *vm)
+{
+    int rc = -1, flags, cloneErrno;
+    int stacksize = getpagesize() * 4;
+    void *stack, *stacktop;
+
+    stack = malloc(stacksize);
+    if (!stack) {
+        lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
+                 _("unable to allocate container stack"));
+        goto error_exit;
+    }
+    stacktop = (char*)stack + stacksize;
+    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
+    vm->pid = clone(lxcChild, stacktop, flags, (void *)vm);
+    cloneErrno = errno;   /* DEBUG and free() below may overwrite errno */
+    /* without CLONE_VM the child runs on its own copy of this memory, so
+     * the parent's allocation is no longer needed once clone() returns */
+    free(stack);
+    DEBUG("clone() returned, %d", vm->pid);
+    if (vm->pid < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("clone() failed, %s"), strerror(cloneErrno));
+        goto error_exit;
+    }
+
+    vm->def->id = vm->pid;
+    lxcSaveConfig(NULL, driver, vm, vm->def);
+    rc = 0;
+error_exit:
+    return rc;
+}
+
+/* Switch the terminal ttyDev to raw mode (see cfmakeraw()).
+ * Returns 0 on success; on failure reports an error on conn and
+ * returns -1. */
+static int lxcPutTtyInRawMode(virConnectPtr conn, int ttyDev)
+{
+    struct termios rawAttr;
+
+    /* start from the current settings so only the raw-mode bits change */
+    if (tcgetattr(ttyDev, &rawAttr) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "tcgetattr() failed: %s", strerror(errno));
+        return -1;
+    }
+
+    cfmakeraw(&rawAttr);
+
+    /* TCSADRAIN: apply once pending output has been transmitted */
+    if (tcsetattr(ttyDev, TCSADRAIN, &rawAttr) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "tcsetattr failed: %s", strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Open the pty master named by vmDef->tty for the container console.
+ * On success returns 0 with *ttyDev holding the open, raw-mode master fd and
+ * vmDef->tty updated to the slave name; an empty vmDef->tty opens nothing
+ * and yields *ttyDev == -1.  On failure an error is reported, the fd is
+ * closed again, *ttyDev is -1 and -1 is returned. */
+static int lxcSetupTtyTunnel(virConnectPtr conn,
+                             lxc_vm_def_t *vmDef,
+                             int* ttyDev)
+{
+    int rc = -1;
+    char *ptsStr;
+
+    *ttyDev = -1;
+    if (0 == strlen(vmDef->tty)) {
+        return 0;
+    }
+
+    *ttyDev = open(vmDef->tty, O_RDWR|O_NOCTTY|O_NONBLOCK);
+    if (*ttyDev < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "open() tty failed: %s", strerror(errno));
+        goto setup_complete;
+    }
+    if (grantpt(*ttyDev) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "grantpt() failed: %s", strerror(errno));
+        goto setup_complete;
+    }
+    if (unlockpt(*ttyDev) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "unlockpt() failed: %s", strerror(errno));
+        goto setup_complete;
+    }
+
+    ptsStr = ptsname(*ttyDev);
+    if (ptsStr == NULL) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "ptsname() failed");
+        goto setup_complete;
+    }
+    /* This value may need to be stored in the container configuration file */
+    if (STRNEQ(ptsStr, vmDef->tty)) {
+        strcpy(vmDef->tty, ptsStr);
+    }
+
+    /* Enter raw mode, so all characters are passed directly to child */
+    if (lxcPutTtyInRawMode(conn, *ttyDev) < 0) {
+        goto setup_complete;
+    }
+
+    rc = 0;
+setup_complete:
+    /* rc only becomes 0 after every step succeeded; fd 0 is a valid fd too */
+    if ((0 != rc) && (*ttyDev >= 0)) {
+        close(*ttyDev);
+        *ttyDev = -1;
+    }
+    return rc;
+}
+
+/* Open the console tty for vm and clone its container process.  On success
+ * the vm is marked running and counted as active.  Returns 0, or -1 on error. */
+static int lxcVmStart(virConnectPtr conn,
+                      lxc_driver_t * driver,
+                      lxc_vm_t * vm)
+{
+    if (lxcSetupTtyTunnel(conn, vm->def, &vm->parentTty) < 0) {
+        return -1;
+    }
+    if (lxcStartContainer(conn, driver, vm) != 0) {
+        /* do not leak the console fd of a container that never started */
+        if (vm->parentTty >= 0) {
+            close(vm->parentTty);
+            vm->parentTty = -1;
+        }
+        return -1;
+    }
+
+    vm->state = VIR_DOMAIN_RUNNING;
+    driver->ninactivevms--;
+    driver->nactivevms++;
+    return 0;
+}
+
+/* Start the domain identified by dom->uuid.
+ * Returns the result of lxcVmStart(), or -1 after reporting
+ * VIR_ERR_INVALID_DOMAIN when lxcFindVMByUUID() finds no vm for that
+ * uuid. */
+static int lxcDomainStart(virDomainPtr dom)
+{
+    virConnectPtr conn = dom->conn;
+    lxc_driver_t *driver = (lxc_driver_t *)(conn->privateData);
+    lxc_vm_t *vm = lxcFindVMByUUID(driver, dom->uuid);
+
+    if (NULL == vm) {
+        lxcError(conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 "no domain with uuid");
+        return -1;
+    }
+
+    return lxcVmStart(conn, driver, vm);
+}
+
+/* Define a domain from xml and start it: parse the definition, assign it to
+ * a vm, save it and start the container.  Returns the new virDomainPtr with
+ * its id taken from the definition, or NULL if any step fails (a vm that
+ * could not be saved or started is removed again). */
+static virDomainPtr
+lxcDomainCreateAndStart(virConnectPtr conn,
+                        const char *xml,
+                        unsigned int flags ATTRIBUTE_UNUSED) {
+    lxc_driver_t *driver = (lxc_driver_t *)conn->privateData;
+    lxc_vm_def_t *def = lxcParseVMDef(conn, xml, NULL);
+    lxc_vm_t *vm;
+    virDomainPtr dom;
+
+    if (NULL == def) {
+        return NULL;
+    }
+
+    vm = lxcAssignVMDef(conn, driver, def);
+    if (NULL == vm) {
+        lxcFreeVMDef(def);
+        return NULL;
+    }
+
+    /* saving comes first; starting is only attempted when it succeeded */
+    if ((lxcSaveVMDef(conn, driver, vm, def) < 0) ||
+        (lxcVmStart(conn, driver, vm) < 0)) {
+        lxcRemoveInactiveVM(driver, vm);
+        return NULL;
+    }
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
+    if (dom) {
+        dom->id = vm->def->id;
+    }
+    return dom;
+}
 
 static int lxcStartup(void)
 {
@@ -459,7 +666,7 @@
     NULL, /* getCapabilities */
     lxcListDomains, /* listDomains */
     lxcNumDomains, /* numOfDomains */
-    NULL/*lxcDomainCreateLinux*/, /* domainCreateLinux */
+    lxcDomainCreateAndStart, /* domainCreateLinux */
     lxcDomainLookupByID, /* domainLookupByID */
     lxcDomainLookupByUUID, /* domainLookupByUUID */
     lxcDomainLookupByName, /* domainLookupByName */
@@ -483,7 +690,7 @@
     lxcDomainDumpXML, /* domainDumpXML */
     lxcListDefinedDomains, /* listDefinedDomains */
     lxcNumDefinedDomains, /* numOfDefinedDomains */
-    NULL, /* domainCreate */
+    lxcDomainStart, /* domainCreate */
     lxcDomainDefine, /* domainDefineXML */
     lxcDomainUndefine, /* domainUndefine */
     NULL, /* domainAttachDevice */
Index: b/src/Makefile.am
===================================================================
--- a/src/Makefile.am	2008-03-21 08:03:37.000000000 -0700
+++ b/src/Makefile.am	2008-03-21 12:04:05.000000000 -0700
@@ -61,6 +61,7 @@
 		openvz_driver.c openvz_driver.h			\
 		lxc_driver.c lxc_driver.h			\
 		lxc_conf.c lxc_conf.h				\
+		lxc_container.c lxc_container.h				\
                 nodeinfo.h nodeinfo.c                           \
 		storage_conf.h storage_conf.c			\
 		storage_driver.h storage_driver.c		\

--
Libvir-list mailing list
Libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list

[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]