This is a repost of the start container support. Changes from the last version:
* Report an error when allocation for init string fails in lxcExecContainerInit()
* Change to find by name in lxcDomainStart()
* Move tty forwarding process outside of the container. This allows consolidating the forwarding into a single process at a later time. This also means that the container init process as specified by the user now runs as the container root process with pid = 1. The tty setup will require some (hopefully minor) modifications when pts namespaces are enabled.
* Add header comments to a number of the functions.

This is an updated rough outline of the functions involved in starting a container and the namespace and process under which they run:

lxcVmStart() - runs under libvirtd process
    lxcSetupTtyTunnel() - opens and configures parent tty
    lxcSetupContainerTty() - opens container tty
    fork
        child process calls lxcTtyForward() see below
        parent continues
    lxcStartContainer - see below
    return

lxcStartContainer() - runs in parent namespace, libvirtd process
    Allocate stack for container
    clone() - child process will start in lxcChild() see below
    return

lxcChild() - runs within container, child process from clone()
    mount user filesystems
    mount container /proc
    lxcExecWithTty() - see below, will not return

lxcExecWithTty() - runs within container, root process
    lxcSetContainerStdio - sets container tty as primary console
    lxcExecContainerInit - see below, should not return
    exit()

lxcExecContainerInit() - runs within container, root process
    exec container's init
    if exec fails, exit()

Thanks!

--
Best Regards,
Dave Leskovec
IBM Linux Technology Center
Open Virtualization
--- src/Makefile.am | 1 src/lxc_conf.h | 2 src/lxc_container.c | 345 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lxc_container.h | 44 ++++++ src/lxc_driver.c | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 730 insertions(+), 2 deletions(-) Index: b/src/Makefile.am =================================================================== --- a/src/Makefile.am 2008-03-31 15:12:01.000000000 -0700 +++ b/src/Makefile.am 2008-03-31 15:15:56.000000000 -0700 @@ -61,6 +61,7 @@ openvz_driver.c openvz_driver.h \ lxc_driver.c lxc_driver.h \ lxc_conf.c lxc_conf.h \ + lxc_container.c lxc_container.h \ nodeinfo.h nodeinfo.c \ storage_conf.h storage_conf.c \ storage_driver.h storage_driver.c \ Index: b/src/lxc_container.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ b/src/lxc_container.h 2008-03-31 15:15:56.000000000 -0700 @@ -0,0 +1,44 @@ +/* + * Copyright IBM Corp. 2008 + * + * lxc_container.h: header file for fcns run inside container + * + * Authors: + * David L. Leskovec <dlesko at linux.vnet.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LXC_CONTAINER_H +#define LXC_CONTAINER_H + +#ifdef WITH_LXC + +/* Function declarations */ +int lxcChild( void *argv ); + +#endif /* WITH_LXC */ + +#endif /* LXC_CONTAINER_H */ + +/* + * Local variables: + * indent-tabs-mode: nil + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 4 + * End: + */ + Index: b/src/lxc_driver.c =================================================================== --- a/src/lxc_driver.c 2008-03-31 15:12:01.000000000 -0700 +++ b/src/lxc_driver.c 2008-04-04 16:50:35.000000000 -0700 @@ -25,17 +25,22 @@ #ifdef WITH_LXC +#include <fcntl.h> +#include <poll.h> #include <sched.h> #include <sys/utsname.h> #include <string.h> #include <sys/types.h> +#include <termios.h> #include <unistd.h> #include <wait.h> #include "lxc_conf.h" +#include "lxc_container.h" #include "lxc_driver.h" #include "driver.h" #include "internal.h" +#include "util.h" /* debug macros */ #define DEBUG(fmt,...) 
VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
@@ -375,11 +380,456 @@
     return lxcGenerateXML(dom->conn, driver, vm, vm->def);
 }
 
+/**
+ * lxcStartContainer:
+ * @conn: connection used for error reporting
+ * @driver: driver state handed to lxcSaveConfig()
+ * @vm: vm whose definition id is set to the pid returned by clone()
+ *
+ * Runs in the libvirtd process.  Clones a child that starts in
+ * lxcChild() with new pid, mount, uts, user and ipc namespaces.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcStartContainer(virConnectPtr conn,
+                             lxc_driver_t* driver,
+                             lxc_vm_t *vm)
+{
+    int rc = -1;
+    int flags;
+    int cloneErrno;
+    int stacksize = getpagesize() * 4;
+    void *stack, *stacktop;
+
+    /* allocate a stack for the container */
+    stack = malloc(stacksize);
+    if (!stack) {
+        lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
+                 _("unable to allocate container stack"));
+        goto error_exit;
+    }
+    stacktop = (char*)stack + stacksize;
+
+    flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD;
+
+    vm->def->id = clone(lxcChild, stacktop, flags, (void *)vm);
+    /* save errno before free()/DEBUG() get a chance to overwrite it */
+    cloneErrno = errno;
+
+    /* CLONE_VM is not set, so the child runs on its own copy of this */
+    /* memory and the parent's copy of the stack can be released now */
+    free(stack);
+
+    DEBUG("clone() returned, %d", vm->def->id);
+
+    if (vm->def->id < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("clone() failed, %s"), strerror(cloneErrno));
+        goto error_exit;
+    }
+
+    /* do we need the save config here? */
+    lxcSaveConfig(NULL, driver, vm, vm->def);
+
+    rc = 0;
+
+error_exit:
+    return rc;
+}
+
+/**
+ * lxcPutTtyInRawMode:
+ * @conn: connection used for error reporting
+ * @ttyDev: file descriptor of the tty to reconfigure
+ *
+ * Reads the current terminal attributes, applies cfmakeraw() to them
+ * and writes them back with TCSADRAIN.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcPutTtyInRawMode(virConnectPtr conn, int ttyDev)
+{
+    int rc = -1;
+
+    struct termios ttyAttr;
+
+    if (tcgetattr(ttyDev, &ttyAttr) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "tcgetattr() failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    cfmakeraw(&ttyAttr);
+
+    if (tcsetattr(ttyDev, TCSADRAIN, &ttyAttr) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 "tcsetattr failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    rc = 0;
+
+cleanup:
+    return rc;
+}
+
+/**
+ * lxcSetupTtyTunnel:
+ * @conn: connection used for error reporting
+ * @vmDef: vm definition holding the parent tty path
+ * @ttyDev: set to the opened parent tty fd, or -1 if no tty is configured
+ *
+ * Opens the tty named in vmDef->tty, runs grantpt()/unlockpt() on it,
+ * copies the ptsname() result back into vmDef->tty when it differs and
+ * switches the tty to raw mode.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSetupTtyTunnel(virConnectPtr conn,
+                             lxc_vm_def_t *vmDef,
+                             int* ttyDev)
+{
+    int rc = -1;
+    char *ptsStr;
+
+    if (0 < strlen(vmDef->tty)) {
+        *ttyDev = open(vmDef->tty, O_RDWR|O_NOCTTY|O_NONBLOCK);
+        if (*ttyDev < 0) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "open() tty failed: %s", strerror(errno));
+            goto setup_complete;
+        }
+
+        /* rc stays -1 until every step has succeeded so that a late */
+        /* failure is not reported to the caller as success */
+        if (grantpt(*ttyDev) < 0) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "grantpt() failed: %s", strerror(errno));
+            goto setup_complete;
+        }
+
+        if (unlockpt(*ttyDev) < 0) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "unlockpt() failed: %s", strerror(errno));
+            goto setup_complete;
+        }
+
+        /* get the name of the pts device */
+        ptsStr = ptsname(*ttyDev);
+        if (ptsStr == NULL) {
+            lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                     "ptsname() failed");
+            goto setup_complete;
+        }
+        /* This value may need to be stored in the container configuration file */
+        /* NOTE(review): unbounded copy - confirm vmDef->tty can hold ptsStr */
+        if (STRNEQ(ptsStr, vmDef->tty)) {
+            strcpy(vmDef->tty, ptsStr);
+        }
+
+        /* Enter raw mode, so all characters are passed directly to child */
+        if (lxcPutTtyInRawMode(conn, *ttyDev) < 0) {
+            goto setup_complete;
+        }
+
+    } else {
+        *ttyDev = -1;
+    }
+
+    rc = 0;
+
+setup_complete:
+    /* 0 is a valid descriptor, so test for >= 0 */
+    if((0 != rc) && (*ttyDev >= 0)) {
+        close(*ttyDev);
+        *ttyDev = -1;
+    }
+
+    return rc;
+}
+
+/**
+ * lxcSetupContainerTty:
+ * @conn: connection used for error reporting
+ * @ttymaster: set to the fd returned by posix_openpt()
+ * @ttyName: set to a newly allocated copy of the pts name; the caller
+ *           owns the string
+ *
+ * Opens and unlocks a new pseudo terminal for the container.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+/* how are we going to return the pts name to the container? */
+static int lxcSetupContainerTty(virConnectPtr conn,
+                                int *ttymaster,
+                                char **ttyName)
+{
+    int rc = -1;
+    char tempTtyName[PATH_MAX];
+
+    *ttymaster = posix_openpt(O_RDWR|O_NOCTTY);
+    if (*ttymaster < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("posix_openpt failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
+    if (unlockpt(*ttymaster) < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("unlockpt failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
+    if (0 != ptsname_r(*ttymaster, tempTtyName, sizeof(tempTtyName))) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("ptsname_r failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
+    /* check the allocation itself (*ttyName), not the out parameter */
+    *ttyName = strdup(tempTtyName);
+    if (NULL == *ttyName) {
+        lxcError(conn, NULL, VIR_ERR_NO_MEMORY,
+                 _("unable to allocate container name string"));
+        goto cleanup;
+    }
+
+    rc = 0;
+
+cleanup:
+    if (0 != rc) {
+        if (-1 != *ttymaster) {
+            close(*ttymaster);
+            *ttymaster = -1;
+        }
+    }
+
+    return rc;
+}
+
+/**
+ * lxcTtyForward:
+ * @fd1: first file descriptor, ignored when negative
+ * @fd2: second file descriptor, ignored when negative
+ * @loopFlag: currently unused
+ * @pollmsecs: currently unused
+ *
+ * Copies data one byte at a time between the two descriptors until
+ * one of them reports end of file, hangup or an error.  With a single
+ * valid descriptor the data read from it is discarded.
+ *
+ * Returns 0 on end of file or hangup, -1 on error or without any fd
+ */
+static int lxcTtyForward(int fd1, int fd2,
+                         int *loopFlag ATTRIBUTE_UNUSED,
+                         int pollmsecs ATTRIBUTE_UNUSED)
+{
+    int rc = -1;
+    int i;
+    int nbytes;
+    char buf[2];
+    struct pollfd fds[2];
+    int numFds = 0;
+
+    if (0 <= fd1) {
+        fds[numFds].fd = fd1;
+        fds[numFds].events = POLLIN;
+        ++numFds;
+    }
+
+    if (0 <= fd2) {
+        fds[numFds].fd = fd2;
+        fds[numFds].events = POLLIN;
+        ++numFds;
+    }
+
+    if (0 == numFds) {
+        DEBUG0("No fds to monitor, return");
+        goto cleanup;
+    }
+
+    while (1) {
+        rc = poll(fds, numFds, -1);
+        if (rc <= 0) {
+            if ((0 == rc) || (errno == EINTR) || (errno == EAGAIN)) {
+                continue;
+            }
+
+            DEBUG("poll returned error: %s", strerror(errno));
+            rc = -1;
+            goto cleanup;
+        }
+
+        for (i = 0; i < numFds; ++i) {
+            if (fds[i].revents & POLLIN) {
+                nbytes = saferead(fds[i].fd, buf, 1);
+                if (nbytes < 0) {
+                    DEBUG("read failed: %s", strerror(errno));
+                    rc = -1;
+                    goto cleanup;
+                }
+                if (0 == nbytes) {
+                    /* end of file, nothing more to forward */
+                    rc = 0;
+                    goto cleanup;
+                }
+
+                if ((1 < numFds) &&
+                    (safewrite(fds[i ^ 1].fd, buf, 1) < 0)) {
+                    /* best effort: the byte is dropped */
+                    DEBUG("write failed: %s", strerror(errno));
+                }
+            } else if (fds[i].revents & (POLLERR|POLLHUP|POLLNVAL)) {
+                /* the other end is gone and no data is pending; */
+                /* polling again would return immediately forever */
+                DEBUG0("tty closed, stop forwarding");
+                rc = 0;
+                goto cleanup;
+            }
+        }
+    }
+
+cleanup:
+    return rc;
+}
+
+/**
+ * lxcVmStart:
+ * @conn: connection used for error reporting
+ * @driver: driver state whose active/inactive counters are updated
+ * @vm: vm to start
+ *
+ * Opens the parent and container ttys, forks a process that forwards
+ * data between them and then starts the container.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcVmStart(virConnectPtr conn,
+                      lxc_driver_t * driver,
+                      lxc_vm_t * vm)
+{
+    int rc = -1;
+    lxc_vm_def_t *vmDef = vm->def;
+
+    /* open parent tty */
+    if (lxcSetupTtyTunnel(conn, vmDef, &vm->parentTty) < 0) {
+        goto cleanup;
+    }
+
+    /* open container tty */
+    if (lxcSetupContainerTty(conn, &(vm->containerTtyFd), &(vm->containerTty)) < 0) {
+        goto cleanup;
+    }
+
+    /* fork process to handle the tty io forwarding */
+    vm->pid = fork();
+    if (vm->pid < 0) {
+        lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("fork() failed: %s"), strerror(errno));
+        goto cleanup;
+    }
+
+    if (vm->pid == 0) {
+        /* child process calls forward routine; it must exit here and */
+        /* never return into the libvirtd code that continues below */
+        rc = lxcTtyForward(vm->parentTty, vm->containerTtyFd, NULL, 0);
+        _exit((rc < 0) ? 1 : 0);
+    }
+
+    /* NOTE(review): if the container fails to start, the forwarding */
+    /* process and both ttys are left behind - confirm who cleans up */
+    rc = lxcStartContainer(conn, driver, vm);
+
+    if (rc == 0) {
+        vm->state = VIR_DOMAIN_RUNNING;
+        driver->ninactivevms--;
+        driver->nactivevms++;
+    }
+
+cleanup:
+    return rc;
+}
+
+/**
+ * lxcDomainStart:
+ * @dom: domain to start, looked up by name
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcDomainStart(virDomainPtr dom)
+{
+    int rc = -1;
+    virConnectPtr conn = dom->conn;
+    lxc_driver_t *driver = (lxc_driver_t *)(conn->privateData);
+    lxc_vm_t *vm = lxcFindVMByName(driver, dom->name);
+
+    if (!vm) {
+        /* the lookup above is by name, so say so */
+        lxcError(conn, dom, VIR_ERR_INVALID_DOMAIN,
+                 "no domain with matching name");
+        goto cleanup;
+    }
+
+    rc = lxcVmStart(conn, driver, vm);
+
+cleanup:
+    return rc;
+}
+
+/**
+ * lxcDomainCreateAndStart:
+ * @conn: connection the domain is created on
+ * @xml: XML description of the container
+ * @flags: unused
+ *
+ * Parses the definition, registers and saves it and starts the
+ * container.
+ *
+ * Returns the new domain object or NULL in case of error
+ */
+static virDomainPtr
+lxcDomainCreateAndStart(virConnectPtr conn,
+                        const char *xml,
+                        unsigned int flags ATTRIBUTE_UNUSED) {
+    lxc_driver_t *driver = (lxc_driver_t *)conn->privateData;
+    lxc_vm_t *vm;
+    lxc_vm_def_t *def;
+    virDomainPtr dom = NULL;
+
+    if (!(def = lxcParseVMDef(conn, xml, NULL))) {
+        goto return_point;
+    }
+
+    if (!(vm = lxcAssignVMDef(conn, driver, def))) {
+        lxcFreeVMDef(def);
+        goto return_point;
+    }
+
+    if (lxcSaveVMDef(conn, driver, vm, def) < 0) {
+        lxcRemoveInactiveVM(driver, vm);
+        goto return_point;
+    }
+
+    if (lxcVmStart(conn, driver, vm) < 0) {
+        lxcRemoveInactiveVM(driver, vm);
+        goto return_point;
+    }
+
+    dom = virGetDomain(conn, vm->def->name, vm->def->uuid);
+    if (dom) {
+        dom->id = vm->def->id;
+    }
+
+return_point:
+    return dom;
+}
 
 static int lxcStartup(void)
 {
     uid_t uid = getuid();
 
+    debugFlag = 1;
+
     /* Check that the user is root */
     if (0 != uid) {
         return -1;
@@ -469,7 +919,7 @@
     NULL, /* getCapabilities */
     lxcListDomains, /* listDomains */
     lxcNumDomains, /* numOfDomains */
-    NULL/*lxcDomainCreateLinux*/, /* domainCreateLinux */
+    lxcDomainCreateAndStart, /* domainCreateLinux */
     lxcDomainLookupByID, /* domainLookupByID */
     lxcDomainLookupByUUID, /* domainLookupByUUID */
     lxcDomainLookupByName, /* domainLookupByName */
@@ -493,7 +943,7 @@
     lxcDomainDumpXML, /* domainDumpXML */
     lxcListDefinedDomains, /* listDefinedDomains */
     lxcNumDefinedDomains, /* numOfDefinedDomains */
-    NULL, /* domainCreate */
+    lxcDomainStart, /* domainCreate */
     lxcDomainDefine, /* domainDefineXML */
     lxcDomainUndefine, /* domainUndefine */
     NULL, /* domainAttachDevice */
Index: b/src/lxc_conf.h
===================================================================
--- a/src/lxc_conf.h 2008-03-31 05:02:12.000000000 -0700
+++ b/src/lxc_conf.h 2008-04-04 12:08:00.000000000 -0700
@@ -72,6 +72,8 @@
     char configFileBase[PATH_MAX];
 
     int parentTty;
+    int containerTtyFd;
+    char* containerTty;
 
     lxc_vm_def_t *def;
 
Index: 
b/src/lxc_container.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ b/src/lxc_container.c 2008-04-04 16:19:02.000000000 -0700
@@ -0,0 +1,221 @@
+/*
+ * Copyright IBM Corp. 2008
+ *
+ * lxc_container.c: functions that run inside the container
+ *
+ * Authors:
+ *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ */
+
+#include <config.h>
+
+#ifdef WITH_LXC
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "lxc_container.h"
+#include "lxc_conf.h"
+#include "util.h"
+
+#define DEBUG(fmt,...) VIR_DEBUG(__FILE__, fmt, __VA_ARGS__)
+#define DEBUG0(msg) VIR_DEBUG(__FILE__, "%s", msg)
+
+/* Functions */
+
+/**
+ * lxcExecContainerInit:
+ * @vmDef: definition holding the init command
+ *
+ * Runs "exec <init>" via /bin/sh so that the container init replaces
+ * the calling process.
+ *
+ * Does not return; exits with -1 if the exec cannot be performed
+ */
+static int lxcExecContainerInit(lxc_vm_def_t *vmDef)
+{
+    int rc = -1;
+    char* execString;
+    int execStringLen = strlen(vmDef->init) + 1 + 5;
+
+    if(NULL == (execString = calloc(execStringLen, sizeof(char)))) {
+        DEBUG0("failed to calloc memory for init string");
+        goto error_out;
+    }
+
+    strcpy(execString, "exec ");
+    strcat(execString, vmDef->init);
+
+    execl("/bin/sh", "sh", "-c", execString, (char*)NULL);
+    DEBUG("execl failed: %s", strerror(errno));
+
+error_out:
+    exit(rc);
+}
+
+/**
+ * lxcSetContainerStdio:
+ * @ttyName: path of the tty to use as the container console
+ *
+ * Starts a new session, makes the tty the controlling terminal and
+ * duplicates it onto stdin, stdout and stderr.
+ *
+ * Returns 0 on success or -1 in case of error
+ */
+static int lxcSetContainerStdio(const char *ttyName)
+{
+    int rc = -1;
+    int ttyfd;
+
+    if (setsid() < 0) {
+        DEBUG0("setsid failed");
+        goto error_out;
+    }
+
+    ttyfd = open(ttyName, O_RDWR|O_NOCTTY);
+    if (ttyfd < 0) {
+        DEBUG("open(%s) failed: %s", ttyName, strerror(errno));
+        goto error_out;
+    }
+
+    if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
+        DEBUG("ioctl(TIOCSCTTY) failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    /* dup2() closes the target descriptor itself, so no explicit */
+    /* close of 0, 1 and 2 is needed first */
+    if (dup2(ttyfd, 0) < 0) {
+        DEBUG("dup2(stdin) failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    if (dup2(ttyfd, 1) < 0) {
+        DEBUG("dup2(stdout) failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    if (dup2(ttyfd, 2) < 0) {
+        DEBUG("dup2(stderr) failed: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    rc = 0;
+
+cleanup:
+    /* keep the descriptor if it already is one of the stdio fds */
+    if (ttyfd > 2) {
+        close(ttyfd);
+    }
+
+error_out:
+    return rc;
+}
+
+/**
+ * lxcExecWithTty:
+ * @vm: vm being started
+ *
+ * Sets the container tty up as stdio and execs the container init.
+ *
+ * Does not return; exits with -1 if the setup or the exec fails
+ */
+static int lxcExecWithTty(lxc_vm_t *vm)
+{
+    int rc = -1;
+    lxc_vm_def_t *vmDef = vm->def;
+
+    if(lxcSetContainerStdio(vm->containerTty) < 0) {
+        goto exit_with_error;
+    }
+
+    lxcExecContainerInit(vmDef);
+
+exit_with_error:
+    exit(rc);
+}
+
+/**
+ * lxcChild:
+ * @argv: pointer to the lxc_vm_t being started
+ *
+ * Entry point of the process created by clone().  Performs the bind
+ * mounts listed in the definition, mounts /proc and then hands over
+ * to lxcExecWithTty().
+ *
+ * Returns only on failure, with -1
+ */
+int lxcChild( void *argv )
+{
+    int rc = -1;
+    lxc_vm_t *vm = (lxc_vm_t *)argv;
+    lxc_vm_def_t *vmDef = vm->def;
+    lxc_mount_t *curMount;
+
+    if (NULL == vmDef) {
+        DEBUG0("lxcChild() passed invalid vm definition");
+        goto cleanup;
+    }
+
+    /* handle the bind mounts first before doing anything else that may */
+    /* then access those mounted dirs */
+    for (curMount = vmDef->mounts; curMount; curMount = curMount->next) {
+        rc = mount(curMount->source,
+                   curMount->target,
+                   NULL,
+                   MS_BIND,
+                   NULL);
+        if (0 != rc) {
+            DEBUG("failed to mount for container: %s", strerror(errno));
+            goto cleanup;
+        }
+    }
+
+    /* mount /proc */
+    rc = mount("lxcproc", "/proc", "proc", 0, NULL);
+    if (0 != rc) {
+        DEBUG("failed to mount /proc for container: %s", strerror(errno));
+        goto cleanup;
+    }
+
+    rc = lxcExecWithTty(vm);
+    /* this function will only return if an error occurred */
+
+cleanup:
+    return rc;
+}
+
+#endif /* WITH_LXC */
+
+/*
+ * Local variables:
+ *  indent-tabs-mode: nil
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 4
+ * End:
+ */
+
-- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list