On Wed, Mar 26, 2008 at 11:20:59PM -0700, Dave Leskovec wrote: > This is a repost of patch four in the last series I posted. It contains the > start container support. I've made some changes corresponding to Dan B's patch > moving the lxc driver under libvirtd. I removed the isolation forks and cleaned > up the status handling and PID storing. General comment over the patch is that I prefer to see all function with an header comment, it's a bit painful, but helps graps the context when an error arise and someone who don't know the code try to sort it out. That's not a blocker for commits but better for long term maintainance :-) [...] > +/* Functions */ > +static int lxcExecContainerInit(lxc_vm_def_t *vmDef) > +{ > + int rc = -1; > + char* execString; > + int execStringLen = strlen(vmDef->init) + 1 + 5; > + > + if(NULL == (execString = calloc(execStringLen, sizeof(char)))) { An error should really be reported here. > + goto error_out; > + } > + > + strcpy(execString, "exec "); > + strcat(execString, vmDef->init); Hum, it seems there is an off by one allocation error, you don't allocate the space for the terminating 0 > + execl("/bin/sh", "sh", "-c", execString, (char*)NULL); > + DEBUG("execl failed: %s", strerror(errno)); > + > +error_out: > + exit(rc); > +} [...] > +static int lxcTtyForward(int fd1, int fd2, int *loopFlag, int pollmsecs) > +{ > + int rc = -1; > + int i; > + char buf[2]; > + struct pollfd fds[2]; > + int numFds = 0; > + > + if (0 <= fd1) { > + fds[numFds].fd = fd1; > + fds[numFds].events = POLLIN; > + ++numFds; > + } > + > + if (0 <= fd2) { > + fds[numFds].fd = fd2; > + fds[numFds].events = POLLIN; > + ++numFds; > + } > + > + if (0 == numFds) { > + DEBUG0("No fds to monitor, return"); > + goto cleanup; > + } > + > + while (!(*loopFlag)) { > + if ((rc = poll(fds, numFds, pollmsecs)) <= 0) { > + if(*loopFlag) { > + goto cleanup; > + } > + > + if ((0 == rc) || (errno == EINTR) || (errno == EAGAIN)) { > + continue; > + } > + > + DEBUG("poll returned error: %s", strerror(errno)); > + goto cleanup; > + } > + > + for (i = 0; i < numFds; ++i) { > + if (!fds[i].revents) { > + continue; > + } > + > + if (fds[i].revents & POLLIN) { > + saferead(fds[i].fd, buf, 1); > + if (1 < numFds) { > + safewrite(fds[i ^ 1].fd, buf, 1); > + } > + So if only one fd is given we discard all data read, and if 2 fds are given all data from one is forwarded to the other, right ? > +static int exitChildLoop; > +static void lxcExecChildHandler(int sig ATTRIBUTE_UNUSED, > + siginfo_t *signalInfo, > + void *context ATTRIBUTE_UNUSED) > +{ > + DEBUG("lxcExecChildHandler signal from %d\n", signalInfo->si_pid); > + > + if (signalInfo->si_pid == initPid) { > + exitChildLoop = 1; > + } else { > + waitpid(signalInfo->si_pid, NULL, WNOHANG); > + } > + > +} > + > +static int lxcExecWithTty(lxc_vm_t *vm) > +{ > + int rc = -1; > + lxc_vm_def_t *vmDef = vm->def; > + int ttymaster = -1; > + int ttyslave = -1; > + struct sigaction sigAction; > + sigset_t sigMask; > + int childStatus; > + > + if (lxcSetupContainerTty(&ttymaster, &ttyslave) < 0) { > + goto exit_with_error; > + } > + > + sigAction.sa_sigaction = lxcExecChildHandler; > + sigfillset(&sigMask); > + sigAction.sa_mask = sigMask; > + sigAction.sa_flags = SA_SIGINFO; > + if (0 != sigaction(SIGCHLD, &sigAction, NULL)) { > + DEBUG("sigaction failed: %s\n", strerror(errno)); > + goto exit_with_error; > + } > + > + exitChildLoop = 0; > + if ((initPid = fork()) == 0) { > + if(lxcSetContainerStdio(ttyslave) < 0) { > + exitChildLoop = 1; > + goto exit_with_error; > + } > + > + lxcExecContainerInit(vmDef); > + /* this function will not return. if it fails, it will exit */ > + } > + > + close(ttyslave); > + lxcTtyForward(ttymaster, vm->parentTty, > + &exitChildLoop, 100); > + > + DEBUG("child waiting on pid %d", initPid); > + waitpid(initPid, &childStatus, 0); > + rc = WEXITSTATUS(childStatus); > + DEBUG("container exited with rc: %d", rc); > + > +exit_with_error: > + exit(rc); > +} So this is forked for each container created, right ? [...] > Index: b/src/lxc_container.h ok > Index: b/src/lxc_driver.c > =================================================================== > --- a/src/lxc_driver.c 2008-03-21 11:46:11.000000000 -0700 > +++ b/src/lxc_driver.c 2008-03-24 16:46:48.000000000 -0700 > @@ -25,14 +25,17 @@ [...] > +static int lxcStartContainer(virConnectPtr conn, > + lxc_driver_t* driver, > + lxc_vm_t *vm) > +{ > + int rc = -1; > + int flags; > + int stacksize = getpagesize() * 4; > + void *stack, *stacktop; > + > + /* allocate a stack for the container */ > + stack = malloc(stacksize); > + if (!stack) { > + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, > + _("unable to allocate container stack")); > + goto error_exit; > + } > + stacktop = (char*)stack + stacksize; > + > + flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD; > + > + vm->pid = clone(lxcChild, stacktop, flags, (void *)vm); > + > + DEBUG("clone() returned, %d", vm->pid); > + > + if (vm->pid < 0) { > + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, > + _("clone() failed, %s"), strerror(errno)); > + goto error_exit; > + } > + > + vm->def->id = vm->pid; > + lxcSaveConfig(NULL, driver, vm, vm->def); We should make sure at some point taht there is some kind of locking mechanism when creating those config files, no ? > + rc = 0; > + > +error_exit: > + return rc; > +} [...] > +static int lxcVmStart(virConnectPtr conn, > + lxc_driver_t * driver, > + lxc_vm_t * vm) > +{ > + int rc = -1; > + lxc_vm_def_t *vmDef = vm->def; > + > + /* open tty for the container */ > + if(lxcSetupTtyTunnel(conn, vmDef, &vm->parentTty) < 0) { > + goto cleanup; > + } > + > + rc = lxcStartContainer(conn, driver, vm); > + > + if(rc == 0) { > + vm->state = VIR_DOMAIN_RUNNING; > + driver->ninactivevms--; > + driver->nactivevms++; > + } > + > +cleanup: > + return rc; > +} [...] Hum, now that config names are saved based on domain names, wouldn't it be better to use a name based lookup ? Less precise but more direct [...] > > static int lxcStartup(void) > { > @@ -459,7 +666,7 @@ > NULL, /* getCapabilities */ > lxcListDomains, /* listDomains */ > lxcNumDomains, /* numOfDomains */ > - NULL/*lxcDomainCreateLinux*/, /* domainCreateLinux */ > + lxcDomainCreateAndStart, /* domainCreateLinux */ > lxcDomainLookupByID, /* domainLookupByID */ > lxcDomainLookupByUUID, /* domainLookupByUUID */ > lxcDomainLookupByName, /* domainLookupByName */ > @@ -483,7 +690,7 @@ > lxcDomainDumpXML, /* domainDumpXML */ > lxcListDefinedDomains, /* listDefinedDomains */ > lxcNumDefinedDomains, /* numOfDefinedDomains */ > - NULL, /* domainCreate */ > + lxcDomainStart, /* domainCreate */ > lxcDomainDefine, /* domainDefineXML */ > lxcDomainUndefine, /* domainUndefine */ > NULL, /* domainAttachDevice */ Looks fine overall. I wonder if 1 fork/clone per container can't be reduced to a single process doing the fd processing for all the containers running. But that's probably harder, more fragile and for little gain. Daniel -- Red Hat Virtualization group http://redhat.com/virtualization/ Daniel Veillard | virtualization library http://libvirt.org/ veillard@xxxxxxxxxx | libxml GNOME XML XSLT toolkit http://xmlsoft.org/ http://veillard.com/ | Rpmfind RPM search engine http://rpmfind.net/ -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list