Daniel Veillard wrote: > On Wed, Mar 26, 2008 at 11:20:59PM -0700, Dave Leskovec wrote: >> This is a repost of patch four in the last series I posted. It contains the >> start container support. I've made some changes corresponding to Dan B's patch >> moving the lxc driver under libvirtd. I removed the isolation forks and cleaned >> up the status handling and PID storing. > > General comment over the patch is that I prefer to see all function with > an header comment, it's a bit painful, but helps graps the context when an > error arise and someone who don't know the code try to sort it out. That's > not a blocker for commits but better for long term maintainance :-) Yep, I'll try to add these as we go... > > [...] >> +/* Functions */ >> +static int lxcExecContainerInit(lxc_vm_def_t *vmDef) >> +{ >> + int rc = -1; >> + char* execString; >> + int execStringLen = strlen(vmDef->init) + 1 + 5; >> + >> + if(NULL == (execString = calloc(execStringLen, sizeof(char)))) { > > An error should really be reported here. Yep. > >> + goto error_out; >> + } >> + >> + strcpy(execString, "exec "); >> + strcat(execString, vmDef->init); > > Hum, it seems there is an off by one allocation error, you don't allocate > the space for the terminating 0 A comment probably would have helped here :-) the + 1 up there in setting the execStringLen is for the NULL. > >> + execl("/bin/sh", "sh", "-c", execString, (char*)NULL); >> + DEBUG("execl failed: %s", strerror(errno)); >> + >> +error_out: >> + exit(rc); >> +} > [...] > >> +static int lxcTtyForward(int fd1, int fd2, int *loopFlag, int pollmsecs) >> +{ >> + int rc = -1; >> + int i; >> + char buf[2]; >> + struct pollfd fds[2]; >> + int numFds = 0; >> + >> + if (0 <= fd1) { >> + fds[numFds].fd = fd1; >> + fds[numFds].events = POLLIN; >> + ++numFds; >> + } >> + >> + if (0 <= fd2) { >> + fds[numFds].fd = fd2; >> + fds[numFds].events = POLLIN; >> + ++numFds; >> + } >> + >> + if (0 == numFds) { >> + DEBUG0("No fds to monitor, return"); >> + goto cleanup; >> + } >> + >> + while (!(*loopFlag)) { >> + if ((rc = poll(fds, numFds, pollmsecs)) <= 0) { >> + if(*loopFlag) { >> + goto cleanup; >> + } >> + >> + if ((0 == rc) || (errno == EINTR) || (errno == EAGAIN)) { >> + continue; >> + } >> + >> + DEBUG("poll returned error: %s", strerror(errno)); >> + goto cleanup; >> + } >> + >> + for (i = 0; i < numFds; ++i) { >> + if (!fds[i].revents) { >> + continue; >> + } >> + >> + if (fds[i].revents & POLLIN) { >> + saferead(fds[i].fd, buf, 1); >> + if (1 < numFds) { >> + safewrite(fds[i ^ 1].fd, buf, 1); >> + } >> + > > So if only one fd is given we discard all data read, and if 2 fds are given > all data from one is forwarded to the other, right ? right > >> +static int exitChildLoop; >> +static void lxcExecChildHandler(int sig ATTRIBUTE_UNUSED, >> + siginfo_t *signalInfo, >> + void *context ATTRIBUTE_UNUSED) >> +{ >> + DEBUG("lxcExecChildHandler signal from %d\n", signalInfo->si_pid); >> + >> + if (signalInfo->si_pid == initPid) { >> + exitChildLoop = 1; >> + } else { >> + waitpid(signalInfo->si_pid, NULL, WNOHANG); >> + } >> + >> +} >> + >> +static int lxcExecWithTty(lxc_vm_t *vm) >> +{ >> + int rc = -1; >> + lxc_vm_def_t *vmDef = vm->def; >> + int ttymaster = -1; >> + int ttyslave = -1; >> + struct sigaction sigAction; >> + sigset_t sigMask; >> + int childStatus; >> + >> + if (lxcSetupContainerTty(&ttymaster, &ttyslave) < 0) { >> + goto exit_with_error; >> + } >> + >> + sigAction.sa_sigaction = lxcExecChildHandler; >> + sigfillset(&sigMask); >> + sigAction.sa_mask = sigMask; >> + sigAction.sa_flags = SA_SIGINFO; >> + if (0 != sigaction(SIGCHLD, &sigAction, NULL)) { >> + DEBUG("sigaction failed: %s\n", strerror(errno)); >> + goto exit_with_error; >> + } >> + >> + exitChildLoop = 0; >> + if ((initPid = fork()) == 0) { >> + if(lxcSetContainerStdio(ttyslave) < 0) { >> + exitChildLoop = 1; >> + goto exit_with_error; >> + } >> + >> + lxcExecContainerInit(vmDef); >> + /* this function will not return. if it fails, it will exit */ >> + } >> + >> + close(ttyslave); >> + lxcTtyForward(ttymaster, vm->parentTty, >> + &exitChildLoop, 100); >> + >> + DEBUG("child waiting on pid %d", initPid); >> + waitpid(initPid, &childStatus, 0); >> + rc = WEXITSTATUS(childStatus); >> + DEBUG("container exited with rc: %d", rc); >> + >> +exit_with_error: >> + exit(rc); >> +} > > So this is forked for each container created, right ? right > > [...] >> Index: b/src/lxc_container.h > ok > >> Index: b/src/lxc_driver.c >> =================================================================== >> --- a/src/lxc_driver.c 2008-03-21 11:46:11.000000000 -0700 >> +++ b/src/lxc_driver.c 2008-03-24 16:46:48.000000000 -0700 >> @@ -25,14 +25,17 @@ > [...] >> +static int lxcStartContainer(virConnectPtr conn, >> + lxc_driver_t* driver, >> + lxc_vm_t *vm) >> +{ >> + int rc = -1; >> + int flags; >> + int stacksize = getpagesize() * 4; >> + void *stack, *stacktop; >> + >> + /* allocate a stack for the container */ >> + stack = malloc(stacksize); >> + if (!stack) { >> + lxcError(conn, NULL, VIR_ERR_NO_MEMORY, >> + _("unable to allocate container stack")); >> + goto error_exit; >> + } >> + stacktop = (char*)stack + stacksize; >> + >> + flags = CLONE_NEWPID|CLONE_NEWNS|CLONE_NEWUTS|CLONE_NEWUSER|CLONE_NEWIPC|SIGCHLD; >> + >> + vm->pid = clone(lxcChild, stacktop, flags, (void *)vm); >> + >> + DEBUG("clone() returned, %d", vm->pid); >> + >> + if (vm->pid < 0) { >> + lxcError(conn, NULL, VIR_ERR_INTERNAL_ERROR, >> + _("clone() failed, %s"), strerror(errno)); >> + goto error_exit; >> + } >> + >> + vm->def->id = vm->pid; >> + lxcSaveConfig(NULL, driver, vm, vm->def); > > We should make sure at some point taht there is some kind of locking > mechanism when creating those config files, no ? Good question. Right now libvirtd should be the only process accessing the file. Is it multi-threaded that would cause collisions? The other possibility is if we found we needed to save the config file from within the container. That's not currently the case and I'd stay away from that if at all possible. > >> + rc = 0; >> + >> +error_exit: >> + return rc; >> +} > [...] >> +static int lxcVmStart(virConnectPtr conn, >> + lxc_driver_t * driver, >> + lxc_vm_t * vm) >> +{ >> + int rc = -1; >> + lxc_vm_def_t *vmDef = vm->def; >> + >> + /* open tty for the container */ >> + if(lxcSetupTtyTunnel(conn, vmDef, &vm->parentTty) < 0) { >> + goto cleanup; >> + } >> + >> + rc = lxcStartContainer(conn, driver, vm); >> + >> + if(rc == 0) { >> + vm->state = VIR_DOMAIN_RUNNING; >> + driver->ninactivevms--; >> + driver->nactivevms++; >> + } >> + >> +cleanup: >> + return rc; >> +} > [...] > > Hum, now that config names are saved based on domain names, wouldn't > it be better to use a name based lookup ? Less precise but more direct Not sure what you're referring to here. name based lookup for what? > > [...] >> >> static int lxcStartup(void) >> { >> @@ -459,7 +666,7 @@ >> NULL, /* getCapabilities */ >> lxcListDomains, /* listDomains */ >> lxcNumDomains, /* numOfDomains */ >> - NULL/*lxcDomainCreateLinux*/, /* domainCreateLinux */ >> + lxcDomainCreateAndStart, /* domainCreateLinux */ >> lxcDomainLookupByID, /* domainLookupByID */ >> lxcDomainLookupByUUID, /* domainLookupByUUID */ >> lxcDomainLookupByName, /* domainLookupByName */ >> @@ -483,7 +690,7 @@ >> lxcDomainDumpXML, /* domainDumpXML */ >> lxcListDefinedDomains, /* listDefinedDomains */ >> lxcNumDefinedDomains, /* numOfDefinedDomains */ >> - NULL, /* domainCreate */ >> + lxcDomainStart, /* domainCreate */ >> lxcDomainDefine, /* domainDefineXML */ >> lxcDomainUndefine, /* domainUndefine */ >> NULL, /* domainAttachDevice */ > > Looks fine overall. I wonder if 1 fork/clone per container can't be > reduced to a single process doing the fd processing for all the containers > running. But that's probably harder, more fragile and for little gain. Yes, that's been tossed around. I'm holding off pursuing that for now until devpts namespace and it's impacts are known. > > Daniel > Thanks! -- Best Regards, Dave Leskovec IBM Linux Technology Center Open Virtualization -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list