On Wed, Aug 13, 2008 at 10:57:11AM -0700, Dan Smith wrote: [snip huge pile of code] > I'd really like to see this mondo if block broken out into at least an > lxcPivotRoot() function, if not further. This is pretty long and > hairy, IMHO. That is very true. I've re-factored it into a whole bunch of funtions now, so take a look at this.... lxc_container.c | 337 +++++++++++++++++++++++++++++++++++++++++++++++++------- util.c | 12 - 2 files changed, 301 insertions(+), 48 deletions(-) Daniel diff -r 831362089d7c src/lxc_container.c --- a/src/lxc_container.c Wed Aug 27 13:04:30 2008 +0100 +++ b/src/lxc_container.c Wed Aug 27 13:21:35 2008 +0100 @@ -1,10 +1,12 @@ /* * Copyright IBM Corp. 2008 + * Copyright Red Hat 2008 * * lxc_container.c: file description * * Authors: * David L. Leskovec <dlesko at linux.vnet.ibm.com> + * Daniel P. Berrange <berrange@xxxxxxxxxx> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -26,10 +28,18 @@ #include <fcntl.h> #include <limits.h> #include <stdlib.h> +#include <stdio.h> #include <sys/ioctl.h> #include <sys/mount.h> #include <sys/wait.h> #include <unistd.h> +#include <mntent.h> + +/* Yes, we want linux private one, for _syscall2() macro */ +#include <linux/unistd.h> + +/* For MS_MOVE */ +#include <linux/fs.h> #include "lxc_container.h" #include "util.h" @@ -103,23 +113,15 @@ * * Returns 0 on success or -1 in case of error */ -static int lxcContainerSetStdio(int control, const char *ttyPath) +static int lxcContainerSetStdio(int control, int ttyfd) { int rc = -1; - int ttyfd; int open_max, i; if (setsid() < 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, _("setsid failed: %s"), strerror(errno)); - goto error_out; - } - - ttyfd = open(ttyPath, O_RDWR|O_NOCTTY); - if (ttyfd < 0) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("open(%s) failed: %s"), ttyPath, strerror(errno)); - goto error_out; + goto cleanup; } if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) { @@ -156,9 +158,6 @@ rc = 0; cleanup: - close(ttyfd); - -error_out: return rc; } @@ -221,6 +220,7 @@ return 0; } + /** * lxcEnableInterfaces: * @vm: Pointer to vm structure @@ -251,6 +251,279 @@ return rc; } + +//_syscall2(int, pivot_root, char *, newroot, const char *, oldroot) +extern int pivot_root(const char * new_root,const char * put_old); + +static int lxcContainerChildMountSort(const void *a, const void *b) +{ + const char **sa = (const char**)a; + const char **sb = (const char**)b; + + /* Delibrately reversed args - we need to unmount deepest + children first */ + return strcmp(*sb, *sa); +} + +static int lxcContainerPivotRoot(virDomainFSDefPtr root) +{ + char *oldroot; + + /* First step is to ensure the new root itself is + a mount point */ + if (mount(root->src, root->src, NULL, MS_BIND, NULL) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to bind new root %s: %s"), + root->src, strerror(errno)); + return -1; + } + + if (asprintf(&oldroot, "%s/.oldroot", root->src) < 0) { + oldroot = NULL; + lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL); + return -1; + } + + if (virFileMakePath(oldroot) < 0) { + VIR_FREE(oldroot); + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to create %s: %s"), + oldroot, strerror(errno)); + return -1; + } + + /* The old root directory will live at /.oldroot after + * this and will soon be unmounted completely */ + if (pivot_root(root->src, oldroot) < 0) { + VIR_FREE(oldroot); + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to pivot root %s to %s: %s"), + oldroot, root->src, strerror(errno)); + return -1; + } + VIR_FREE(oldroot); + + /* CWD is undefined after pivot_root, so go to / */ + if (chdir("/") < 0) { + return -1; + } + + return 0; +} + +static int lxcContainerPopulateDevices(void) +{ + int i; + const struct { + int maj; + int min; + mode_t mode; + const char *path; + } devs[] = { + { 1, 3, 0666, "/dev/null" }, + { 1, 5, 0666, "/dev/zero" }, + { 1, 7, 0666, "/dev/full" }, + { 5, 1, 0600, "/dev/console" }, + { 1, 8, 0666, "/dev/random" }, + { 1, 9, 0666, "/dev/urandom" }, + }; + + if (virFileMakePath("/dev") < 0 || + mount("none", "/dev", "tmpfs", 0, NULL) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to mount /dev tmpfs for container: %s"), + strerror(errno)); + return -1; + } + /* Move old devpts into container, since we have to + connect to the master ptmx which was opened in + the parent. + XXX This sucks, we need to figure out how to get our + own private devpts for isolation + */ + if (virFileMakePath("/dev/pts") < 0 || + mount("/.oldroot/dev/pts", "/dev/pts", NULL, + MS_MOVE, NULL) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to move /dev/pts into container: %s"), + strerror(errno)); + return -1; + } + + /* Populate /dev/ with a few important bits */ + for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) { + dev_t dev = makedev(devs[i].maj, devs[i].min); + if (mknod(devs[i].path, 0, dev) < 0 || + chmod(devs[i].path, devs[i].mode)) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to make device %s: %s"), + devs[i].path, strerror(errno)); + return -1; + } + } + + return 0; +} + + +static int lxcContainerMountNewFS(virDomainDefPtr vmDef) +{ + virDomainFSDefPtr tmp; + + /* Pull in rest of container's mounts */ + for (tmp = vmDef->fss; tmp; tmp = tmp->next) { + char *src; + if (STREQ(tmp->dst, "/")) + continue; + // XXX fix + if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT) + continue; + + if (asprintf(&src, "/.oldroot/%s", tmp->src) < 0) + return -1; + + if (virFileMakePath(tmp->dst) < 0 || + mount(src, tmp->dst, NULL, MS_BIND, NULL) < 0) { + VIR_FREE(src); + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to mount %s at %s for container: %s"), + tmp->src, tmp->dst, strerror(errno)); + return -1; + } + VIR_FREE(src); + } + return -1; +} + + +static int lxcContainerUnmountOldFS(void) +{ + struct mntent *mntent; + char **mounts = NULL; + int nmounts = 0; + FILE *procmnt; + int i; + + if (!(procmnt = setmntent("/proc/mounts", "r"))) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to read /proc/mounts: %s"), + strerror(errno)); + return -1; + } + while ((mntent = getmntent(procmnt)) != NULL) { + if (!STRPREFIX(mntent->mnt_dir, "/.oldroot")) + continue; + if (VIR_REALLOC_N(mounts, nmounts+1) < 0) { + endmntent(procmnt); + return -1; + } + if (!(mounts[nmounts++] = strdup(mntent->mnt_dir))) { + endmntent(procmnt); + return -1; + } + } + endmntent(procmnt); + + qsort(mounts, nmounts, sizeof(mounts[0]), + lxcContainerChildMountSort); + + for (i = 0 ; i < nmounts ; i++) { + if (umount(mounts[i]) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to unmount %s: %s"), + mounts[i], strerror(errno)); + return -1; + } + VIR_FREE(mounts[i]); + } + VIR_FREE(mounts); + + return 0; +} + + +/* Got a FS mapped to /, we're going the pivot_root + * approach to do a better-chroot-than-chroot + * this is based on this thread http://lkml.org/lkml/2008/3/5/29 + */ +static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef, + virDomainFSDefPtr root) +{ + if (lxcContainerPivotRoot(root) < 0) + return -1; + + if (virFileMakePath("/proc") < 0 || + mount("none", "/proc", "proc", 0, NULL) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to mount /proc for container: %s"), + strerror(errno)); + return -1; + } + + if (lxcContainerPopulateDevices() < 0) + return -1; + + if (lxcContainerMountNewFS(vmDef) < 0) + return -1; + + if (lxcContainerUnmountOldFS() < 0) + return -1; + + return 0; +} + +/* Nothing mapped to /, we're using the main root, + but with extra stuff mapped in */ +static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef) +{ + virDomainFSDefPtr tmp; + + for (tmp = vmDef->fss; tmp; tmp = tmp->next) { + // XXX fix to support other mount types + if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT) + continue; + + if (mount(tmp->src, + tmp->dst, + NULL, + MS_BIND, + NULL) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to mount %s at %s for container: %s"), + tmp->src, tmp->dst, strerror(errno)); + return -1; + } + } + + /* mount /proc */ + if (mount("lxcproc", "/proc", "proc", 0, NULL) < 0) { + lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, + _("failed to mount /proc for container: %s"), + strerror(errno)); + return -1; + } + + return 0; +} + +static int lxcContainerSetupMounts(virDomainDefPtr vmDef) +{ + virDomainFSDefPtr tmp; + virDomainFSDefPtr root = NULL; + + for (tmp = vmDef->fss; tmp && !root; tmp = tmp->next) { + if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT) + continue; + if (STREQ(tmp->dst, "/")) + root = tmp; + } + + if (root) + return lxcContainerSetupPivotRoot(vmDef, root); + else + return lxcContainerSetupExtraMounts(vmDef); +} + /** * lxcChild: * @argv: Pointer to container arguments @@ -265,11 +538,9 @@ */ static int lxcContainerChild( void *data ) { - int rc = -1; lxc_child_argv_t *argv = data; virDomainDefPtr vmDef = argv->config; - virDomainFSDefPtr curMount; - int i; + int ttyfd; if (NULL == vmDef) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -277,37 +548,21 @@ return -1; } - /* handle the bind mounts first before doing anything else that may */ - /* then access those mounted dirs */ - curMount = vmDef->fss; - for (i = 0; curMount; curMount = curMount->next) { - // XXX fix - if (curMount->type != VIR_DOMAIN_FS_TYPE_MOUNT) - continue; - rc = mount(curMount->src, - curMount->dst, - NULL, - MS_BIND, - NULL); - if (0 != rc) { - lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to mount %s at %s for container: %s"), - curMount->src, curMount->dst, strerror(errno)); - return -1; - } - } + if (lxcContainerSetupMounts(vmDef) < 0) + return -1; - /* mount /proc */ - rc = mount("lxcproc", "/proc", "proc", 0, NULL); - if (0 != rc) { + ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY); + if (ttyfd < 0) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, - _("failed to mount /proc for container: %s"), - strerror(errno)); + _("open(%s) failed: %s"), argv->ttyPath, strerror(errno)); return -1; } - if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0) + if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) { + close(ttyfd); return -1; + } + close(ttyfd); /* Wait for interface devices to show up */ if (lxcContainerWaitForContinue(argv->monitor) < 0) diff -r 831362089d7c src/util.c --- a/src/util.c Wed Aug 27 13:04:30 2008 +0100 +++ b/src/util.c Wed Aug 27 13:21:35 2008 +0100 @@ -616,13 +616,11 @@ if (!(p = strrchr(parent, '/'))) return EINVAL; - if (p == parent) - return EPERM; - - *p = '\0'; - - if ((err = virFileMakePath(parent))) - return err; + if (p != parent) { + *p = '\0'; + if ((err = virFileMakePath(parent))) + return err; + } if (mkdir(path, 0777) < 0 && errno != EEXIST) return errno; -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list