On Fri, Apr 17, 2009 at 09:39:19AM -0500, Serge E. Hallyn wrote: > Quoting Daniel P. Berrange (berrange@xxxxxxxxxx): > > > Calling unshare(CLONE_NEWNS) will not prevent the host OS from > > > seeing the new /dev/pts if / was MS_SHARED. That isn't taken > > > care of anywhere else for this process's namespace, is it? > > > > Yeah, so this is the place where I think we must still have a difference > > in our host setups. I'm testing this patch on a Fedora 11 host, and with > > my current code, the new /dev/pts is not visible in the host. > > Well I haven't tested your patch as is, was just looking at the code. > My pivot_root patch did a remount --make-slave, but I think it is only > for the container itself, whereas your patch here affects the driver > so it hasn't yet hit that remount, right? > > > So I can only assume this means my host / is *not* MS_SHARED, while > > If on your F11 host you look at > > cat /proc/self/mountinfo > > do the top lines show / as being shared? (Mine does) No, all the F11 installs I have just show '-' and I can't find any sign of something in the init process which makes them shared, so perhaps its some other software you've since installed ? For sake of testing though I've run 'mount --make-rshared /' on one of my systems and can now reproduce the behaviour you describe with the mount appearing for all processes. > > > yours is. I'm struggling to find out why this is different because > > I'm testing on an Fedora 11 up2date system. > > It's possible this is just something that has been changed since. > > > Anyway, would it be sufficiently to add in a call > > > > if (mount("", "/", NULL, MS_PRIVATE|MS_REC, NULL) < 0) { > > virReportSystemError(NULL, errno, "%s", > > _("failed to make root private")); > > goto cleanup; > > } > > Maybe the best thing to do would be: > > > if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { > > virReportSystemError(NULL, errno, "%s", > > _("failed to make root slave")); > > goto cleanup; > > } > > if (mount("", "/", NULL, MS_SHARED|MS_REC, NULL) < 0) { > > virReportSystemError(NULL, errno, "%s", > > _("failed to make root shared")); > > goto cleanup; > > } > > So we are making it slave (so it will receive mounts from the host > still), then shared (so the rest of the container will start out > shared). That, or just turn it SLAVE and leave it like that. This patch attached now just makes it MS_SLAVE. There's no need for the extra SHARED flag, since the only process libvirt_lxc spawns is the 'init' process inside the container and that immediately makes its own root private. Daniel diff -r fadb4a5b251a src/domain_conf.c --- a/src/domain_conf.c Mon Apr 20 10:53:47 2009 +0100 +++ b/src/domain_conf.c Mon Apr 20 11:00:58 2009 +0100 @@ -3856,6 +3858,21 @@ const char *virDomainDefDefaultEmulator( return emulator; } +virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def) +{ + int i; + + for (i = 0 ; i < def->nfss ; i++) { + if (def->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT) + continue; + + if (STREQ(def->fss[i]->dst, "/")) + return def->fss[i]; + } + + return NULL; +} + void virDomainObjLock(virDomainObjPtr obj) { diff -r fadb4a5b251a src/domain_conf.h --- a/src/domain_conf.h Mon Apr 20 10:53:47 2009 +0100 +++ b/src/domain_conf.h Mon Apr 20 11:00:58 2009 +0100 @@ -636,6 +636,8 @@ const char *virDomainDefDefaultEmulator( virDomainDefPtr def, virCapsPtr caps); +virDomainFSDefPtr virDomainGetRootFilesystem(virDomainDefPtr def); + void virDomainObjLock(virDomainObjPtr obj); void virDomainObjUnlock(virDomainObjPtr obj); diff -r fadb4a5b251a src/libvirt_private.syms --- a/src/libvirt_private.syms Mon Apr 20 10:53:47 2009 +0100 +++ b/src/libvirt_private.syms Mon Apr 20 11:00:58 2009 +0100 @@ -79,6 +79,7 @@ virDomainDiskQSort; virDomainFindByID; virDomainFindByName; virDomainFindByUUID; +virDomainGetRootFilesystem; virDomainGraphicsTypeFromString; virDomainGraphicsDefFree; virDomainInputDefFree; diff -r fadb4a5b251a src/lxc_container.c --- a/src/lxc_container.c Mon Apr 20 10:53:47 2009 +0100 +++ b/src/lxc_container.c Mon Apr 20 11:00:58 2009 +0100 @@ -308,7 +308,7 @@ static int lxcContainerPivotRoot(virDoma /* Create a tmpfs root since old and new roots must be * on separate filesystems */ - if (mount("", oldroot, "tmpfs", 0, NULL) < 0) { + if (mount("tmprootfs", oldroot, "tmpfs", 0, NULL) < 0) { virReportSystemError(NULL, errno, _("failed to mount empty tmpfs at %s"), oldroot); @@ -338,15 +338,9 @@ static int lxcContainerPivotRoot(virDoma /* Now we chroot into the tmpfs, then pivot into the * root->src bind-mounted onto '/new' */ - if (chroot(oldroot) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to chroot into tmpfs")); - goto err; - } - - if (chdir("/new") < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to chdir into /new on tmpfs")); + if (chdir(newroot) < 0) { + virReportSystemError(NULL, errno, + _("failed to chroot into %s"), newroot); goto err; } @@ -362,12 +356,6 @@ static int lxcContainerPivotRoot(virDoma if (chdir("/") < 0) goto err; - if (umount2(".oldroot", MNT_DETACH) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to lazily unmount old root")); - goto err; - } - ret = 0; err: @@ -377,10 +365,64 @@ err: return ret; } + +static int lxcContainerMountBasicFS(virDomainFSDefPtr root) +{ + const struct { + const char *src; + const char *dst; + const char *type; + } mnts[] = { + { "/dev", "/dev", "tmpfs" }, + { "/proc", "/proc", "proc" }, + { "/sys", "/sys", "sysfs" }, +#if WITH_SELINUX + { "none", "/selinux", "selinuxfs" }, +#endif + }; + int i, rc; + char *devpts; + + if (virAsprintf(&devpts, "/.oldroot%s/dev/pts", root->src) < 0) { + virReportOOMError(NULL); + return -1; + } + + for (i = 0 ; i < ARRAY_CARDINALITY(mnts) ; i++) { + if (virFileMakePath(mnts[i].dst) < 0) { + virReportSystemError(NULL, errno, + _("failed to mkdir %s"), + mnts[i].src); + return -1; + } + if (mount(mnts[i].src, mnts[i].dst, mnts[i].type, 0, NULL) < 0) { + virReportSystemError(NULL, errno, + _("failed to mount %s on %s"), + mnts[i].type, mnts[i].type); + return -1; + } + } + + if ((rc = virFileMakePath("/dev/pts") < 0)) { + virReportSystemError(NULL, rc, "%s", + _("cannot create /dev/pts")); + return -1; + } + + VIR_DEBUG("Trying to move %s to %s", devpts, "/dev/pts"); + if ((rc = mount(devpts, "/dev/pts", NULL, MS_MOVE, NULL)) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to mount /dev/pts in container")); + return -1; + } + VIR_FREE(devpts); + + return 0; +} + static int lxcContainerPopulateDevices(void) { int i; - int rc; const struct { int maj; int min; @@ -395,33 +437,6 @@ static int lxcContainerPopulateDevices(v { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/dev/urandom" }, }; - if ((rc = virFileMakePath("/dev")) < 0) { - virReportSystemError(NULL, rc, "%s", - _("cannot create /dev/")); - return -1; - } - if (mount("none", "/dev", "tmpfs", 0, NULL) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to mount /dev tmpfs")); - return -1; - } - /* Move old devpts into container, since we have to - connect to the master ptmx which was opened in - the parent. - XXX This sucks, we need to figure out how to get our - own private devpts for isolation - */ - if ((rc = virFileMakePath("/dev/pts") < 0)) { - virReportSystemError(NULL, rc, "%s", - _("cannot create /dev/pts")); - return -1; - } - if (mount("devpts", "/dev/pts", "devpts", 0, NULL) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to mount /dev/pts in container")); - return -1; - } - /* Populate /dev/ with a few important bits */ for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) { dev_t dev = makedev(devs[i].maj, devs[i].min); @@ -434,6 +449,23 @@ static int lxcContainerPopulateDevices(v } } + if (access("/dev/pts/ptmx", W_OK) == 0) { + if (symlink("/dev/pts/ptmx", "/dev/ptmx") < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to create symlink /dev/ptmx to /dev/pts/ptmx")); + return -1; + } + } else { + dev_t dev = makedev(LXC_DEV_MAJ_TTY, LXC_DEV_MIN_PTMX); + if (mknod("/dev/ptmx", 0, dev) < 0 || + chmod("/dev/ptmx", 0666)) { + virReportSystemError(NULL, errno, "%s", + _("failed to make device /dev/ptmx")); + return -1; + } + } + + return 0; } @@ -493,6 +525,7 @@ static int lxcContainerUnmountOldFS(void return -1; } while (getmntent_r(procmnt, &mntent, mntbuf, sizeof(mntbuf)) != NULL) { + VIR_DEBUG("Got %s", mntent.mnt_dir); if (!STRPREFIX(mntent.mnt_dir, "/.oldroot")) continue; @@ -513,6 +546,7 @@ static int lxcContainerUnmountOldFS(void lxcContainerChildMountSort); for (i = 0 ; i < nmounts ; i++) { + VIR_DEBUG("Umount %s", mounts[i]); if (umount(mounts[i]) < 0) { virReportSystemError(NULL, errno, _("failed to unmount '%s'"), @@ -534,22 +568,23 @@ static int lxcContainerUnmountOldFS(void static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef, virDomainFSDefPtr root) { + /* Gives us a private root, leaving all parent OS mounts on /.oldroot */ if (lxcContainerPivotRoot(root) < 0) return -1; - if (virFileMakePath("/proc") < 0 || - mount("none", "/proc", "proc", 0, NULL) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to mount /proc")); + /* Mounts the core /proc, /sys, /dev, /dev/pts filesystems */ + if (lxcContainerMountBasicFS(root) < 0) return -1; - } + /* Populates device nodes in /dev/ */ if (lxcContainerPopulateDevices() < 0) return -1; + /* Sets up any non-root mounts from guest config */ if (lxcContainerMountNewFS(vmDef) < 0) return -1; + /* Gets rid of all remaining mounts from host OS, including /.oldroot itself */ if (lxcContainerUnmountOldFS() < 0) return -1; @@ -595,18 +630,9 @@ static int lxcContainerSetupExtraMounts( return 0; } -static int lxcContainerSetupMounts(virDomainDefPtr vmDef) +static int lxcContainerSetupMounts(virDomainDefPtr vmDef, + virDomainFSDefPtr root) { - int i; - virDomainFSDefPtr root = NULL; - - for (i = 0 ; i < vmDef->nfss ; i++) { - if (vmDef->fss[i]->type != VIR_DOMAIN_FS_TYPE_MOUNT) - continue; - if (STREQ(vmDef->fss[i]->dst, "/")) - root = vmDef->fss[i]; - } - if (root) return lxcContainerSetupPivotRoot(vmDef, root); else @@ -630,6 +656,8 @@ static int lxcContainerChild( void *data lxc_child_argv_t *argv = data; virDomainDefPtr vmDef = argv->config; int ttyfd; + char *ttyPath; + virDomainFSDefPtr root; if (NULL == vmDef) { lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR, @@ -637,16 +665,28 @@ static int lxcContainerChild( void *data return -1; } - if (lxcContainerSetupMounts(vmDef) < 0) - return -1; + root = virDomainGetRootFilesystem(vmDef); - ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY); + if (root) { + if (virAsprintf(&ttyPath, "%s%s", root->src, argv->ttyPath) < 0) { + virReportOOMError(NULL); + return -1; + } + } else { + if (!(ttyPath = strdup(argv->ttyPath))) { + virReportOOMError(NULL); + return -1; + } + } + + ttyfd = open(ttyPath, O_RDWR|O_NOCTTY); if (ttyfd < 0) { virReportSystemError(NULL, errno, - _("failed to open %s"), - argv->ttyPath); + _("failed to open tty %s"), + ttyPath); return -1; } + VIR_FREE(ttyPath); if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) { close(ttyfd); @@ -654,6 +694,9 @@ static int lxcContainerChild( void *data } close(ttyfd); + if (lxcContainerSetupMounts(vmDef, root) < 0) + return -1; + /* Wait for interface devices to show up */ if (lxcContainerWaitForContinue(argv->monitor) < 0) return -1; diff -r fadb4a5b251a src/lxc_container.h --- a/src/lxc_container.h Mon Apr 20 10:53:47 2009 +0100 +++ b/src/lxc_container.h Mon Apr 20 11:00:58 2009 +0100 @@ -39,6 +39,7 @@ enum { #define LXC_DEV_MAJ_TTY 5 #define LXC_DEV_MIN_CONSOLE 1 +#define LXC_DEV_MIN_PTMX 2 #define LXC_DEV_MAJ_PTY 136 diff -r fadb4a5b251a src/lxc_controller.c --- a/src/lxc_controller.c Mon Apr 20 10:53:47 2009 +0100 +++ b/src/lxc_controller.c Mon Apr 20 11:00:58 2009 +0100 @@ -33,6 +33,7 @@ #include <fcntl.h> #include <signal.h> #include <getopt.h> +#include <sys/mount.h> #include "virterror_internal.h" #include "logging.h" @@ -426,6 +427,13 @@ static int lxcControllerCleanupInterface return 0; } +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MS_SLAVE +#define MS_SLAVE (1<<19) +#endif static int lxcControllerRun(virDomainDefPtr def, @@ -440,6 +448,9 @@ lxcControllerRun(virDomainDefPtr def, int containerPty; char *containerPtyPath; pid_t container = -1; + virDomainFSDefPtr root; + char *devpts = NULL; + char *devptmx = NULL; if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) { virReportSystemError(NULL, errno, "%s", @@ -447,14 +458,91 @@ lxcControllerRun(virDomainDefPtr def, goto cleanup; } - if (virFileOpenTty(&containerPty, - &containerPtyPath, - 0) < 0) { - virReportSystemError(NULL, errno, "%s", - _("failed to allocate tty")); - goto cleanup; + root = virDomainGetRootFilesystem(def); + + /* + * If doing a chroot style setup, we need to prepare + * a private /dev/pts for the child now, which they + * will later move into position. + * + * This is complex because 'virsh console' needs to + * use /dev/pts from the host OS, and the guest OS + * needs to use /dev/pts from the guest. + * + * This means that we (libvirt_lxc) need to see and + * use both /dev/pts instances. We're running in the + * host OS context though and don't want to expose + * the guest OS /dev/pts there. + * + * Thus we call unshare(CLONE_NS) so that we can see + * the guest's new /dev/pts, without it becoming + * visible to the host OS. We also put the root FS + * into slave mode, just in case it was currently + * marked as shared + */ + if (root) { + VIR_DEBUG0("Setting up private /dev/pts"); + if (unshare(CLONE_NEWNS) < 0) { + virReportSystemError(NULL, errno, "%s", + _("cannot unshare mount namespace")); + goto cleanup; + } + + if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to switch root mount into slave mode")); + goto cleanup; + } + + if (virAsprintf(&devpts, "%s/dev/pts", root->src) < 0 || + virAsprintf(&devptmx, "%s/dev/pts/ptmx", root->src) < 0) { + virReportOOMError(NULL); + goto cleanup; + } + + if (virFileMakePath(devpts) < 0) { + virReportSystemError(NULL, errno, + _("failed to make path %s"), + devpts); + goto cleanup; + } + + VIR_DEBUG("Mouting 'devpts' on %s", devpts); + if (mount("devpts", devpts, "devpts", 0, "newinstance,ptmxmode=0666") < 0) { + virReportSystemError(NULL, errno, + _("failed to mount devpts on %s"), + devpts); + goto cleanup; + } + + if (access(devptmx, R_OK) < 0) { + VIR_WARN0("kernel does not support private devpts, using shared devpts"); + VIR_FREE(devptmx); + } } + if (devptmx) { + VIR_DEBUG("Opening tty on private %s", devptmx); + if (virFileOpenTtyAt(devptmx, + &containerPty, + &containerPtyPath, + 0) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to allocate tty")); + goto cleanup; + } + } else { + VIR_DEBUG0("Opening tty on shared /dev/ptmx"); + if (virFileOpenTty(&containerPty, + &containerPtyPath, + 0) < 0) { + virReportSystemError(NULL, errno, "%s", + _("failed to allocate tty")); + goto cleanup; + } + } + + if (lxcSetContainerResources(def) < 0) goto cleanup; @@ -476,6 +564,8 @@ lxcControllerRun(virDomainDefPtr def, rc = lxcControllerMain(monitor, client, appPty, containerPty); cleanup: + VIR_FREE(devptmx); + VIR_FREE(devpts); if (control[0] != -1) close(control[0]); if (control[1] != -1) diff -r fadb4a5b251a src/util.c --- a/src/util.c Mon Apr 20 10:53:47 2009 +0100 +++ b/src/util.c Mon Apr 20 11:00:58 2009 +0100 @@ -1050,14 +1050,25 @@ int virFileBuildPath(const char *dir, } -#ifdef __linux__ int virFileOpenTty(int *ttymaster, char **ttyName, int rawmode) { + return virFileOpenTtyAt("/dev/ptmx", + ttymaster, + ttyName, + rawmode); +} + +#ifdef __linux__ +int virFileOpenTtyAt(const char *ptmx, + int *ttymaster, + char **ttyName, + int rawmode) +{ int rc = -1; - if ((*ttymaster = posix_openpt(O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0) + if ((*ttymaster = open(ptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0) goto cleanup; if (unlockpt(*ttymaster) < 0) @@ -1100,9 +1111,10 @@ cleanup: } #else -int virFileOpenTty(int *ttymaster ATTRIBUTE_UNUSED, - char **ttyName ATTRIBUTE_UNUSED, - int rawmode ATTRIBUTE_UNUSED) +int virFileOpenTtyAt(const char *ptmx ATTRIBUTE_UNUSED, + int *ttymaster ATTRIBUTE_UNUSED, + char **ttyName ATTRIBUTE_UNUSED, + int rawmode ATTRIBUTE_UNUSED) { return -1; } diff -r fadb4a5b251a src/util.h --- a/src/util.h Mon Apr 20 10:53:47 2009 +0100 +++ b/src/util.h Mon Apr 20 11:00:58 2009 +0100 @@ -103,6 +103,10 @@ int virFileBuildPath(const char *dir, int virFileOpenTty(int *ttymaster, char **ttyName, int rawmode); +int virFileOpenTtyAt(const char *ptmx, + int *ttymaster, + char **ttyName, + int rawmode); char* virFilePid(const char *dir, const char *name); -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- Libvir-list mailing list Libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list