Re: [libvirt] PATCH: 4/4: Add pivot_root support to container

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Aug 27, 2008 at 09:58:58PM +0100, Daniel P. Berrange wrote:
> On Wed, Aug 27, 2008 at 01:32:13PM -0700, Dan Smith wrote:
> > DB> +static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
> > DB> +{
> > DB> +    virDomainFSDefPtr tmp;
....
> > DB> +    return -1;
> > 
> > Shouldn't this be "return 0"?  AFAICT, this means this function will
> > always fail and thus any domain with a root target will fail to start.
> > If I change this to "return 0" I'm able to start such guests, with
> > properly pivoted roots.
> 
> Yes, it clearly should be return 0.
> 
> > On a more general note, it seems like there are a lot of places where
> > failures trigger a "return -1" that rolls completely up the stack with
> > no error information getting logged.  Since you have the excellent
> > per-container logging functionality, can we increase the verbosity a
> > little so that there is some way to diagnose where things are failing?
> > Thus far, I've just started sprinkling fprintf()'s into the code until I
> > start to narrow things down.  I'd be glad to help with that after this
> > goes in.
> 
> The newly improved virExec() API which LXC now uses ensures that libvirt's
> error callback is reset to the default in child processes. This means that
> any call to virRaiseError in the child is turned into a fprintf(stderr...).
> We also wire the container stdout/err to /var/log/libvirt/lxc/NAME.log
> So if everything is operating as I expect, all the lxcError() calls in
> this code should result in log messages being written out to /var/log.

Of course in this particular scenario you would never have had a log 
message because this should never have been a 'return -1'. I found a
couple of other places with missing lxcError() calls. So here's an
updated patch

diff -r 4d49719aa768 src/lxc_container.c
--- a/src/lxc_container.c	Wed Aug 27 22:19:33 2008 +0100
+++ b/src/lxc_container.c	Wed Aug 27 22:21:15 2008 +0100
@@ -1,10 +1,12 @@
 /*
  * Copyright IBM Corp. 2008
+ * Copyright Red Hat 2008
  *
  * lxc_container.c: file description
  *
  * Authors:
  *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
+ *  Daniel P. Berrange <berrange@xxxxxxxxxx>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -26,10 +28,18 @@
 #include <fcntl.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <stdio.h>
 #include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/wait.h>
 #include <unistd.h>
+#include <mntent.h>
+
+/* Yes, we want linux private one, for _syscall2() macro */
+#include <linux/unistd.h>
+
+/* For MS_MOVE */
+#include <linux/fs.h>
 
 #include "lxc_container.h"
 #include "util.h"
@@ -103,23 +113,15 @@
  *
  * Returns 0 on success or -1 in case of error
  */
-static int lxcContainerSetStdio(int control, const char *ttyPath)
+static int lxcContainerSetStdio(int control, int ttyfd)
 {
     int rc = -1;
-    int ttyfd;
     int open_max, i;
 
     if (setsid() < 0) {
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
                  _("setsid failed: %s"), strerror(errno));
-        goto error_out;
-    }
-
-    ttyfd = open(ttyPath, O_RDWR|O_NOCTTY);
-    if (ttyfd < 0) {
-        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("open(%s) failed: %s"), ttyPath, strerror(errno));
-        goto error_out;
+        goto cleanup;
     }
 
     if (ioctl(ttyfd, TIOCSCTTY, NULL) < 0) {
@@ -156,9 +158,6 @@
     rc = 0;
 
 cleanup:
-    close(ttyfd);
-
-error_out:
     return rc;
 }
 
@@ -221,6 +220,7 @@
     return 0;
 }
 
+
 /**
  * lxcEnableInterfaces:
  * @vm: Pointer to vm structure
@@ -251,6 +251,285 @@
     return rc;
 }
 
+
+//_syscall2(int, pivot_root, char *, newroot, const char *, oldroot)
+extern int pivot_root(const char * new_root,const char * put_old);
+
+static int lxcContainerChildMountSort(const void *a, const void *b)
+{
+  const char **sa = (const char**)a;
+  const char **sb = (const char**)b;
+
+  /* Delibrately reversed args - we need to unmount deepest
+     children first */
+  return strcmp(*sb, *sa);
+}
+
+static int lxcContainerPivotRoot(virDomainFSDefPtr root)
+{
+    char *oldroot;
+
+    /* First step is to ensure the new root itself is
+       a mount point */
+    if (mount(root->src, root->src, NULL, MS_BIND, NULL) < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to bind new root %s: %s"),
+                 root->src, strerror(errno));
+        return -1;
+    }
+
+    if (asprintf(&oldroot, "%s/.oldroot", root->src) < 0) {
+        oldroot = NULL;
+        lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+        return -1;
+    }
+
+    if (virFileMakePath(oldroot) < 0) {
+        VIR_FREE(oldroot);
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to create %s: %s"),
+                 oldroot, strerror(errno));
+        return -1;
+    }
+
+    /* The old root directory will live at /.oldroot after
+     * this and will soon be unmounted completely */
+    if (pivot_root(root->src, oldroot) < 0) {
+        VIR_FREE(oldroot);
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to pivot root %s to %s: %s"),
+                 oldroot, root->src, strerror(errno));
+        return -1;
+    }
+    VIR_FREE(oldroot);
+
+    /* CWD is undefined after pivot_root, so go to / */
+    if (chdir("/") < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static int lxcContainerPopulateDevices(void)
+{
+    int i;
+    const struct {
+        int maj;
+        int min;
+        mode_t mode;
+        const char *path;
+    } devs[] = {
+        { 1, 3, 0666, "/dev/null" },
+        { 1, 5, 0666, "/dev/zero" },
+        { 1, 7, 0666, "/dev/full" },
+        { 5, 1, 0600, "/dev/console" },
+        { 1, 8, 0666, "/dev/random" },
+        { 1, 9, 0666, "/dev/urandom" },
+    };
+
+    if (virFileMakePath("/dev") < 0 ||
+        mount("none", "/dev", "tmpfs", 0, NULL) < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to mount /dev tmpfs for container: %s"),
+                 strerror(errno));
+        return -1;
+    }
+    /* Move old devpts into container, since we have to
+       connect to the master ptmx which was opened in
+       the parent.
+       XXX This sucks, we need to figure out how to get our
+       own private devpts for isolation
+    */
+    if (virFileMakePath("/dev/pts") < 0 ||
+        mount("/.oldroot/dev/pts", "/dev/pts", NULL,
+              MS_MOVE, NULL) < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to move /dev/pts into container: %s"),
+                 strerror(errno));
+        return -1;
+    }
+
+    /* Populate /dev/ with a few important bits */
+    for (i = 0 ; i < ARRAY_CARDINALITY(devs) ; i++) {
+        dev_t dev = makedev(devs[i].maj, devs[i].min);
+        if (mknod(devs[i].path, 0, dev) < 0 ||
+            chmod(devs[i].path, devs[i].mode)) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to make device %s: %s"),
+                     devs[i].path, strerror(errno));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+
+static int lxcContainerMountNewFS(virDomainDefPtr vmDef)
+{
+    virDomainFSDefPtr tmp;
+
+    /* Pull in rest of container's mounts */
+    for (tmp = vmDef->fss; tmp; tmp = tmp->next) {
+        char *src;
+        if (STREQ(tmp->dst, "/"))
+            continue;
+        // XXX fix
+        if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+            continue;
+
+        if (asprintf(&src, "/.oldroot/%s", tmp->src) < 0) {
+            lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+            return -1;
+        }
+
+        if (virFileMakePath(tmp->dst) < 0 ||
+            mount(src, tmp->dst, NULL, MS_BIND, NULL) < 0) {
+            VIR_FREE(src);
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to mount %s at %s for container: %s"),
+                     tmp->src, tmp->dst, strerror(errno));
+            return -1;
+        }
+        VIR_FREE(src);
+    }
+
+    return 0;
+}
+
+
+static int lxcContainerUnmountOldFS(void)
+{
+    struct mntent *mntent;
+    char **mounts = NULL;
+    int nmounts = 0;
+    FILE *procmnt;
+    int i;
+
+    if (!(procmnt = setmntent("/proc/mounts", "r"))) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to read /proc/mounts: %s"),
+                 strerror(errno));
+        return -1;
+    }
+    while ((mntent = getmntent(procmnt)) != NULL) {
+        if (!STRPREFIX(mntent->mnt_dir, "/.oldroot"))
+            continue;
+
+        if (VIR_REALLOC_N(mounts, nmounts+1) < 0) {
+            endmntent(procmnt);
+            lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+            return -1;
+        }
+        if (!(mounts[nmounts++] = strdup(mntent->mnt_dir))) {
+            endmntent(procmnt);
+            lxcError(NULL, NULL, VIR_ERR_NO_MEMORY, NULL);
+            return -1;
+        }
+    }
+    endmntent(procmnt);
+
+    qsort(mounts, nmounts, sizeof(mounts[0]),
+          lxcContainerChildMountSort);
+
+    for (i = 0 ; i < nmounts ; i++) {
+        if (umount(mounts[i]) < 0) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to unmount %s: %s"),
+                     mounts[i], strerror(errno));
+            return -1;
+        }
+        VIR_FREE(mounts[i]);
+    }
+    VIR_FREE(mounts);
+
+    return 0;
+}
+
+
+/* Got a FS mapped to /, we're going the pivot_root
+ * approach to do a better-chroot-than-chroot
+ * this is based on this thread http://lkml.org/lkml/2008/3/5/29
+ */
+static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef,
+                                      virDomainFSDefPtr root)
+{
+    if (lxcContainerPivotRoot(root) < 0)
+        return -1;
+
+    if (virFileMakePath("/proc") < 0 ||
+        mount("none", "/proc", "proc", 0, NULL) < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to mount /proc for container: %s"),
+                 strerror(errno));
+        return -1;
+    }
+
+    if (lxcContainerPopulateDevices() < 0)
+        return -1;
+
+    if (lxcContainerMountNewFS(vmDef) < 0)
+        return -1;
+
+    if (lxcContainerUnmountOldFS() < 0)
+        return -1;
+
+    return 0;
+}
+
+/* Nothing mapped to /, we're using the main root,
+   but with extra stuff mapped in */
+static int lxcContainerSetupExtraMounts(virDomainDefPtr vmDef)
+{
+    virDomainFSDefPtr tmp;
+
+    for (tmp = vmDef->fss; tmp; tmp = tmp->next) {
+        // XXX fix to support other mount types
+        if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+            continue;
+
+        if (mount(tmp->src,
+                  tmp->dst,
+                  NULL,
+                  MS_BIND,
+                  NULL) < 0) {
+            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                     _("failed to mount %s at %s for container: %s"),
+                     tmp->src, tmp->dst, strerror(errno));
+            return -1;
+        }
+    }
+
+    /* mount /proc */
+    if (mount("lxcproc", "/proc", "proc", 0, NULL) < 0) {
+        lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
+                 _("failed to mount /proc for container: %s"),
+                 strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+static int lxcContainerSetupMounts(virDomainDefPtr vmDef)
+{
+    virDomainFSDefPtr tmp;
+    virDomainFSDefPtr root = NULL;
+
+    for (tmp = vmDef->fss; tmp && !root; tmp = tmp->next) {
+        if (tmp->type != VIR_DOMAIN_FS_TYPE_MOUNT)
+            continue;
+        if (STREQ(tmp->dst, "/"))
+            root = tmp;
+    }
+
+    if (root)
+        return lxcContainerSetupPivotRoot(vmDef, root);
+    else
+        return lxcContainerSetupExtraMounts(vmDef);
+}
+
 /**
  * lxcChild:
  * @argv: Pointer to container arguments
@@ -265,11 +544,9 @@
  */
 static int lxcContainerChild( void *data )
 {
-    int rc = -1;
     lxc_child_argv_t *argv = data;
     virDomainDefPtr vmDef = argv->config;
-    virDomainFSDefPtr curMount;
-    int i;
+    int ttyfd;
 
     if (NULL == vmDef) {
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
@@ -277,37 +554,21 @@
         return -1;
     }
 
-    /* handle the bind mounts first before doing anything else that may */
-    /* then access those mounted dirs */
-    curMount = vmDef->fss;
-    for (i = 0; curMount; curMount = curMount->next) {
-        // XXX fix
-        if (curMount->type != VIR_DOMAIN_FS_TYPE_MOUNT)
-            continue;
-        rc = mount(curMount->src,
-                   curMount->dst,
-                   NULL,
-                   MS_BIND,
-                   NULL);
-        if (0 != rc) {
-            lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                     _("failed to mount %s at %s for container: %s"),
-                     curMount->src, curMount->dst, strerror(errno));
-            return -1;
-        }
-    }
+    if (lxcContainerSetupMounts(vmDef) < 0)
+        return -1;
 
-    /* mount /proc */
-    rc = mount("lxcproc", "/proc", "proc", 0, NULL);
-    if (0 != rc) {
+    ttyfd = open(argv->ttyPath, O_RDWR|O_NOCTTY);
+    if (ttyfd < 0) {
         lxcError(NULL, NULL, VIR_ERR_INTERNAL_ERROR,
-                 _("failed to mount /proc for container: %s"),
-                 strerror(errno));
+                 _("open(%s) failed: %s"), argv->ttyPath, strerror(errno));
         return -1;
     }
 
-    if (lxcContainerSetStdio(argv->monitor, argv->ttyPath) < 0)
+    if (lxcContainerSetStdio(argv->monitor, ttyfd) < 0) {
+        close(ttyfd);
         return -1;
+    }
+    close(ttyfd);
 
     /* Wait for interface devices to show up */
     if (lxcContainerWaitForContinue(argv->monitor) < 0)
diff -r 4d49719aa768 src/util.c
--- a/src/util.c	Wed Aug 27 22:19:33 2008 +0100
+++ b/src/util.c	Wed Aug 27 22:21:15 2008 +0100
@@ -616,13 +616,11 @@
     if (!(p = strrchr(parent, '/')))
         return EINVAL;
 
-    if (p == parent)
-        return EPERM;
-
-    *p = '\0';
-
-    if ((err = virFileMakePath(parent)))
-        return err;
+    if (p != parent) {
+        *p = '\0';
+        if ((err = virFileMakePath(parent)))
+            return err;
+    }
 
     if (mkdir(path, 0777) < 0 && errno != EEXIST)
         return errno;


Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

--
Libvir-list mailing list
Libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list

[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]