Some programs want to change some values for the network interfaces configuration in /proc/sys/net/ipv[46] folders. Giving RW access on them allows wicked to work on openSUSE 13.2+. In order to mount those folders RW but keep the rest of /proc/sys RO, we add temporary mounts for these folders before bind-mounting /proc/sys. Those mounts will be skipped if the container doesn't have its own network namespace. It may happen that one of the temporary mounts in /proc/ filesystem isn't available due to a missing kernel feature. We need not to fail in that case. --- Diffs to v1: * Only mount the /proc/sys/net/ipv[46] if the container has its own netns * Don't test for the existence of files in /proc before mounting them: they may not be ready when checking. Instead try to mount them and skip them if the source doesn't exist. * Use existing lxcNeedNetworkNamespace to tell lxcContainerMountBasicFS if we have our own netns: at least we now have the proper value. src/lxc/lxc_container.c | 153 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 121 insertions(+), 32 deletions(-) diff --git a/src/lxc/lxc_container.c b/src/lxc/lxc_container.c index 3b08b86..140d54f 100644 --- a/src/lxc/lxc_container.c +++ b/src/lxc/lxc_container.c @@ -800,15 +800,21 @@ typedef struct { int mflags; bool skipUserNS; bool skipUnmounted; + bool skipNoPrivNet; + bool temporary; } virLXCBasicMountInfo; static const virLXCBasicMountInfo lxcBasicMounts[] = { - { "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false }, - { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_RDONLY, false, false }, - { "sysfs", "/sys", "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false }, - { "securityfs", "/sys/kernel/security", "securityfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true }, + { "proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, false, false, false, false }, + { "/proc/sys/net/ipv4", "TMP1", NULL, MS_BIND, false, false, true, true }, + { "/proc/sys/net/ipv6", "TMP2", NULL, MS_BIND, false, false, true, true }, + { "/proc/sys", "/proc/sys", NULL, MS_BIND|MS_RDONLY, false, false, false, false }, + { "TMP1", "/proc/sys/net/ipv4", NULL, MS_BIND, false, false, true, false }, + { "TMP2", "/proc/sys/net/ipv6", NULL, MS_BIND, false, false, true, false }, + { "sysfs", "/sys", "sysfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, false, false, false, false }, + { "securityfs", "/sys/kernel/security", "securityfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true, false, false }, #if WITH_SELINUX - { SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true }, + { SELINUX_MOUNT, SELINUX_MOUNT, "selinuxfs", MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY, true, true, false, false }, #endif }; @@ -885,14 +891,23 @@ static int lxcContainerSetReadOnly(void) static int lxcContainerMountBasicFS(bool userns_enabled, bool netns_disabled) { - size_t i; + size_t i, j; int rc = -1; char* mnt_src = NULL; + char* mnt_dst = NULL; int mnt_mflags; + char **tmpkeys = NULL; + char **tmppaths = NULL; + size_t nmounts = ARRAY_CARDINALITY(lxcBasicMounts); VIR_DEBUG("Mounting basic filesystems"); - for (i = 0; i < ARRAY_CARDINALITY(lxcBasicMounts); i++) { + if (VIR_ALLOC_N(tmpkeys, nmounts) < 0 || + VIR_ALLOC_N(tmppaths, nmounts) < 0) { + goto cleanup; + } + + for (i = 0; i < nmounts; i++) { bool bindOverReadonly; virLXCBasicMountInfo const *mnt = &lxcBasicMounts[i]; @@ -906,11 +921,41 @@ static int lxcContainerMountBasicFS(bool userns_enabled, goto cleanup; mnt_mflags = MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY|MS_BIND; } else { - if (VIR_STRDUP(mnt_src, mnt->src) < 0) + bool foundKey = false; + /* Look for potential temporary folder match */ + for (j = 0; j < i; j++) { + if (STREQ_NULLABLE(mnt->src, tmpkeys[j])) { + /* We found the key without path: skip */ + foundKey = true; + if (tmppaths[j] && VIR_STRDUP(mnt_src, tmppaths[j]) < 0) + goto cleanup; + break; + } + } + if (foundKey && !mnt_src) + continue; + if (!mnt_src && VIR_STRDUP(mnt_src, mnt->src) < 0) goto cleanup; mnt_mflags = mnt->mflags; } + if (mnt->temporary) { + char tmppath[] = "/tmp/mount-XXXXXX"; + if (mkdtemp(tmppath) == NULL) { + virReportSystemError(errno, + _("Failed to create temporary folder %s"), + tmppath); + } + if (VIR_STRDUP(tmppaths[i], tmppath) < 0 || + VIR_STRDUP(tmpkeys[i], mnt->dst) < 0 || + VIR_STRDUP(mnt_dst, tmppath) < 0) { + goto cleanup; + } + } else { + if (VIR_STRDUP(mnt_dst, mnt->dst) < 0) + goto cleanup; + } + VIR_DEBUG("Processing %s -> %s", mnt_src, mnt->dst); @@ -930,6 +975,7 @@ static int lxcContainerMountBasicFS(bool userns_enabled, VIR_DEBUG("Skipping '%s' which isn't mounted in host", mnt->dst); VIR_FREE(mnt_src); + VIR_FREE(mnt_dst); continue; } } @@ -937,13 +983,21 @@ static int lxcContainerMountBasicFS(bool userns_enabled, if (mnt->skipUserNS && userns_enabled) { VIR_DEBUG("Skipping due to user ns enablement"); VIR_FREE(mnt_src); + VIR_FREE(mnt_dst); + continue; + } + + if (mnt->skipNoPrivNet && netns_disabled) { + VIR_DEBUG("Skipping due to absence of network namespace"); + VIR_FREE(mnt_src); + VIR_FREE(mnt_dst); continue; } - if (virFileMakePath(mnt->dst) < 0) { + if (virFileMakePath(mnt_dst) < 0) { virReportSystemError(errno, _("Failed to mkdir %s"), - mnt_src); + mnt_dst); goto cleanup; } @@ -957,32 +1011,67 @@ static int lxcContainerMountBasicFS(bool userns_enabled, bindOverReadonly = !!(mnt_mflags & MS_RDONLY); VIR_DEBUG("Mount %s on %s type=%s flags=%x", - mnt_src, mnt->dst, mnt->type, mnt_mflags & ~MS_RDONLY); - if (mount(mnt_src, mnt->dst, mnt->type, mnt_mflags & ~MS_RDONLY, NULL) < 0) { + mnt_src, mnt_dst, mnt->type, mnt_mflags & ~MS_RDONLY); + if (mount(mnt_src, mnt_dst, mnt->type, mnt_mflags & ~MS_RDONLY, NULL) < 0) { + /* Don't shout if some folder doesn't exist in /proc since they + * can be depending on an unloaded kernel module. */ + if (errno == ENOENT && STRPREFIX(mnt_src, "/proc/")) { + VIR_DEBUG("Skipped: %s doesn't exist", mnt_src); + /* Cleanup the temporary path */ + if (virFileDeleteTree(tmppaths[i]) < 0) + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to remove temporary folder %s"), + tmppaths[i]); + VIR_FREE(tmppaths[i]); + + VIR_FREE(mnt_src); + VIR_FREE(mnt_dst); + continue; + } virReportSystemError(errno, _("Failed to mount %s on %s type %s flags=%x"), - mnt_src, mnt->dst, NULLSTR(mnt->type), + mnt_src, mnt_dst, NULLSTR(mnt->type), mnt_mflags & ~MS_RDONLY); goto cleanup; } if (bindOverReadonly && - mount(mnt_src, mnt->dst, NULL, + mount(mnt_src, mnt_dst, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL) < 0) { virReportSystemError(errno, _("Failed to re-mount %s on %s flags=%x"), - mnt_src, mnt->dst, + mnt_src, mnt_dst, MS_BIND|MS_REMOUNT|MS_RDONLY); goto cleanup; } VIR_FREE(mnt_src); + VIR_FREE(mnt_dst); } rc = 0; cleanup: + /* Cleanup temporary mounts */ + for (i = 0; i < nmounts; i++) { + virLXCBasicMountInfo const *mnt = &lxcBasicMounts[i]; + if (mnt->temporary && tmppaths[i]) { + if (umount(tmppaths[i]) < 0) { + virReportSystemError(errno, + _("Failed to un-mount temporary %s"), + tmppaths[i]); + } + if (virFileDeleteTree(tmppaths[i]) < 0) + virReportError(VIR_ERR_INTERNAL_ERROR, + _("Failed to remove temporary folder %s"), + tmppaths[i]); + } + } + + virStringFreeList(tmpkeys); + virStringFreeList(tmppaths); VIR_FREE(mnt_src); + VIR_FREE(mnt_dst); VIR_DEBUG("rc=%d", rc); return rc; } @@ -1696,6 +1785,22 @@ static int lxcContainerUnmountForSharedRoot(const char *stateDir, return ret; } +static bool +lxcNeedNetworkNamespace(virDomainDefPtr def) +{ + size_t i; + if (def->nets != NULL) + return true; + if (def->features[VIR_DOMAIN_FEATURE_PRIVNET] == VIR_TRISTATE_SWITCH_ON) + return true; + for (i = 0; i < def->nhostdevs; i++) { + if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES && + def->hostdevs[i]->source.caps.type == VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET) + return true; + } + return false; +} + /* Got a FS mapped to /, we're going the pivot_root * approach to do a better-chroot-than-chroot @@ -1741,7 +1846,7 @@ static int lxcContainerSetupPivotRoot(virDomainDefPtr vmDef, /* Mounts the core /proc, /sys, etc filesystems */ if (lxcContainerMountBasicFS(vmDef->idmap.nuidmap, - !vmDef->nnets) < 0) + !lxcNeedNetworkNamespace(vmDef)) < 0) goto cleanup; /* Ensure entire root filesystem (except /.oldroot) is readonly */ @@ -2240,22 +2345,6 @@ virArch lxcContainerGetAlt32bitArch(virArch arch) } -static bool -lxcNeedNetworkNamespace(virDomainDefPtr def) -{ - size_t i; - if (def->nets != NULL) - return true; - if (def->features[VIR_DOMAIN_FEATURE_PRIVNET] == VIR_TRISTATE_SWITCH_ON) - return true; - for (i = 0; i < def->nhostdevs; i++) { - if (def->hostdevs[i]->mode == VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES && - def->hostdevs[i]->source.caps.type == VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET) - return true; - } - return false; -} - /** * lxcContainerStart: * @def: pointer to virtual machine structure -- 2.1.2 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list