Quoting Djalal Harouni (tixxdz@xxxxxxxxx): > If a process gets access to a mount from a different user > namespace, that process should not be able to take advantage of > setuid files or selinux entrypoints from that filesystem. Prevent > this by treating mounts from other mount namespaces and those not > owned by current_user_ns() or an ancestor as nosuid. > > This patch was just adapted from the original one that was written > by Andy Lutomirski <luto@xxxxxxxxxxxxxx> > https://www.redhat.com/archives/dm-devel/2016-April/msg00374.html I'm not sure that this makes sense given what you're doing. In the case of Seth's set, a filesystem is mounted specifically (and privately) in a user namespace. We don't want for instance the initial user ns to find a link to a setuid-root exploit left in the container-mounted filesystem. But you are having a parent user namespace mount the fs so that its children can all access the fs, uid-shifted for convenience. Not allowing the child namespaces to make use of setuid-root does not seem applicable here. > Signed-off-by: Djalal Harouni <tixxdz@xxxxxxxxxx> > --- > fs/exec.c | 2 +- > fs/namespace.c | 15 +++++++++++++++ > include/linux/mount.h | 1 + > include/linux/user_namespace.h | 8 ++++++++ > kernel/user_namespace.c | 13 +++++++++++++ > security/commoncap.c | 2 +- > security/selinux/hooks.c | 2 +- > 7 files changed, 40 insertions(+), 3 deletions(-) > > diff --git a/fs/exec.c b/fs/exec.c > index c4010b8..706088d 100644 > --- a/fs/exec.c > +++ b/fs/exec.c > @@ -1391,7 +1391,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm) > bprm->cred->euid = current_euid(); > bprm->cred->egid = current_egid(); > > - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) > + if (!mnt_may_suid(bprm->file->f_path.mnt)) > return; > > if (task_no_new_privs(current)) > diff --git a/fs/namespace.c b/fs/namespace.c > index de02b39..a8820fb 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -3374,6 +3374,21 @@ found: > return visible; > } > > +bool mnt_may_suid(struct vfsmount *mnt) > +{ > + struct mount *m = real_mount(mnt); > + > + /* > + * Foreign mounts (accessed via fchdir or through /proc > + * symlinks) are always treated as if they are nosuid. This > + * prevents namespaces from trusting potentially unsafe > + * suid/sgid bits, file caps, or security labels that originate > + * in other namespaces. > + */ > + return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(m) && > + in_userns(current_user_ns(), m->mnt_ns->user_ns); > +} > + > static struct ns_common *mntns_get(struct task_struct *task) > { > struct ns_common *ns = NULL; > diff --git a/include/linux/mount.h b/include/linux/mount.h > index f822c3c..54a594d 100644 > --- a/include/linux/mount.h > +++ b/include/linux/mount.h > @@ -81,6 +81,7 @@ extern void mntput(struct vfsmount *mnt); > extern struct vfsmount *mntget(struct vfsmount *mnt); > extern struct vfsmount *mnt_clone_internal(struct path *path); > extern int __mnt_is_readonly(struct vfsmount *mnt); > +extern bool mnt_may_suid(struct vfsmount *mnt); > > struct path; > extern struct vfsmount *clone_private_mount(struct path *path); > diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h > index 8297e5b..a43faa7 100644 > --- a/include/linux/user_namespace.h > +++ b/include/linux/user_namespace.h > @@ -72,6 +72,8 @@ extern ssize_t proc_projid_map_write(struct file *, const char __user *, size_t, > extern ssize_t proc_setgroups_write(struct file *, const char __user *, size_t, loff_t *); > extern int proc_setgroups_show(struct seq_file *m, void *v); > extern bool userns_may_setgroups(const struct user_namespace *ns); > +extern bool in_userns(const struct user_namespace *ns, > + const struct user_namespace *target_ns); > #else > > static inline struct user_namespace *get_user_ns(struct user_namespace *ns) > @@ -100,6 +102,12 @@ static inline bool userns_may_setgroups(const struct user_namespace *ns) > { > return true; > } > + > +static inline bool in_userns(const struct user_namespace *ns, > + const struct user_namespace *target_ns) > +{ > + return true; > +} > #endif > > #endif /* _LINUX_USER_H */ > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 9bafc21..9a496a8 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -938,6 +938,19 @@ bool userns_may_setgroups(const struct user_namespace *ns) > return allowed; > } > > +/* > + * Returns true if @ns is the same namespace as or a descendant of > + * @target_ns. > + */ > +bool in_userns(const struct user_namespace *ns, > + const struct user_namespace *target_ns) > +{ > + for (; ns; ns = ns->parent) { > + if (ns == target_ns) > + return true; > + } > +} > + > static inline struct user_namespace *to_user_ns(struct ns_common *ns) > { > return container_of(ns, struct user_namespace, ns); > diff --git a/security/commoncap.c b/security/commoncap.c > index 48071ed..6c082d2 100644 > --- a/security/commoncap.c > +++ b/security/commoncap.c > @@ -453,7 +453,7 @@ static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_c > if (!file_caps_enabled) > return 0; > > - if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) > + if (!mnt_may_suid(bprm->file->f_path.mnt)) > return 0; > > rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps); > diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c > index 912deee..1350167 100644 > --- a/security/selinux/hooks.c > +++ b/security/selinux/hooks.c > @@ -2234,7 +2234,7 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, > const struct task_security_struct *new_tsec) > { > int nnp = (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS); > - int nosuid = (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID); > + int nosuid = !mnt_may_suid(bprm->file->f_path.mnt); > int rc; > > if (!nnp && !nosuid) > -- > 2.5.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html