On Wed, 2017-02-01 at 19:38 +1300, Eric W. Biederman wrote: > To support unprivileged users mounting filesystems two permission > checks have to be performed: a test to see if the user allowed to > create a mount in the mount namespace, and a test to see if > the user is allowed to access the specified filesystem. > > The automount case is special in that mounting the original > filesystem > grants permission to mount the sub-filesystems, to any user who > happens to stumble across the their mountpoint and satisfies the > ordinary filesystem permission checks. > > Attempting to handle the automount case by using override_creds > almost works. It preserves the idea that permission to mount > the original filesystem is permission to mount the sub-filesystem. > Unfortunately using override_creds messes up the filesystems > ordinary permission checks. > > Solve this by being explicit that a mount is a submount by > introducing > vfs_submount, and using it where appropriate. > > vfs_submount uses a new mount internal mount flags MS_SUBMOUNT, to > let > sget and friends know that a mount is a submount so they can take > appropriate > action. > > sget and sget_userns are modified to not perform any permission > checks > on submounts. > > follow_automount is modified to stop using override_creds as that > has proven problemantic. > > do_mount is modified to always remove the new MS_SUBMOUNT flag so > that we know userspace will never by able to specify it. > > autofs4 is modified to stop using current_real_cred that was put in > there to handle the previous version of submount permission checking. > > cifs is modified to pass the mountpoint all of the way down to > vfs_submount. > > debugfs is modified to pass the mountpoint all of the way down to > trace_automount by adding a new parameter. To make this change > easier > a new typedef debugfs_automount_t is introduced to capture the type > of > the debugfs automount function. > > Cc: stable@xxxxxxxxxxxxxxx > Fixes: 069d5ac9ae0d ("autofs: Fix automounts by using > current_real_cred()->uid") > Fixes: aeaa4a79ff6a ("fs: Call d_automount with the filesystems > creds") > Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> > --- > fs/afs/mntpt.c | 2 +- > fs/autofs4/waitq.c | 4 ++-- > fs/cifs/cifs_dfs_ref.c | 7 ++++--- > fs/debugfs/inode.c | 8 ++++---- > fs/namei.c | 3 --- > fs/namespace.c | 17 ++++++++++++++++- > fs/nfs/namespace.c | 2 +- > fs/nfs/nfs4namespace.c | 2 +- > fs/super.c | 13 ++++++++++--- > include/linux/debugfs.h | 3 ++- > include/linux/mount.h | 3 +++ > include/uapi/linux/fs.h | 1 + > kernel/trace/trace.c | 4 ++-- > 13 files changed, 47 insertions(+), 22 deletions(-) > > diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c > index 81dd075356b9..d4fb0afc0097 100644 > --- a/fs/afs/mntpt.c > +++ b/fs/afs/mntpt.c > @@ -202,7 +202,7 @@ static struct vfsmount > *afs_mntpt_do_automount(struct dentry *mntpt) > > /* try and do the mount */ > _debug("--- attempting mount %s -o %s ---", devname, > options); > - mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); > + mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); > _debug("--- mount result %p ---", mnt); > > free_page((unsigned long) devname); > diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c > index 1278335ce366..79fbd85db4ba 100644 > --- a/fs/autofs4/waitq.c > +++ b/fs/autofs4/waitq.c > @@ -436,8 +436,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, > memcpy(&wq->name, &qstr, sizeof(struct qstr)); > wq->dev = autofs4_get_dev(sbi); > wq->ino = autofs4_get_ino(sbi); > - wq->uid = current_real_cred()->uid; > - wq->gid = current_real_cred()->gid; > + wq->uid = current_cred()->uid; > + wq->gid = current_cred()->gid; > wq->pid = pid; > wq->tgid = tgid; > wq->status = -EINTR; /* Status return if interrupted > */ > diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c > index ec9dbbcca3b9..9156be545b0f 100644 > --- a/fs/cifs/cifs_dfs_ref.c > +++ b/fs/cifs/cifs_dfs_ref.c > @@ -245,7 +245,8 @@ char *cifs_compose_mount_options(const char > *sb_mountdata, > * @fullpath: full path in UNC format > * @ref: server's referral > */ > -static struct vfsmount *cifs_dfs_do_refmount(struct cifs_sb_info > *cifs_sb, > +static struct vfsmount *cifs_dfs_do_refmount(struct dentry *mntpt, > + struct cifs_sb_info *cifs_sb, > const char *fullpath, const struct dfs_info3_param > *ref) > { > struct vfsmount *mnt; > @@ -259,7 +260,7 @@ static struct vfsmount > *cifs_dfs_do_refmount(struct cifs_sb_info *cifs_sb, > if (IS_ERR(mountdata)) > return (struct vfsmount *)mountdata; > > - mnt = vfs_kern_mount(&cifs_fs_type, 0, devname, mountdata); > + mnt = vfs_submount(mntpt, &cifs_fs_type, devname, > mountdata); > kfree(mountdata); > kfree(devname); > return mnt; > @@ -334,7 +335,7 @@ static struct vfsmount > *cifs_dfs_do_automount(struct dentry *mntpt) > mnt = ERR_PTR(-EINVAL); > break; > } > - mnt = cifs_dfs_do_refmount(cifs_sb, > + mnt = cifs_dfs_do_refmount(mntpt, cifs_sb, > full_path, referrals + i); > cifs_dbg(FYI, "%s: cifs_dfs_do_refmount:%s , > mnt:%p\n", > __func__, referrals[i].node_name, mnt); > diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c > index f17fcf89e18e..1e30f74a9527 100644 > --- a/fs/debugfs/inode.c > +++ b/fs/debugfs/inode.c > @@ -187,9 +187,9 @@ static const struct super_operations > debugfs_super_operations = { > > static struct vfsmount *debugfs_automount(struct path *path) > { > - struct vfsmount *(*f)(void *); > - f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata; > - return f(d_inode(path->dentry)->i_private); > + debugfs_automount_t f; > + f = (debugfs_automount_t)path->dentry->d_fsdata; > + return f(path->dentry, d_inode(path->dentry)->i_private); > } > > static const struct dentry_operations debugfs_dops = { > @@ -504,7 +504,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); > */ > struct dentry *debugfs_create_automount(const char *name, > struct dentry *parent, > - struct vfsmount *(*f)(void > *), > + debugfs_automount_t f, > void *data) > { > struct dentry *dentry = start_creating(name, parent); > diff --git a/fs/namei.c b/fs/namei.c > index 6fa3e9138fe4..da689c9c005e 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -1100,7 +1100,6 @@ static int follow_automount(struct path *path, > struct nameidata *nd, > bool *need_mntput) > { > struct vfsmount *mnt; > - const struct cred *old_cred; > int err; > > if (!path->dentry->d_op || !path->dentry->d_op->d_automount) > @@ -1129,9 +1128,7 @@ static int follow_automount(struct path *path, > struct nameidata *nd, > if (nd->total_link_count >= 40) > return -ELOOP; > > - old_cred = override_creds(&init_cred); > mnt = path->dentry->d_op->d_automount(path); > - revert_creds(old_cred); > if (IS_ERR(mnt)) { > /* > * The filesystem is allowed to return -EISDIR here > to indicate > diff --git a/fs/namespace.c b/fs/namespace.c > index 487ba30bb5c6..089a6b23135a 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -989,6 +989,21 @@ vfs_kern_mount(struct file_system_type *type, > int flags, const char *name, void > } > EXPORT_SYMBOL_GPL(vfs_kern_mount); > > +struct vfsmount * > +vfs_submount(const struct dentry *mountpoint, struct > file_system_type *type, > + const char *name, void *data) > +{ > + /* Until it is worked out how to pass the user namespace > + * through from the parent mount to the submount don't > support > + * unprivileged mounts with submounts. > + */ > + if (mountpoint->d_sb->s_user_ns != &init_user_ns) > + return ERR_PTR(-EPERM); > + > + return vfs_kern_mount(type, MS_SUBMOUNT, name, data); > +} > +EXPORT_SYMBOL_GPL(vfs_submount); > + > static struct mount *clone_mnt(struct mount *old, struct dentry > *root, > int flag) > { > @@ -2794,7 +2809,7 @@ long do_mount(const char *dev_name, const char > __user *dir_name, > > flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | > MS_BORN | > MS_NOATIME | MS_NODIRATIME | MS_RELATIME| > MS_KERNMOUNT | > - MS_STRICTATIME | MS_NOREMOTELOCK); > + MS_STRICTATIME | MS_NOREMOTELOCK | MS_SUBMOUNT); > > if (flags & MS_REMOUNT) > retval = do_remount(&path, flags & ~MS_REMOUNT, > mnt_flags, > diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c > index 5551e8ef67fd..e49d831c4e85 100644 > --- a/fs/nfs/namespace.c > +++ b/fs/nfs/namespace.c > @@ -226,7 +226,7 @@ static struct vfsmount *nfs_do_clone_mount(struct > nfs_server *server, > const char *devname, > struct nfs_clone_mount > *mountdata) > { > - return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, > mountdata); > + return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, > devname, mountdata); > } > > /** > diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c > index d21104912676..d8b040bd9814 100644 > --- a/fs/nfs/nfs4namespace.c > +++ b/fs/nfs/nfs4namespace.c > @@ -279,7 +279,7 @@ static struct vfsmount *try_location(struct > nfs_clone_mount *mountdata, > mountdata->hostname, > mountdata->mnt_path); > > - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, > page, mountdata); > + mnt = vfs_submount(mountdata->dentry, > &nfs4_referral_fs_type, page, mountdata); > if (!IS_ERR(mnt)) > break; > } > diff --git a/fs/super.c b/fs/super.c > index 1709ed029a2c..4185844f7a12 100644 > --- a/fs/super.c > +++ b/fs/super.c > @@ -469,7 +469,7 @@ struct super_block *sget_userns(struct > file_system_type *type, > struct super_block *old; > int err; > > - if (!(flags & MS_KERNMOUNT) && > + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && > !(type->fs_flags & FS_USERNS_MOUNT) && > !capable(CAP_SYS_ADMIN)) > return ERR_PTR(-EPERM); > @@ -499,7 +499,7 @@ struct super_block *sget_userns(struct > file_system_type *type, > } > if (!s) { > spin_unlock(&sb_lock); > - s = alloc_super(type, flags, user_ns); > + s = alloc_super(type, (flags & ~MS_SUBMOUNT), > user_ns); > if (!s) > return ERR_PTR(-ENOMEM); > goto retry; > @@ -540,8 +540,15 @@ struct super_block *sget(struct file_system_type > *type, > { > struct user_namespace *user_ns = current_user_ns(); > > + /* We don't yet pass the user namespace of the parent > + * mount through to here so always use &init_user_ns > + * until that changes. > + */ > + if (flags & MS_SUBMOUNT) > + user_ns = &init_user_ns; > + > /* Ensure the requestor has permissions over the target > filesystem */ > - if (!(flags & MS_KERNMOUNT) && !ns_capable(user_ns, > CAP_SYS_ADMIN)) > + if (!(flags & (MS_KERNMOUNT|MS_SUBMOUNT)) && > !ns_capable(user_ns, CAP_SYS_ADMIN)) > return ERR_PTR(-EPERM); > > return sget_userns(type, test, set, flags, user_ns, data); > diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h > index 014cc564d1c4..233006be30aa 100644 > --- a/include/linux/debugfs.h > +++ b/include/linux/debugfs.h > @@ -97,9 +97,10 @@ struct dentry *debugfs_create_dir(const char > *name, struct dentry *parent); > struct dentry *debugfs_create_symlink(const char *name, struct > dentry *parent, > const char *dest); > > +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, > void *); > struct dentry *debugfs_create_automount(const char *name, > struct dentry *parent, > - struct vfsmount *(*f)(void > *), > + debugfs_automount_t f, > void *data); > > void debugfs_remove(struct dentry *dentry); > diff --git a/include/linux/mount.h b/include/linux/mount.h > index c6f55158d5e5..8e0352af06b7 100644 > --- a/include/linux/mount.h > +++ b/include/linux/mount.h > @@ -90,6 +90,9 @@ struct file_system_type; > extern struct vfsmount *vfs_kern_mount(struct file_system_type > *type, > int flags, const char *name, > void *data); > +extern struct vfsmount *vfs_submount(const struct dentry > *mountpoint, > + struct file_system_type *type, > + const char *name, void *data); > > extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head > *expiry_list); > extern void mark_mounts_for_expiry(struct list_head *mounts); > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h > index 36da93fbf188..048a85e9f017 100644 > --- a/include/uapi/linux/fs.h > +++ b/include/uapi/linux/fs.h > @@ -132,6 +132,7 @@ struct inodes_stat_t { > #define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times > lazily */ > > /* These sb flags are internal to the kernel */ > +#define MS_SUBMOUNT (1<<26) > #define MS_NOREMOTELOCK (1<<27) > #define MS_NOSEC (1<<28) > #define MS_BORN (1<<29) > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index d7449783987a..310f0ea0d1a2 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -7503,7 +7503,7 @@ init_tracer_tracefs(struct trace_array *tr, > struct dentry *d_tracer) > ftrace_init_tracefs(tr, d_tracer); > } > > -static struct vfsmount *trace_automount(void *ingore) > +static struct vfsmount *trace_automount(struct dentry *mntpt, void > *ingore) > { > struct vfsmount *mnt; > struct file_system_type *type; > @@ -7516,7 +7516,7 @@ static struct vfsmount *trace_automount(void > *ingore) > type = get_fs_type("tracefs"); > if (!type) > return NULL; > - mnt = vfs_kern_mount(type, 0, "tracefs", NULL); > + mnt = vfs_submount(mntpt, type, "tracefs", NULL); > put_filesystem(type); > if (IS_ERR(mnt)) > return NULL; Yes. That looks like it might be workable. Reviewed-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> -- Trond Myklebust Linux NFS client maintainer, PrimaryData trond.myklebust@xxxxxxxxxxxxxxx ��.n��������+%������w��{.n�����{��w���jg��������ݢj����G�������j:+v���w�m������w�������h�����٥