Quoting Seth Forshee (seth.forshee@xxxxxxxxxxxxx): > Both of these filesystems already have use cases for mounting the > same super block from multiple user namespaces. For sysfs this > happens when using criu for snapshotting a container, where sysfs > is mounted in the container's network ns but the host's user ns. > The cgroup filesystem shares the same super block for all mounts > of the same hierarchy regardless of the namespace. > > As a result, the restriction on mounting a super block from a > single user namespace creates regressions for existing uses of > these filesystems. For these specific filesystems this > restriction isn't really necessary since the backing store is > objects in kernel memory and thus the ids assigned from inodes > are not subject to translation relative to s_user_ns. > > Add a new filesystem flag, FS_USERNS_SHARE_SB, which when set > causes sget_userns() to skip the check of s_user_ns. Set this > flag for the sysfs and cgroup filesystems to fix the > regressions. > > Signed-off-by: Seth Forshee <seth.forshee@xxxxxxxxxxxxx> Acked-by: Serge Hallyn <serge.hallyn@xxxxxxxxxx> Thanks. 
> --- > fs/super.c | 3 ++- > fs/sysfs/mount.c | 3 ++- > include/linux/fs.h | 1 + > kernel/cgroup.c | 4 ++-- > 4 files changed, 7 insertions(+), 4 deletions(-) > > diff --git a/fs/super.c b/fs/super.c > index 092a7828442e..ead156b44bf8 100644 > --- a/fs/super.c > +++ b/fs/super.c > @@ -472,7 +472,8 @@ retry: > hlist_for_each_entry(old, &type->fs_supers, s_instances) { > if (!test(old, data)) > continue; > - if (user_ns != old->s_user_ns) { > + if (!(type->fs_flags & FS_USERNS_SHARE_SB) && > + user_ns != old->s_user_ns) { > spin_unlock(&sb_lock); > if (s) { > up_write(&s->s_umount); > diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c > index f3db82071cfb..9555accd4322 100644 > --- a/fs/sysfs/mount.c > +++ b/fs/sysfs/mount.c > @@ -59,7 +59,8 @@ static struct file_system_type sysfs_fs_type = { > .name = "sysfs", > .mount = sysfs_mount, > .kill_sb = sysfs_kill_sb, > - .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT, > + .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT | > + FS_USERNS_SHARE_SB, > }; > > int __init sysfs_init(void) > diff --git a/include/linux/fs.h b/include/linux/fs.h > index be0f8023e28c..66a639ec1bc4 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -1988,6 +1988,7 @@ struct file_system_type { > #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ > #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ > #define FS_USERNS_VISIBLE 32 /* FS must already be visible */ > +#define FS_USERNS_SHARE_SB 64 /* Allow sharing sb between userns-es */ > #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ > struct dentry *(*mount) (struct file_system_type *, int, > const char *, void *); > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index 671dc05c0b0f..9c9aa27e531a 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -2247,14 +2247,14 @@ static struct file_system_type cgroup_fs_type = { > .name = "cgroup", > .mount = cgroup_mount, > .kill_sb = cgroup_kill_sb, > - .fs_flags = FS_USERNS_MOUNT, > + .fs_flags = FS_USERNS_MOUNT | FS_USERNS_SHARE_SB, > }; > > static struct file_system_type cgroup2_fs_type = { > .name = "cgroup2", > .mount = cgroup_mount, > .kill_sb = cgroup_kill_sb, > - .fs_flags = FS_USERNS_MOUNT, > + .fs_flags = FS_USERNS_MOUNT | FS_USERNS_SHARE_SB, > }; > > static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, > -- > 1.9.1 -- To unsubscribe from this list: send the line "unsubscribe linux-bcache" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html