On Mon, 2021-12-06 at 16:44 +0100, Christian Brauner wrote: > On Mon, Dec 06, 2021 at 08:38:29AM -0500, James Bottomley wrote: > > On Mon, 2021-12-06 at 13:08 +0100, Christian Brauner wrote: [...] > > > Instead subsequents mounts resurface the same superblock. There > > > might be an inherent design reason why this needs to be this way > > > but I would advise against these semantics for anything that > > > wants to be namespaced. Probably the first securityfs mount in > > > init_user_ns can follow these semantics but ones tied to a non- > > > initial user namespace should not as the userns can go away. In > > > that case the pinning logic seems strange as conceptually the > > > userns pins the securityfs mount as evidenced by the fact that we > > > key by it in get_tree_keyed(). > > > > Yes, that's basically what I did: pin if ns == &init_user_ns but > > don't pin if not. However, I'm still not sure I got the triggers > > right. We have to trigger the notifier call (which adds the > > namespaced file entries) from context free, because that's the > > first place the superblock mount is fully set up ... I can't do it > > in fill_super because the mount isn't fully initialized (and the > > locking prevents it). I did manage to get the notifier for > > teardown triggered from kill_super, though. > > I don't think you need a vfsmount at all to be honest. I think this > can all be done without much ceremony. Here's a brutalist completely > untested patch outlining one approach: This is what I did (incremental to Stefan's series + my previous patch): it avoids superblock threading by switching to a root dentry in the securityfs user namespace area ... or am I being too simple again ... ? I'm still a bit unhappy about triggering a blocking notifier under the umount semaphore ... James --- diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6b8bd060d8c4..03a0879376a0 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -104,8 +104,7 @@ struct user_namespace { struct ima_namespace *ima_ns; #endif #ifdef CONFIG_SECURITYFS - struct vfsmount *securityfs_mount; - bool securityfs_notifier_sent; + struct dentry *securityfs_root; #endif } __randomize_layout; diff --git a/security/inode.c b/security/inode.c index 62ab4630dc31..863fccfd3687 100644 --- a/security/inode.c +++ b/security/inode.c @@ -25,6 +25,7 @@ #include <linux/user_namespace.h> #include <linux/ima.h> +static struct vfsmount *securityfs_mount; static int securityfs_mount_count; static BLOCKING_NOTIFIER_HEAD(securityfs_ns_notifier); @@ -41,42 +42,22 @@ static const struct super_operations securityfs_super_operations = { .free_inode = securityfs_free_inode, }; -static struct file_system_type fs_type; - -static void securityfs_free_context(struct fs_context *fc) -{ - struct user_namespace *ns = fc->user_ns; - if (ns == &init_user_ns || - ns->securityfs_notifier_sent) - return; - - ns->securityfs_notifier_sent = true; - - ns->securityfs_mount = vfs_kern_mount(&fs_type, SB_KERNMOUNT, - fs_type.name, NULL); - if (IS_ERR(ns->securityfs_mount)) { - printk(KERN_ERR "kern mount on securityfs ERROR: %ld\n", - PTR_ERR(ns->securityfs_mount)); - ns->securityfs_mount = NULL; - return; - } - - blocking_notifier_call_chain(&securityfs_ns_notifier, - SECURITYFS_NS_ADD, fc->user_ns); - mntput(ns->securityfs_mount); -} - static int securityfs_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr files[] = {{""}}; int error; + struct user_namespace *ns = fc->user_ns; error = simple_fill_super(sb, SECURITYFS_MAGIC, files); if (error) return error; + ns->securityfs_root = sb->s_root; + sb->s_op = &securityfs_super_operations; + blocking_notifier_call_chain(&securityfs_ns_notifier, + SECURITYFS_NS_ADD, ns); return 0; } @@ -87,7 +68,6 @@ static int securityfs_get_tree(struct fs_context *fc) static const struct fs_context_operations securityfs_context_ops = { .get_tree = securityfs_get_tree, - .free = securityfs_free_context, }; static int securityfs_init_fs_context(struct fs_context *fc) @@ -104,8 +84,7 @@ static void securityfs_kill_super(struct super_block *sb) blocking_notifier_call_chain(&securityfs_ns_notifier, SECURITYFS_NS_REMOVE, sb->s_fs_info); - ns->securityfs_notifier_sent = false; - ns->securityfs_mount = NULL; + ns->securityfs_root = NULL; kill_litter_super(sb); } @@ -179,14 +158,18 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, pr_debug("securityfs: creating file '%s', ns=%u\n",name, ns->ns.inum); if (ns == &init_user_ns) { - error = simple_pin_fs(&fs_type, &ns->securityfs_mount, + error = simple_pin_fs(&fs_type, &securityfs_mount, &securityfs_mount_count); if (error) return ERR_PTR(error); } - if (!parent) - parent = ns->securityfs_mount->mnt_root; + if (!parent) { + if (ns == &init_user_ns) + parent = securityfs_mount->mnt_root; + else + parent = ns->securityfs_root; + } dir = d_inode(parent); @@ -232,7 +215,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, out: inode_unlock(dir); if (ns == &init_user_ns) - simple_release_fs(&ns->securityfs_mount, + simple_release_fs(&securityfs_mount, &securityfs_mount_count); return dentry; } @@ -376,7 +359,7 @@ void securityfs_remove(struct dentry *dentry) } inode_unlock(dir); if (ns == &init_user_ns) - simple_release_fs(&ns->securityfs_mount, + simple_release_fs(&securityfs_mount, &securityfs_mount_count); } EXPORT_SYMBOL(securityfs_remove); @@ -405,8 +388,6 @@ static int __init securityfs_init(void) if (retval) return retval; - init_user_ns.securityfs_mount = NULL; - retval = register_filesystem(&fs_type); if (retval) { sysfs_remove_mount_point(kernel_kobj, "security");