The initial namespace is special in many ways. One feature it always has had is that it propagates all its devices into all non-initial namespaces. This is e.g. true for all device classes under /sys/class/ except the net_class. Even though none of the propagated files can be used there are still a lot of read-only values that are accessed or read by tools running in non-initial namespaces. To not regress such workloads we introduce the ability to tell kernfs to continue propagating devices from the initial namespace even when the kernfs_node is tagged with a non-initial namespace. Note that this is a purely opt-in feature, i.e. if there were a new device class that wanted to make use of this new infrastructure and did not want to propagate any devices into non-initial namespaces it could simply not implement the relevant callback. When a new directory in sysfs is created sysfs now can simply check whether the relevant device wants to propagate objects from the initial namespace or not. Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Christian Brauner <christian.brauner@xxxxxxxxxx> --- fs/kernfs/dir.c | 34 +++++++++++++++++++++++++++++----- fs/kernfs/kernfs-internal.h | 14 ++++++++++++++ include/linux/kernfs.h | 22 ++++++++++++++++++++++ include/linux/kobject_ns.h | 3 +++ lib/kobject.c | 2 ++ 5 files changed, 70 insertions(+), 5 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 1f2d894ae454..02796ba6521a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -575,10 +575,15 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) goto out_bad; /* The kernfs node has been moved to a different namespace */ - if (kn->parent && kernfs_ns_enabled(kn->parent) && - kernfs_info(dentry->d_sb)->ns[kn->ns_type] != kn->ns) - goto out_bad; + if (kn->parent && kernfs_ns_enabled(kn->parent)) { + if (kernfs_init_ns_propagates(kn->parent) && + kn->ns == kernfs_init_ns(kn->parent->ns_type)) + goto out_good; + if (kernfs_info(dentry->d_sb)->ns[kn->parent->ns_type] != kn->ns) + goto out_bad; + } +out_good: mutex_unlock(&kernfs_mutex); return 1; out_bad: @@ -1090,6 +1095,10 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, ns = kernfs_info(dir->i_sb)->ns[parent->ns_type]; kn = kernfs_find_ns(parent, dentry->d_name.name, ns); + if (!kn && kernfs_init_ns_propagates(parent)) { + ns = kernfs_init_ns(parent->ns_type); + kn = kernfs_find_ns(parent, dentry->d_name.name, ns); + } /* no such entry */ if (!kn || !kernfs_active(kn)) { @@ -1614,6 +1623,8 @@ static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) static struct kernfs_node *kernfs_dir_pos(const void *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { + const void *init_ns; + if (pos) { int valid = kernfs_active(pos) && pos->parent == parent && hash == pos->hash; @@ -1621,6 +1632,12 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, if (!valid) pos = NULL; } + + if (kernfs_init_ns_propagates(parent)) + init_ns = kernfs_init_ns(parent->ns_type); + else + init_ns = NULL; + if (!pos && (hash > 1) && (hash < INT_MAX)) { struct rb_node *node = parent->dir.children.rb_node; while (node) { @@ -1635,7 +1652,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, } } /* Skip over entries which are dying/dead or in the wrong namespace */ - while (pos && (!kernfs_active(pos) || pos->ns != ns)) { + while (pos && (!kernfs_active(pos) || (pos->ns != ns && pos->ns != init_ns))) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; @@ -1650,13 +1667,20 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns, { pos = kernfs_dir_pos(ns, parent, ino, pos); if (pos) { + const void *init_ns; + if (kernfs_init_ns_propagates(parent)) + init_ns = kernfs_init_ns(parent->ns_type); + else + init_ns = NULL; + do { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); - } while (pos && (!kernfs_active(pos) || pos->ns != ns)); + } while (pos && (!kernfs_active(pos) || + (pos->ns != ns && pos->ns != init_ns))); } return pos; } diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 6c375eb59460..4ba7b36103de 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -78,6 +78,20 @@ static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry) return d_inode(dentry)->i_private; } +extern struct net init_net; + +static inline const void *kernfs_init_ns(enum kobj_ns_type ns_type) +{ + switch (ns_type) { + case KOBJ_NS_TYPE_NET: + return &init_net; + default: + pr_debug("Unsupported namespace type %d for kernfs\n", ns_type); + } + + return NULL; +} + extern const struct super_operations kernfs_sops; extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache; diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 0e4414bd7007..5e2143e69c1c 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -51,6 +51,7 @@ enum kernfs_node_flag { KERNFS_SUICIDED = 0x0800, KERNFS_EMPTY_DIR = 0x1000, KERNFS_HAS_RELEASE = 0x2000, + KERNFS_NS_PROPAGATE = 0x4000, }; /* @flags for kernfs_create_root() */ @@ -330,6 +331,27 @@ static inline void kernfs_enable_ns(struct kernfs_node *kn, kn->ns_type = ns_type; } +static inline void kernfs_enable_init_ns_propagates(struct kernfs_node *kn) +{ + WARN_ON_ONCE(kernfs_type(kn) != KERNFS_DIR); + WARN_ON_ONCE(!RB_EMPTY_ROOT(&kn->dir.children)); + WARN_ON_ONCE(!(kn->flags & KERNFS_NS)); + kn->flags |= KERNFS_NS_PROPAGATE; +} + +/** + * kernfs_init_ns_propagates - test whether init ns propagates + * @kn: the node to test + * + * Test whether kernfs entries created in the init namespace propagate into + * other namespaces. + */ +static inline bool kernfs_init_ns_propagates(const struct kernfs_node *kn) +{ + return ((kn->flags & (KERNFS_NS | KERNFS_NS_PROPAGATE)) == + (KERNFS_NS | KERNFS_NS_PROPAGATE)); +} + /** * kernfs_ns_enabled - test whether namespace is enabled * @kn: the node to test diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 991a9286bcea..216f9112ee1d 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -34,6 +34,8 @@ enum kobj_ns_type { * @grab_current_ns: return a new reference to calling task's namespace * @initial_ns: return the initial namespace (i.e. init_net_ns) * @drop_ns: drops a reference to namespace + * @initial_ns_propagates: whether devices in the initial namespace propagate + * to all other namespaces */ struct kobj_ns_type_operations { enum kobj_ns_type type; @@ -41,6 +43,7 @@ struct kobj_ns_type_operations { void *(*grab_current_ns)(void); const void *(*initial_ns)(void); void (*drop_ns)(void *); + bool (*initial_ns_propagates)(void); }; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); diff --git a/lib/kobject.c b/lib/kobject.c index c58c62d49a10..96bb8c732d1c 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -121,6 +121,8 @@ static int create_dir(struct kobject *kobj) BUG_ON(!kobj_ns_type_registered(ops->type)); sysfs_enable_ns(kobj->sd, ops->type); + if (ops->initial_ns_propagates && ops->initial_ns_propagates()) + kernfs_enable_init_ns_propagates(kobj->sd); } return 0; -- 2.26.0