Add an ns tag struct that consists of just a refcount. Its address can be used to compare namespaces without the need to pin a namespace. Just the tag needs pinning. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> --- fs/namespace.c | 18 ++++++++---------- include/linux/ns_common.h | 23 +++++++++++++++++++++++ include/linux/proc_ns.h | 38 +++++++++++++++++++++++++++++++++++--- init/version.c | 9 ++++++++- ipc/msgutil.c | 7 ++++++- ipc/namespace.c | 8 +++----- kernel/cgroup/cgroup.c | 5 +++++ kernel/cgroup/namespace.c | 6 +++--- kernel/pid.c | 5 +++++ kernel/pid_namespace.c | 18 +++++++++--------- kernel/time/namespace.c | 13 +++++-------- kernel/user.c | 5 +++++ kernel/user_namespace.c | 7 +++---- kernel/utsname.c | 24 +++++++++++++----------- net/core/net_namespace.c | 38 +++++++++++++++----------------------- 15 files changed, 146 insertions(+), 78 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 9d33909d0f9e..f8da9be8c6f7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3238,10 +3238,9 @@ static void dec_mnt_namespaces(struct ucounts *ucounts) static void free_mnt_ns(struct mnt_namespace *ns) { - if (!is_anon_ns(ns)) - ns_free_inum(&ns->ns); dec_mnt_namespaces(ns->ucounts); put_user_ns(ns->user_ns); + destroy_ns_common(&ns->ns); kfree(ns); } @@ -3269,18 +3268,17 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a dec_mnt_namespaces(ucounts); return ERR_PTR(-ENOMEM); } - if (!anon) { - ret = ns_alloc_inum(&new_ns->ns); - if (ret) { - kfree(new_ns); - dec_mnt_namespaces(ucounts); - return ERR_PTR(ret); - } + + ret = init_ns_common(&new_ns->ns, anon); + if (ret) { + destroy_ns_common(&new_ns->ns); + kfree(new_ns); + dec_mnt_namespaces(ucounts); + return ERR_PTR(ret); } new_ns->ns.ops = &mntns_operations; if (!anon) new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); - refcount_set(&new_ns->ns.count, 1); INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); spin_lock_init(&new_ns->ns_lock); diff --git 
a/include/linux/ns_common.h b/include/linux/ns_common.h index 0f1d024bd958..45174ad8a435 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -3,14 +3,37 @@ #define _LINUX_NS_COMMON_H #include <linux/refcount.h> +#include <linux/slab.h> struct proc_ns_operations; +/* + * Comparable tag for namespaces so that namespaces don't have to be pinned by + * something that wishes to detect if a namespace matches a criterion. + */ +struct ns_tag { + refcount_t usage; +}; + struct ns_common { atomic_long_t stashed; const struct proc_ns_operations *ops; + struct ns_tag *tag; unsigned int inum; refcount_t count; }; +static inline struct ns_tag *get_ns_tag(struct ns_tag *tag) +{ + if (tag) + refcount_inc(&tag->usage); + return tag; +} + +static inline void put_ns_tag(struct ns_tag *tag) +{ + if (tag && refcount_dec_and_test(&tag->usage)) + kfree(tag); +} + #endif diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 75807ecef880..9fb7eb403923 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -64,13 +64,45 @@ static inline void proc_free_inum(unsigned int inum) {} #endif /* CONFIG_PROC_FS */ -static inline int ns_alloc_inum(struct ns_common *ns) +/** + * init_ns_common - Initialise the common part of a namespace + * @ns: The namespace to initialise + * @anon: The namespace will be anonymous + * + * Set up the common part of a namespace, assigning an inode number and + * creating a tag. Returns 0 on success and a negative error code on failure. + * On failure, the caller must call destroy_ns_common(). + */ +static inline int init_ns_common(struct ns_common *ns, bool anon) { + struct ns_tag *tag; + + tag = kzalloc(sizeof(*tag), GFP_KERNEL); + if (!tag) + return -ENOMEM; + + refcount_set(&tag->usage, 1); + ns->tag = tag; + ns->inum = 0; atomic_long_set(&ns->stashed, 0); - return proc_alloc_inum(&ns->inum); + refcount_set(&ns->count, 1); + + return anon ? 
0 : proc_alloc_inum(&ns->inum); } -#define ns_free_inum(ns) proc_free_inum((ns)->inum) +/** + * destroy_ns_common - Clean up the common part of a namespace + * @ns: The namespace to clean up + */ +static inline void destroy_ns_common(struct ns_common *ns) +{ + put_ns_tag(ns->tag); + ns->tag = NULL; + if (ns->inum) { + proc_free_inum(ns->inum); + ns->inum = 0; + } +} extern struct file *proc_ns_fget(int fd); #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private) diff --git a/init/version.c b/init/version.c index 80d2b7566b39..3c867b6c4aa4 100644 --- a/init/version.c +++ b/init/version.c @@ -24,8 +24,15 @@ extern int version_string(LINUX_VERSION_CODE); int version_string(LINUX_VERSION_CODE); #endif +static struct ns_tag init_uts_ns_tag = { + .usage = REFCOUNT_INIT(1), +}; + struct uts_namespace init_uts_ns = { - .ns.count = REFCOUNT_INIT(2), + .ns = { + .count = REFCOUNT_INIT(2), + .tag = &init_uts_ns_tag, + }, .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, diff --git a/ipc/msgutil.c b/ipc/msgutil.c index d0a0e877cadd..62bf194c38c6 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -20,13 +20,18 @@ DEFINE_SPINLOCK(mq_lock); +static struct ns_tag init_ipc_ns_tag = { + .usage = REFCOUNT_INIT(1), +}; + /* * The next 2 defines are here bc this is the only file * compiled when either CONFIG_SYSVIPC and CONFIG_POSIX_MQUEUE * and not CONFIG_IPC_NS. 
*/ struct ipc_namespace init_ipc_ns = { - .ns.count = REFCOUNT_INIT(1), + .ns.tag = &init_ipc_ns_tag, + .ns.count = REFCOUNT_INIT(2), .user_ns = &init_user_ns, .ns.inum = PROC_IPC_INIT_INO, #ifdef CONFIG_IPC_NS diff --git a/ipc/namespace.c b/ipc/namespace.c index 7bd0766ddc3b..06c0829ab866 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -46,12 +46,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, if (ns == NULL) goto fail_dec; - err = ns_alloc_inum(&ns->ns); + err = init_ns_common(&ns->ns, false); if (err) goto fail_free; ns->ns.ops = &ipcns_operations; - - refcount_set(&ns->ns.count, 1); ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; @@ -67,8 +65,8 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, fail_put: put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); fail_free: + destroy_ns_common(&ns->ns); kfree(ns); fail_dec: dec_ipc_namespaces(ucounts); @@ -127,7 +125,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) dec_ipc_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + destroy_ns_common(&ns->ns); kfree(ns); } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 613845769103..fb397fa2386f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -197,8 +197,13 @@ static u16 have_exit_callback __read_mostly; static u16 have_release_callback __read_mostly; static u16 have_canfork_callback __read_mostly; +static struct ns_tag init_cgroup_ns_tag = { + .usage = REFCOUNT_INIT(1), +}; + /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { + .ns.tag = &init_cgroup_ns_tag, .ns.count = REFCOUNT_INIT(2), .user_ns = &init_user_ns, .ns.ops = &cgroupns_operations, diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c index f5e8828c109c..7c8c0ccd1feb 100644 --- a/kernel/cgroup/namespace.c +++ b/kernel/cgroup/namespace.c @@ -27,12 +27,12 @@ static struct cgroup_namespace *alloc_cgroup_ns(void) new_ns = 
kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); - ret = ns_alloc_inum(&new_ns->ns); + ret = init_ns_common(&new_ns->ns, false); if (ret) { + destroy_ns_common(&new_ns->ns); kfree(new_ns); return ERR_PTR(ret); } - refcount_set(&new_ns->ns.count, 1); new_ns->ns.ops = &cgroupns_operations; return new_ns; } @@ -42,7 +42,7 @@ void free_cgroup_ns(struct cgroup_namespace *ns) put_css_set(ns->root_cset); dec_cgroup_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + destroy_ns_common(&ns->ns); kfree(ns); } EXPORT_SYMBOL(free_cgroup_ns); diff --git a/kernel/pid.c b/kernel/pid.c index ebdf9c60cd0b..65015c5b26db 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -66,6 +66,10 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; +static struct ns_tag init_pid_ns_tag = { + .usage = REFCOUNT_INIT(1), +}; + /* * PID-map pages start out as NULL, they get allocated upon * first use and are never deallocated. This way a low pid_max @@ -73,6 +77,7 @@ int pid_max_max = PID_MAX_LIMIT; * the scheme scales to up to 4 million PIDs, runtime. 
*/ struct pid_namespace init_pid_ns = { + .ns.tag = &init_pid_ns_tag, .ns.count = REFCOUNT_INIT(2), .idr = IDR_INIT(init_pid_ns.idr), .pid_allocated = PIDNS_ADDING, diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index ca43239a255a..a562071e52e1 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -93,16 +93,15 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns idr_init(&ns->idr); - ns->pid_cachep = create_pid_cachep(level); - if (ns->pid_cachep == NULL) - goto out_free_idr; - - err = ns_alloc_inum(&ns->ns); + err = init_ns_common(&ns->ns, false); if (err) - goto out_free_idr; + goto out_free; ns->ns.ops = &pidns_operations; - refcount_set(&ns->ns.count, 1); + ns->pid_cachep = create_pid_cachep(level); + if (ns->pid_cachep == NULL) + goto out_free; + ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); @@ -111,8 +110,9 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns return ns; -out_free_idr: +out_free: idr_destroy(&ns->idr); + destroy_ns_common(&ns->ns); kmem_cache_free(pid_ns_cachep, ns); out_dec: dec_pid_namespaces(ucounts); @@ -132,7 +132,7 @@ static void delayed_free_pidns(struct rcu_head *p) static void destroy_pid_namespace(struct pid_namespace *ns) { - ns_free_inum(&ns->ns); + destroy_ns_common(&ns->ns); idr_destroy(&ns->idr); call_rcu(&ns->rcu, delayed_free_pidns); diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 6ca625f5e554..5c5847048900 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -92,16 +92,14 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, if (!ns) goto fail_dec; - refcount_set(&ns->ns.count, 1); + err = init_ns_common(&ns->ns, false); + if (err) + goto fail_free; ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!ns->vvar_page) goto fail_free; - err = ns_alloc_inum(&ns->ns); - if (err) - goto fail_free_page; - ns->ucounts = ucounts; 
ns->ns.ops = &timens_operations; ns->user_ns = get_user_ns(user_ns); @@ -109,9 +107,8 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, ns->frozen_offsets = false; return ns; -fail_free_page: - __free_page(ns->vvar_page); fail_free: + destroy_ns_common(&ns->ns); kfree(ns); fail_dec: dec_time_namespaces(ucounts); @@ -230,7 +227,7 @@ void free_time_ns(struct time_namespace *ns) { dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + destroy_ns_common(&ns->ns); __free_page(ns->vvar_page); kfree(ns); } diff --git a/kernel/user.c b/kernel/user.c index a2478cddf536..78ee75f4cd21 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -20,6 +20,10 @@ #include <linux/user_namespace.h> #include <linux/proc_ns.h> +static struct ns_tag init_user_ns_tag = { + .usage = REFCOUNT_INIT(1), +}; + /* * userns count is 1 for root user, 1 for init_uts_ns, * and 1 for... ? @@ -55,6 +59,7 @@ struct user_namespace init_user_ns = { }, }, }, + .ns.tag = &init_user_ns_tag, .ns.count = REFCOUNT_INIT(3), .owner = GLOBAL_ROOT_UID, .group = GLOBAL_ROOT_GID, diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index af612945a4d0..f60cf7b5973c 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -106,12 +106,11 @@ int create_user_ns(struct cred *new) if (!ns) goto fail_dec; - ret = ns_alloc_inum(&ns->ns); + ret = init_ns_common(&ns->ns, false); if (ret) goto fail_free; ns->ns.ops = &userns_operations; - refcount_set(&ns->ns.count, 1); /* Leave the new->user_ns reference with the new user namespace. 
*/ ns->parent = parent_ns; ns->level = parent_ns->level + 1; @@ -142,8 +141,8 @@ int create_user_ns(struct cred *new) #ifdef CONFIG_PERSISTENT_KEYRINGS key_put(ns->persistent_keyring_register); #endif - ns_free_inum(&ns->ns); fail_free: + destroy_ns_common(&ns->ns); kmem_cache_free(user_ns_cachep, ns); fail_dec: dec_user_namespaces(ucounts); @@ -193,7 +192,7 @@ static void free_user_ns(struct work_struct *work) } retire_userns_sysctls(ns); key_free_user_ns(ns); - ns_free_inum(&ns->ns); + destroy_ns_common(&ns->ns); kmem_cache_free(user_ns_cachep, ns); dec_user_namespaces(ucounts); ns = parent; diff --git a/kernel/utsname.c b/kernel/utsname.c index b1ac3ca870f2..4755f007199f 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -30,10 +30,17 @@ static void dec_uts_namespaces(struct ucounts *ucounts) static struct uts_namespace *create_uts_ns(void) { struct uts_namespace *uts_ns; + int err; uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL); - if (uts_ns) - refcount_set(&uts_ns->ns.count, 1); + if (uts_ns) { + err = init_ns_common(&uts_ns->ns, false); + if (err < 0) { + destroy_ns_common(&uts_ns->ns); + kmem_cache_free(uts_ns_cache, uts_ns); + return ERR_PTR(err); + } + } return uts_ns; } @@ -54,14 +61,11 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, if (!ucounts) goto fail; - err = -ENOMEM; ns = create_uts_ns(); - if (!ns) + if (IS_ERR(ns)) { + err = PTR_ERR(ns); goto fail_dec; - - err = ns_alloc_inum(&ns->ns); - if (err) - goto fail_free; + } ns->ucounts = ucounts; ns->ns.ops = &utsns_operations; @@ -72,8 +76,6 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns, up_read(&uts_sem); return ns; -fail_free: - kmem_cache_free(uts_ns_cache, ns); fail_dec: dec_uts_namespaces(ucounts); fail: @@ -107,7 +109,7 @@ void free_uts_ns(struct uts_namespace *ns) { dec_uts_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); + destroy_ns_common(&ns->ns); kmem_cache_free(uts_ns_cache, ns); } diff --git 
a/net/core/net_namespace.c b/net/core/net_namespace.c index 2ef3b4557f40..f53f7ddec553 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -44,8 +44,14 @@ EXPORT_SYMBOL_GPL(net_rwsem); static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) }; #endif +static struct ns_tag init_net_tag = { + .usage = REFCOUNT_INIT(1), +}; + struct net init_net = { - .ns.count = REFCOUNT_INIT(1), + .ns.tag = &init_net_tag, + .ns.count = REFCOUNT_INIT(2), + .ns.ops = &netns_operations, .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), #ifdef CONFIG_KEYS .key_domain = &init_net_key_domain, @@ -329,7 +335,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) int error = 0; LIST_HEAD(net_exit_list); - refcount_set(&net->ns.count, 1); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; @@ -419,6 +424,10 @@ static struct net *net_alloc(void) if (!net) goto out_free; + if (init_ns_common(&net->ns, false) < 0) + goto out_free_2; + net->ns.ops = &netns_operations; + #ifdef CONFIG_KEYS net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL); if (!net->key_domain) @@ -432,6 +441,7 @@ static struct net *net_alloc(void) #ifdef CONFIG_KEYS out_free_2: + destroy_ns_common(&net->ns); kmem_cache_free(net_cachep, net); net = NULL; #endif @@ -443,6 +453,7 @@ static struct net *net_alloc(void) static void net_free(struct net *net) { kfree(rcu_access_pointer(net->gen)); + destroy_ns_common(&net->ns); kmem_cache_free(net_cachep, net); } @@ -700,24 +711,6 @@ struct net *get_net_ns_by_pid(pid_t pid) } EXPORT_SYMBOL_GPL(get_net_ns_by_pid); -static __net_init int net_ns_net_init(struct net *net) -{ -#ifdef CONFIG_NET_NS - net->ns.ops = &netns_operations; -#endif - return ns_alloc_inum(&net->ns); -} - -static __net_exit void net_ns_net_exit(struct net *net) -{ - ns_free_inum(&net->ns); -} - -static struct pernet_operations __net_initdata net_ns_ops = { - .init = net_ns_net_init, - 
.exit = net_ns_net_exit, -}; - static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { [NETNSA_NONE] = { .type = NLA_UNSPEC }, [NETNSA_NSID] = { .type = NLA_S32 }, @@ -1097,6 +1090,8 @@ static int __init net_ns_init(void) panic("Could not create netns workq"); #endif + proc_alloc_inum(&init_net.ns.inum); + ng = net_alloc_generic(); if (!ng) panic("Could not allocate generic netns"); @@ -1114,9 +1109,6 @@ static int __init net_ns_init(void) init_net_initialized = true; up_write(&pernet_ops_rwsem); - if (register_pernet_subsys(&net_ns_ops)) - panic("Could not register network namespace subsystems"); - rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,