From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>

When the last task in a namespace exits, we need to trigger freeing of the
namespace. Currently, we call synchronize_rcu() and free_nsproxy() directly
on the do_exit() path.

On my machine synchronize_rcu() blocks for about 0.01 seconds. For
comparison, a normal exit_group() syscall takes less than 0.0003 seconds.

Let's offload synchronize_rcu() and free_nsproxy() to a workqueue.

I also move synchronize_rcu() inside free_nsproxy(). This fixes the racy
put_nsproxy(), which calls free_nsproxy() without synchronize_rcu(). I guess
it was missed during the switch to RCU (see cf7b708).

Microbenchmark:

: #define _GNU_SOURCE
: #include <unistd.h>
: #include <sched.h>
: #include <stdlib.h>
: #include <sys/wait.h>
:
: int
: main(void)
: {
: 	int i;
: 	for (i = 0; i < 1024; i++) {
: 		if (fork()) {
: 			wait(NULL);
: 			continue;
: 		}
: 		unshare(CLONE_NEWIPC);
: 		exit(0);
: 	}
: 	return 0;
: }

Before the patch:

real	0m8.335s
user	0m0.000s
sys	0m0.265s

After:

real	0m0.569s
user	0m0.001s
sys	0m0.154s

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
Acked-by: Serge E. Hallyn <serge.hallyn@xxxxxxxxxx>
---
v2:
 - Updated description.
--- include/linux/nsproxy.h | 1 + kernel/nsproxy.c | 34 +++++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index cc37a55..1d26be7 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -24,6 +24,7 @@ struct fs_struct; */ struct nsproxy { atomic_t count; + struct work_struct free_nsproxy_work; struct uts_namespace *uts_ns; struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index b576f7f..ebc7d40 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -41,13 +41,17 @@ struct nsproxy init_nsproxy = { #endif }; +static void free_nsproxy_work(struct work_struct *work); + static inline struct nsproxy *create_nsproxy(void) { struct nsproxy *nsproxy; nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); - if (nsproxy) + if (nsproxy) { atomic_set(&nsproxy->count, 1); + INIT_WORK(&nsproxy->free_nsproxy_work, free_nsproxy_work); + } return nsproxy; } @@ -166,6 +170,14 @@ out: void free_nsproxy(struct nsproxy *ns) { + /* + * wait for others to get what they want from this nsproxy. + * + * cannot release this nsproxy via the call_rcu() since + * put_mnt_ns() will want to sleep + */ + synchronize_rcu(); + if (ns->mnt_ns) put_mnt_ns(ns->mnt_ns); if (ns->uts_ns) @@ -178,6 +190,14 @@ void free_nsproxy(struct nsproxy *ns) kmem_cache_free(nsproxy_cachep, ns); } +static void free_nsproxy_work(struct work_struct *work) +{ + struct nsproxy *ns = container_of(work, struct nsproxy, + free_nsproxy_work); + + free_nsproxy(ns); +} + /* * Called from unshare. Unshare all the namespaces part of nsproxy. * On success, returns the new nsproxy. @@ -215,16 +235,8 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) rcu_assign_pointer(p->nsproxy, new); - if (ns && atomic_dec_and_test(&ns->count)) { - /* - * wait for others to get what they want from this nsproxy. 
- * - * cannot release this nsproxy via the call_rcu() since - * put_mnt_ns() will want to sleep - */ - synchronize_rcu(); - free_nsproxy(ns); - } + if (ns && atomic_dec_and_test(&ns->count)) + schedule_work(&ns->free_nsproxy_work); } void exit_task_namespaces(struct task_struct *p) -- 1.7.7.6 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers