Quoting Eric W. Biederman (ebiederm@xxxxxxxxxxxx): > > When freeing a deeply nested user namespace free_user_ns calls > put_user_ns on it's parent which may in turn call free_user_ns again. > When -fno-optimize-sibling-calls is passed to gcc one stack frame per > user namespace is left on the stack, potentially overflowing the > kernel stack. CONFIG_FRAME_POINTER forces -fno-optimize-sibling-calls > so we can't count on gcc to optimize this code. > > Remove struct kref and use a plain atomic_t. Making the code more > flexible and easier to comprehend. Make the loop in free_user_ns > explict to guarantee that the stack does not overflow with > CONFIG_FRAME_POINTER enabled. > > I have tested this fix with a simple program that uses unshare to > create a deeply nested user namespace structure and then calls exit. > With 1000 nesteuser namespaces before this change running my test > program causes the kernel to die a horrible death. With 10,000,000 > nested user namespaces after this change my test program runs to > completion and causes no harm. > > Pointed-out-by: Vasily Kulikov <segoon@xxxxxxxxxxxx> Acked-by: Serge Hallyn <serge.hallyn@xxxxxxxxxxxxx> > Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> > --- > include/linux/user_namespace.h | 10 +++++----- > kernel/user.c | 4 +--- > kernel/user_namespace.c | 17 +++++++++-------- > 3 files changed, 15 insertions(+), 16 deletions(-) > > diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h > index b9bd2e6..4ce0093 100644 > --- a/include/linux/user_namespace.h > +++ b/include/linux/user_namespace.h > @@ -21,7 +21,7 @@ struct user_namespace { > struct uid_gid_map uid_map; > struct uid_gid_map gid_map; > struct uid_gid_map projid_map; > - struct kref kref; > + atomic_t count; > struct user_namespace *parent; > kuid_t owner; > kgid_t group; > @@ -35,18 +35,18 @@ extern struct user_namespace init_user_ns; > static inline struct user_namespace *get_user_ns(struct user_namespace *ns) > { > if (ns) > - kref_get(&ns->kref); > + atomic_inc(&ns->count); > return ns; > } > > extern int create_user_ns(struct cred *new); > extern int unshare_userns(unsigned long unshare_flags, struct cred **new_cred); > -extern void free_user_ns(struct kref *kref); > +extern void free_user_ns(struct user_namespace *ns); > > static inline void put_user_ns(struct user_namespace *ns) > { > - if (ns) > - kref_put(&ns->kref, free_user_ns); > + if (ns && atomic_dec_and_test(&ns->count)) > + free_user_ns(ns); > } > > struct seq_operations; > diff --git a/kernel/user.c b/kernel/user.c > index 33acb5e..57ebfd4 100644 > --- a/kernel/user.c > +++ b/kernel/user.c > @@ -47,9 +47,7 @@ struct user_namespace init_user_ns = { > .count = 4294967295U, > }, > }, > - .kref = { > - .refcount = ATOMIC_INIT(3), > - }, > + .count = ATOMIC_INIT(3), > .owner = GLOBAL_ROOT_UID, > .group = GLOBAL_ROOT_GID, > .proc_inum = PROC_USER_INIT_INO, > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 2b042c4..24f8ec3 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -78,7 +78,7 @@ int create_user_ns(struct cred *new) > return ret; > } > > - kref_init(&ns->kref); > + atomic_set(&ns->count, 1); > /* Leave the new->user_ns reference with the new user namespace. */ > ns->parent = parent_ns; > ns->owner = owner; > @@ -104,15 +104,16 @@ int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) > return create_user_ns(cred); > } > > -void free_user_ns(struct kref *kref) > +void free_user_ns(struct user_namespace *ns) > { > - struct user_namespace *parent, *ns = > - container_of(kref, struct user_namespace, kref); > + struct user_namespace *parent; > > - parent = ns->parent; > - proc_free_inum(ns->proc_inum); > - kmem_cache_free(user_ns_cachep, ns); > - put_user_ns(parent); > + do { > + parent = ns->parent; > + proc_free_inum(ns->proc_inum); > + kmem_cache_free(user_ns_cachep, ns); > + ns = parent; > + } while (atomic_dec_and_test(&parent->count)); > } > EXPORT_SYMBOL(free_user_ns); > > -- > 1.7.5.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html