On 07/20/2016 04:21 AM, Eric W. Biederman wrote: > Export the export the maximum number of user namespaces as > /proc/sys/userns/max_user_namespaces. > > Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> > --- > include/linux/user_namespace.h | 2 ++ > kernel/fork.c | 2 ++ > kernel/user_namespace.c | 68 +++++++++++++++++++++++++++++++++++++----- > 3 files changed, 64 insertions(+), 8 deletions(-) > > diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h > index 7d59af1f08f1..ba6a995178f9 100644 > --- a/include/linux/user_namespace.h > +++ b/include/linux/user_namespace.h > @@ -43,6 +43,8 @@ struct user_namespace { > struct ctl_table_set set; > struct ctl_table_header *sysctls; > #endif > + int max_user_namespaces; > + atomic_t user_namespaces; > }; > > extern struct user_namespace init_user_ns; > diff --git a/kernel/fork.c b/kernel/fork.c > index 5c2c355aa97f..95d5498c463f 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -323,6 +323,8 @@ void __init fork_init(void) > init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; > init_task.signal->rlim[RLIMIT_SIGPENDING] = > init_task.signal->rlim[RLIMIT_NPROC]; > + > + init_user_ns.max_user_namespaces = max_threads; > } > > int __weak arch_dup_task_struct(struct task_struct *dst, > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index b5b5cbb8fe92..a3cacf3f4747 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -63,7 +63,18 @@ static struct ctl_table_root set_root = { > .permissions = set_permissions, > }; > > +static int zero = 0; > +static int max = INT_MAX - 1; > static struct ctl_table userns_table[] = { > + { > + .procname = "max_user_namespaces", > + .data = &init_user_ns.max_user_namespaces, > + .maxlen = sizeof(init_user_ns.max_user_namespaces), > + .mode = 0644, > + .proc_handler = proc_dointvec_minmax, > + .extra1 = &zero, > + .extra2 = &max, > + }, > { } > }; > #endif /* CONFIG_SYSCTL */ > @@ -75,6 +86,8 @@ static bool setup_userns_sysctls(struct user_namespace *ns) > setup_sysctl_set(&ns->set, &set_root, set_is_seen); > tbl = kmemdup(userns_table, sizeof(userns_table), GFP_KERNEL); > if (tbl) { > + tbl[0].data = &ns->max_user_namespaces; > + > ns->sysctls = __register_sysctl_table(&ns->set, "userns", tbl); > } > if (!ns->sysctls) { > @@ -113,6 +126,34 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) > cred->user_ns = user_ns; > } > > +static bool inc_user_namespaces(struct user_namespace *ns) > +{ > + struct user_namespace *pos, *bad; > + for (pos = ns; pos; pos = pos->parent) { > + int max = READ_ONCE(pos->max_user_namespaces); > + int sum = atomic_inc_return(&pos->user_namespaces); > + if (sum > max) > + goto fail; > + } > + return true; > +fail: > + bad = pos; > + atomic_dec(&pos->user_namespaces); > + for (pos = ns; pos != bad; pos = pos->parent) > + atomic_dec(&pos->user_namespaces); > + > + return false; > +} > + > +static void dec_user_namespaces(struct user_namespace *ns) > +{ > + struct user_namespace *pos; > + for (pos = ns; pos; pos = pos->parent) { > + int dec = atomic_dec_if_positive(&pos->user_namespaces); > + WARN_ON_ONCE(dec < 0); > + } > +} Here you are essentially duplicating the logic of page_counter_try_charge and page_counter_uncharge. Perhaps the code of the page_counter can be moved away from mm/ and into kernel and the renamed to something more generic? E.g. the removed resource_counters or maybe "hierarchical counters"? If more namespaces are added in the future we certainly wouldn't want proliferation of pairs of such hierarchical inc/dec functions. > + > /* > * Create a new user namespace, deriving the creator from the user in the > * passed credentials, and replacing that user with the new root user for the > @@ -128,8 +169,12 @@ int create_user_ns(struct cred *new) > kgid_t group = new->egid; > int ret; > > + ret = -EUSERS; > if (parent_ns->level > 32) > - return -EUSERS; > + goto fail; > + > + if (!inc_user_namespaces(parent_ns)) > + goto fail; I think it would be better if the simple "return -EUSERS" semantics is saved and the "fail" label entirely nuked. In this case you only leave the more complex error handling. > > /* > * Verify that we can not violate the policy of which files > @@ -137,26 +182,27 @@ int create_user_ns(struct cred *new) > * by verifing that the root directory is at the root of the > * mount namespace which allows all files to be accessed. > */ > + ret = -EPERM; > if (current_chrooted()) > - return -EPERM; > + goto fail_dec; > > /* The creator needs a mapping in the parent user namespace > * or else we won't be able to reasonably tell userspace who > * created a user_namespace. > */ > + ret = -EPERM; > if (!kuid_has_mapping(parent_ns, owner) || > !kgid_has_mapping(parent_ns, group)) > - return -EPERM; > + goto fail_dec; > > + ret = -ENOMEM; > ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); > if (!ns) > - return -ENOMEM; > + goto fail_dec; > > ret = ns_alloc_inum(&ns->ns); > - if (ret) { > - kmem_cache_free(user_ns_cachep, ns); > - return ret; > - } > + if (ret) > + goto fail_free; > ns->ns.ops = &userns_operations; > > atomic_set(&ns->count, 1); > @@ -165,6 +211,7 @@ int create_user_ns(struct cred *new) > ns->level = parent_ns->level + 1; > ns->owner = owner; > ns->group = group; > + ns->max_user_namespaces = INT_MAX - 1; Do we want an INT_MAX -1 default initialization or inheritance from parent's value? > > /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ > mutex_lock(&userns_state_mutex); > @@ -185,7 +232,11 @@ fail_keyring: > key_put(ns->persistent_keyring_register); > #endif > ns_free_inum(&ns->ns); > +fail_free: > kmem_cache_free(user_ns_cachep, ns); > +fail_dec: > + dec_user_namespaces(parent_ns); > +fail: > return ret; > } > > @@ -221,6 +272,7 @@ void free_user_ns(struct user_namespace *ns) > #endif > ns_free_inum(&ns->ns); > kmem_cache_free(user_ns_cachep, ns); > + dec_user_namespaces(parent); > ns = parent; > } while (atomic_dec_and_test(&parent->count)); > } > _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers