The subject is wrong. This should be: [RFC PATCH v2 2/8] Add a reference to ucounts for each cred. Further the explanation could use a little work. Something along the lines of: For RLIMIT_NPROC and some other rlimits the user_struct that holds the global limit is kept alive for the lifetime of a process by keeping it in struct cred. Add a ucounts reference to struct cred, so that RLIMIT_NPROC can switch from using a per user limit to using a per user per user namespace limit. Nits about the code below. Alexey Gladkov <gladkov.alexey@xxxxxxxxx> writes: > Before this, only the owner of the user namespace had an entry in ucounts. > This entry addressed the user in the given user namespace. > > Now we create such an entry in ucounts for all users in the user namespace. > Each user has only one entry for each user namespace. > > This commit is in preparation for migrating rlimits to ucounts. > > Signed-off-by: Alexey Gladkov <gladkov.alexey@xxxxxxxxx> > --- > include/linux/cred.h | 1 + > include/linux/user_namespace.h | 2 ++ > kernel/cred.c | 17 +++++++++++++++-- > kernel/ucount.c | 12 +++++++++++- > kernel/user_namespace.c | 1 + > 5 files changed, 30 insertions(+), 3 deletions(-) > > diff --git a/include/linux/cred.h b/include/linux/cred.h > index 18639c069263..307744fcc387 100644 > --- a/include/linux/cred.h > +++ b/include/linux/cred.h > @@ -144,6 +144,7 @@ struct cred { > #endif > struct user_struct *user; /* real user ID subscription */ > struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ > + struct ucounts *ucounts; > struct group_info *group_info; /* supplementary groups for euid/fsgid */ > /* RCU deletion */ > union { > diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h > index 84fefa9247c4..483568a56f7f 100644 > --- a/include/linux/user_namespace.h > +++ b/include/linux/user_namespace.h > @@ -102,6 +102,8 @@ bool setup_userns_sysctls(struct user_namespace *ns); > void retire_userns_sysctls(struct user_namespace *ns); > struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); > void dec_ucount(struct ucounts *ucounts, enum ucount_type type); > +void put_ucounts(struct ucounts *ucounts); > +void set_cred_ucounts(const struct cred *cred, struct user_namespace *ns, kuid_t uid); > > #ifdef CONFIG_USER_NS > > diff --git a/kernel/cred.c b/kernel/cred.c > index 421b1149c651..d19e2e97092c 100644 > --- a/kernel/cred.c > +++ b/kernel/cred.c > @@ -119,6 +119,7 @@ static void put_cred_rcu(struct rcu_head *rcu) > if (cred->group_info) > put_group_info(cred->group_info); > free_uid(cred->user); > + put_ucounts(cred->ucounts); > put_user_ns(cred->user_ns); > kmem_cache_free(cred_jar, cred); > } > @@ -144,6 +145,9 @@ void __put_cred(struct cred *cred) > BUG_ON(cred == current->cred); > BUG_ON(cred == current->real_cred); > > + BUG_ON(cred->ucounts == NULL); > + BUG_ON(cred->ucounts->ns != cred->user_ns); > + > if (cred->non_rcu) > put_cred_rcu(&cred->rcu); > else > @@ -271,6 +275,9 @@ struct cred *prepare_creds(void) > get_uid(new->user); > get_user_ns(new->user_ns); > > + new->ucounts = NULL; > + set_cred_ucounts(new, new->user_ns, new->euid); > + This hunk should be: atomic_inc(&new->count); That means you get to skip the lookup by uid and user_ns which while it should be cheap is completely unnecessary in this case. > #ifdef CONFIG_KEYS > key_get(new->session_keyring); > key_get(new->process_keyring); > @@ -363,6 +370,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) > ret = create_user_ns(new); > if (ret < 0) > goto error_put; > + set_cred_ucounts(new, new->user_ns, new->euid); > } > > #ifdef CONFIG_KEYS > @@ -485,8 +493,11 @@ int commit_creds(struct cred *new) > * in set_user(). > */ > alter_cred_subscribers(new, 2); > - if (new->user != old->user) > - atomic_inc(&new->user->processes); > + if (new->user != old->user || new->user_ns != old->user_ns) { > + if (new->user != old->user) > + atomic_inc(&new->user->processes); > + set_cred_ucounts(new, new->user_ns, new->euid); > + } > rcu_assign_pointer(task->real_cred, new); > rcu_assign_pointer(task->cred, new); > if (new->user != old->user) > @@ -661,6 +672,7 @@ void __init cred_init(void) > /* allocate a slab in which we can store credentials */ > cred_jar = kmem_cache_create("cred_jar", sizeof(struct cred), 0, > SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); > + set_cred_ucounts(&init_cred, &init_user_ns, GLOBAL_ROOT_UID); Unfortuantely this is needed here because this is the first cred and there is no ucount reference to copy. > } > > /** > @@ -704,6 +716,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) > get_uid(new->user); > get_user_ns(new->user_ns); > get_group_info(new->group_info); > + set_cred_ucounts(new, new->user_ns, new->euid); This hunk should be: atomic_inc(&new->count); > > #ifdef CONFIG_KEYS > new->session_keyring = NULL; > diff --git a/kernel/ucount.c b/kernel/ucount.c > index 0f2c7c11df19..80a39073bcef 100644 > --- a/kernel/ucount.c > +++ b/kernel/ucount.c > @@ -161,7 +161,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) > return ucounts; > } > > -static void put_ucounts(struct ucounts *ucounts) > +void put_ucounts(struct ucounts *ucounts) > { > unsigned long flags; > > @@ -175,6 +175,16 @@ static void put_ucounts(struct ucounts *ucounts) > kfree(ucounts); > } > > +void set_cred_ucounts(const struct cred *cred, struct user_namespace *ns, kuid_t uid) > +{ > + if (cred->ucounts) { > + if (cred->ucounts->ns == ns && uid_eq(cred->ucounts->uid, uid)) > + return; > + put_ucounts(cred->ucounts); > + } > + ((struct cred *) cred)->ucounts = get_ucounts(ns, uid); > +} > + That can become: void reset_cred_ucounts(struct cred *cred, struct user_namespace *ns, kuid_t uid) { struct ucounts *old = cred->ucounts; if (old && old->ns && uid_eq(old->uid, uid)) return; cred->ucounts = get_ucounts(ns, uid); if (old) put_ucounts(old); } Removing the const on struct cred will make any mistakes where you use this with anything except a brand new cred show up at compile time. Changing the tests around just makes it a little clearer what the code is doing. Changing the name emphasises that prepare_cred should not be using this only commit_cred and friends where the ucounts may have changed. > static inline bool atomic_inc_below(atomic_t *v, int u) > { > int c, old; > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index af612945a4d0..4b8a4468d391 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -1280,6 +1280,7 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns) > > put_user_ns(cred->user_ns); > set_cred_user_ns(cred, get_user_ns(user_ns)); > + set_cred_ucounts(cred, user_ns, cred->euid); > > return 0; > } _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers