> On Nov 7, 2018, at 8:41 PM, NeilBrown <neilb@xxxxxxxx> wrote: > > On Wed, Nov 07 2018, Chuck Lever wrote: > >> Hi Neil- >> >> >>> On Nov 6, 2018, at 11:12 PM, NeilBrown <neilb@xxxxxxxx> wrote: >>> >>> 1/ discard 'struct unx_cred'. We don't need any data that >>> is not already in 'struct rpc_cred'. >>> 2/ Don't keep these creds in a hash table. When a credential >>> is needed, simply allocate it. When not needed, discard it. >>> This can easily be faster than performing a lookup on >>> a shared hash table. > > Thanks for the review Chuck! > >> >> What's the basis for this claim? A memory allocation disables and >> enables IRQs. That definitely hits a resource that is globally >> shared. > > My basis is not rock solid, but I was convinced :-) > > kmem_cache_alloc() does disable local irqs when slab.c is used. > slub.c doesn't disable them in the fast path which I *think* should be > reasonably common. > slob always takes a spinlock as well as disabling interrupts. > > I think slob is only recommended for tiny machines, and slub is > generally preferred, so I think that when performance matters, it will > still be delivered. > > It isn't clear to me why you consider a local irq to be "globally > shared" - assuming that is what you mean. Globally-shared in this case can be construed somewhat narrowly. If we assume slub, kmem_cache_alloc() touches resources that are used by all tasks running on that CPU, at least in the slow path. > Disabling local interrupts is not without cost, but I don't think the > cost increases with the number of CPUs, while the cost of accessing > shared memory (even without a spinlock) does. Point taken, but having a single mempool for all RPC transports and users is also going to be a shared resource that can bottleneck. I guess that's an argument in favor of building creds on demand rather than keeping them in a hash table, isn't it. :-) >> In addition, the comment near unx_marshal suggests we should >> cache the marshaled on-the-wire version of the credential instead >> of building it in the RPC Call buffer every time. That would >> require keeping the creds around. > > That comment has been there since 2.1.32 and has not be acted on. There > seems little reason to expect that to change. Caching doesn't seem to > have been found to be necessary in practice. > >> >> Have you measured a significant difference in throughput with >> this patch? Have you considered improving the lookup speed of >> the hash table by making the buckets into rb-trees, for example? > > No, I haven't measured. > I might have briefly considered changing to an rb-tree, but as the > current hashtable doesn't actually contain anything of value, I would > have quickly discarded the idea. > > If I wanted to further improve performance, I would look at ways to > bypass the "lookup_cred" step completely. > unx_marshal only needs the generic "struct cred", so there should be no > need to have a 'struct rpc_cred' at all. > > If this series is accepted, I'll (hopefully) look into seeing how > practical that is. > > Thanks again, > NeilBrown > > >> >> >>> As the lookup can happen during write-out, use a mempool >>> to ensure forward progress. >>> This means that we cannot compare two credentials for >>> equality by comparing the pointers, but we never do that anyway. >>> >>> Signed-off-by: NeilBrown <neilb@xxxxxxxx> >>> --- >>> net/sunrpc/auth.c | 1 >>> net/sunrpc/auth_unix.c | 101 +++++++++++++++--------------------------------- >>> 2 files changed, 32 insertions(+), 70 deletions(-) >>> >>> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c >>> index 867ea9834bde..a07a7c59d3a4 100644 >>> --- a/net/sunrpc/auth.c >>> +++ b/net/sunrpc/auth.c >>> @@ -651,6 +651,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred, >>> INIT_LIST_HEAD(&cred->cr_lru); >>> refcount_set(&cred->cr_count, 1); >>> cred->cr_auth = auth; >>> + cred->cr_flags = 0; >>> cred->cr_ops = ops; >>> cred->cr_expire = jiffies; >>> cred->cr_cred = get_cred(acred->cred); >>> diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c >>> index bff113a411e0..387f6b3ffbea 100644 >>> --- a/net/sunrpc/auth_unix.c >>> +++ b/net/sunrpc/auth_unix.c >>> @@ -11,16 +11,11 @@ >>> #include <linux/types.h> >>> #include <linux/sched.h> >>> #include <linux/module.h> >>> +#include <linux/mempool.h> >>> #include <linux/sunrpc/clnt.h> >>> #include <linux/sunrpc/auth.h> >>> #include <linux/user_namespace.h> >>> >>> -struct unx_cred { >>> - struct rpc_cred uc_base; >>> - kgid_t uc_gid; >>> - kgid_t uc_gids[UNX_NGROUPS]; >>> -}; >>> -#define uc_uid uc_base.cr_uid >>> >>> #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) >>> # define RPCDBG_FACILITY RPCDBG_AUTH >>> @@ -28,6 +23,7 @@ struct unx_cred { >>> >>> static struct rpc_auth unix_auth; >>> static const struct rpc_credops unix_credops; >>> +static mempool_t *unix_pool; >>> >>> static struct rpc_auth * >>> unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) >>> @@ -42,15 +38,6 @@ static void >>> unx_destroy(struct rpc_auth *auth) >>> { >>> dprintk("RPC: destroying UNIX authenticator %p\n", auth); >>> - rpcauth_clear_credcache(auth->au_credcache); >>> -} >>> - >>> -static int >>> -unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) >>> -{ >>> - return hash_64(from_kgid(&init_user_ns, acred->cred->fsgid) | >>> - ((u64)from_kuid(&init_user_ns, acred->cred->fsuid) << >>> - (sizeof(gid_t) * 8)), hashbits); >>> } >>> >>> /* >>> @@ -59,53 +46,24 @@ unx_hash_cred(struct auth_cred *acred, unsigned int hashbits) >>> static struct rpc_cred * >>> unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) >>> { >>> - return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); >>> -} >>> - >>> -static struct rpc_cred * >>> -unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) >>> -{ >>> - struct unx_cred *cred; >>> - unsigned int groups = 0; >>> - unsigned int i; >>> + struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); >>> >>> dprintk("RPC: allocating UNIX cred for uid %d gid %d\n", >>> from_kuid(&init_user_ns, acred->cred->fsuid), >>> from_kgid(&init_user_ns, acred->cred->fsgid)); >>> >>> - if (!(cred = kmalloc(sizeof(*cred), gfp))) >>> - return ERR_PTR(-ENOMEM); >>> - >>> - rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops); >>> - cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; >>> - >>> - if (acred->cred && acred->cred->group_info != NULL) >>> - groups = acred->cred->group_info->ngroups; >>> - if (groups > UNX_NGROUPS) >>> - groups = UNX_NGROUPS; >>> - >>> - cred->uc_gid = acred->cred->fsgid; >>> - for (i = 0; i < groups; i++) >>> - cred->uc_gids[i] = acred->cred->group_info->gid[i]; >>> - if (i < UNX_NGROUPS) >>> - cred->uc_gids[i] = INVALID_GID; >>> - >>> - return &cred->uc_base; >>> -} >>> - >>> -static void >>> -unx_free_cred(struct unx_cred *unx_cred) >>> -{ >>> - dprintk("RPC: unx_free_cred %p\n", unx_cred); >>> - put_cred(unx_cred->uc_base.cr_cred); >>> - kfree(unx_cred); >>> + rpcauth_init_cred(ret, acred, auth, &unix_credops); >>> + ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; >>> + return ret; >>> } >>> >>> static void >>> unx_free_cred_callback(struct rcu_head *head) >>> { >>> - struct unx_cred *unx_cred = container_of(head, struct unx_cred, uc_base.cr_rcu); >>> - unx_free_cred(unx_cred); >>> + struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu); >>> + dprintk("RPC: unx_free_cred %p\n", rpc_cred); >>> + put_cred(rpc_cred->cr_cred); >>> + mempool_free(rpc_cred, unix_pool); >>> } >>> >>> static void >>> @@ -115,30 +73,32 @@ unx_destroy_cred(struct rpc_cred *cred) >>> } >>> >>> /* >>> - * Match credentials against current process creds. >>> - * The root_override argument takes care of cases where the caller may >>> - * request root creds (e.g. for NFS swapping). >>> + * Match credentials against current the auth_cred. >>> */ >>> static int >>> -unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags) >>> +unx_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) >>> { >>> - struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base); >>> unsigned int groups = 0; >>> unsigned int i; >>> >>> + if (cred->cr_cred == acred->cred) >>> + return 1; >>> >>> - if (!uid_eq(cred->uc_uid, acred->cred->fsuid) || !gid_eq(cred->uc_gid, acred->cred->fsgid)) >>> + if (!uid_eq(cred->cr_cred->fsuid, acred->cred->fsuid) || !gid_eq(cred->cr_cred->fsgid, acred->cred->fsgid)) >>> return 0; >>> >>> if (acred->cred && acred->cred->group_info != NULL) >>> groups = acred->cred->group_info->ngroups; >>> if (groups > UNX_NGROUPS) >>> groups = UNX_NGROUPS; >>> + if (cred->cr_cred->group_info == NULL) >>> + return groups == 0; >>> + if (groups != cred->cr_cred->group_info->ngroups) >>> + return 0; >>> + >>> for (i = 0; i < groups ; i++) >>> - if (!gid_eq(cred->uc_gids[i], acred->cred->group_info->gid[i])) >>> + if (!gid_eq(cred->cr_cred->group_info->gid[i], acred->cred->group_info->gid[i])) >>> return 0; >>> - if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups])) >>> - return 0; >>> return 1; >>> } >>> >>> @@ -150,9 +110,10 @@ static __be32 * >>> unx_marshal(struct rpc_task *task, __be32 *p) >>> { >>> struct rpc_clnt *clnt = task->tk_client; >>> - struct unx_cred *cred = container_of(task->tk_rqstp->rq_cred, struct unx_cred, uc_base); >>> + struct rpc_cred *cred = task->tk_rqstp->rq_cred; >>> __be32 *base, *hold; >>> int i; >>> + struct group_info *gi = cred->cr_cred->group_info; >>> >>> *p++ = htonl(RPC_AUTH_UNIX); >>> base = p++; >>> @@ -163,11 +124,12 @@ unx_marshal(struct rpc_task *task, __be32 *p) >>> */ >>> p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); >>> >>> - *p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid)); >>> - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid)); >>> + *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid)); >>> + *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid)); >>> hold = p++; >>> - for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++) >>> - *p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i])); >>> + if (gi) >>> + for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++) >>> + *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i])); >>> *hold = htonl(p - hold - 1); /* gid array length */ >>> *base = htonl((p - base - 1) << 2); /* cred length */ >>> >>> @@ -214,12 +176,13 @@ unx_validate(struct rpc_task *task, __be32 *p) >>> >>> int __init rpc_init_authunix(void) >>> { >>> - return rpcauth_init_credcache(&unix_auth); >>> + unix_pool = mempool_create_kmalloc_pool(16, sizeof(struct rpc_cred)); >>> + return unix_pool ? 0 : -ENOMEM; >>> } >>> >>> void rpc_destroy_authunix(void) >>> { >>> - rpcauth_destroy_credcache(&unix_auth); >>> + mempool_destroy(unix_pool); >>> } >>> >>> const struct rpc_authops authunix_ops = { >>> @@ -228,9 +191,7 @@ const struct rpc_authops authunix_ops = { >>> .au_name = "UNIX", >>> .create = unx_create, >>> .destroy = unx_destroy, >>> - .hash_cred = unx_hash_cred, >>> .lookup_cred = unx_lookup_cred, >>> - .crcreate = unx_create_cred, >>> }; >>> >>> static >>> >>> >> >> -- >> Chuck Lever -- Chuck Lever