On Tue, 25 Oct 2022, Chuck Lever wrote: > fh_match() is costly, especially when filehandles are large (as is > the case for NFSv4). It needs to be used sparingly when searching > data structures. Unfortunately, with common workloads, I see > multiple thousands of objects stored in file_hashtbl[], which always > has only 256 buckets, which makes the hash chains quite lengthy. > > Walking long hash chains with the state_lock held blocks other > activity that needs that lock. > > To help mitigate the cost of searching with fh_match(), replace the > nfs4_file hash table with an rhashtable, which can dynamically > resize its bucket array to minimize hash chain length. The ideal for > this use case is one bucket per inode. > > The result is an improvement in the latency of NFSv4 operations > and the reduction of nfsd CPU utilization due to the cost of > fh_match() and the CPU cache misses incurred while walking long > hash chains in the nfs4_file hash table. > > Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > --- > fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++-------------------------- > fs/nfsd/state.h | 4 --- > 2 files changed, 31 insertions(+), 37 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 681cb2daa843..5b90e5a6a04f 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -591,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) > void > put_nfs4_file(struct nfs4_file *fi) > { > - might_lock(&state_lock); > - > - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { > + if (refcount_dec_and_test(&fi->fi_ref)) { > remove_nfs4_file_locked(fi); > - spin_unlock(&state_lock); > WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); > WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); > call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); > @@ -709,20 +706,6 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) > return ret & OWNER_HASH_MASK; > } > > -/* hash table for nfs4_file */ > -#define FILE_HASH_BITS 8 > -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) > - > -static unsigned int file_hashval(const struct svc_fh *fh) > -{ > - struct inode *inode = d_inode(fh->fh_dentry); > - > - /* XXX: why not (here & in file cache) use inode? */ > - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); > -} > - > -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; > - > static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; > > static const struct rhashtable_params nfs4_file_rhash_params = { > @@ -4683,12 +4666,13 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) > static noinline_for_stack struct nfs4_file * > find_nfs4_file(const struct svc_fh *fhp) > { > - unsigned int hashval = file_hashval(fhp); > + struct rhlist_head *tmp, *list; > struct nfs4_file *fi = NULL; > > rcu_read_lock(); > - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, > - lockdep_is_held(&state_lock)) { > + list = rhltable_lookup(&nfs4_file_rhltable, fhp, > + nfs4_file_rhash_params); > + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { > if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { > if (!refcount_inc_not_zero(&fi->fi_ref)) > fi = NULL; > @@ -4708,33 +4692,45 @@ find_nfs4_file(const struct svc_fh *fhp) > static noinline_for_stack struct nfs4_file * > insert_nfs4_file(struct nfs4_file *new, const struct svc_fh *fhp) > { > - unsigned int hashval = file_hashval(fhp); > + struct rhlist_head *tmp, *list; > struct nfs4_file *ret = NULL; > bool alias_found = false; > struct nfs4_file *fi; > + int err; > > - spin_lock(&state_lock); > - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, > - lockdep_is_held(&state_lock)) { > + rcu_read_lock(); > + As mentioned separately, we need some sort of lock around this loop and the later insert. > + list = rhltable_lookup(&nfs4_file_rhltable, fhp, > + nfs4_file_rhash_params); > + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { > if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { > if (refcount_inc_not_zero(&fi->fi_ref)) > ret = fi; > } else if (d_inode(fhp->fh_dentry) == fi->fi_inode) > fi->fi_aliased = alias_found = true; This d_inde)( == fi->fi_inode test is no longer relevant. Everything in the list must have the same inode. Best remove it. Thanks, NeilBrown > } > - if (likely(ret == NULL)) { > - init_nfs4_file(fhp, new); > - hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); > - new->fi_aliased = alias_found; > - ret = new; > - } > - spin_unlock(&state_lock); > + if (ret) > + goto out_unlock; > + > + init_nfs4_file(fhp, new); > + err = rhltable_insert_key(&nfs4_file_rhltable, fhp, > + &new->fi_rlist, > + nfs4_file_rhash_params); > + if (err) > + goto out_unlock; > + > + new->fi_aliased = alias_found; > + ret = new; > + > +out_unlock: > + rcu_read_unlock(); > return ret; > } > > static noinline_for_stack void remove_nfs4_file_locked(struct nfs4_file *fi) > { > - hlist_del_rcu(&fi->fi_hash); > + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, > + nfs4_file_rhash_params); > } > > /* > @@ -5624,6 +5620,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf > * If not found, create the nfs4_file struct > */ > fp = insert_nfs4_file(open->op_file, current_fh); > + if (unlikely(!fp)) > + return nfserr_jukebox; > if (fp != open->op_file) { > status = nfs4_check_deleg(cl, open, &dp); > if (status) > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 190fc7e418a4..eadd7f465bf5 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -536,16 +536,12 @@ struct nfs4_clnt_odstate { > * inode can have multiple filehandles associated with it, so there is > * (potentially) a many to one relationship between this struct and struct > * inode. > - * > - * These are hashed by filehandle in the file_hashtbl, which is protected by > - * the global state_lock spinlock. > */ > struct nfs4_file { > refcount_t fi_ref; > struct inode * fi_inode; > bool fi_aliased; > spinlock_t fi_lock; > - struct hlist_node fi_hash; /* hash on fi_fhandle */ > struct rhlist_head fi_rlist; > struct list_head fi_stateids; > union { > > >