On Fri, 2022-10-28 at 10:48 -0400, Chuck Lever wrote: > fh_match() is costly, especially when filehandles are large (as is > the case for NFSv4). It needs to be used sparingly when searching > data structures. Unfortunately, with common workloads, I see > multiple thousands of objects stored in file_hashtbl[], which has > just 256 buckets, making its bucket hash chains quite lengthy. > > Walking long hash chains with the state_lock held blocks other > activity that needs that lock. Sizable hash chains are a common > occurrance once the server has handed out some delegations, for > example -- IIUC, each delegated file is held open on the server by > an nfs4_file object. > > To help mitigate the cost of searching with fh_match(), replace the > nfs4_file hash table with an rhashtable, which can dynamically > resize its bucket array to minimize hash chain length. > > The result of this modification is an improvement in the latency of > NFSv4 operations, and the reduction of nfsd CPU utilization due to > eliminating the cost of multiple calls to fh_match() and reducing > the CPU cache misses incurred while walking long hash chains in the > nfs4_file hash table. > > Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> > Reviewed-by: NeilBrown <neilb@xxxxxxx> > --- > fs/nfsd/nfs4state.c | 77 ++++++++++++++++++++++++++------------------------- > fs/nfsd/state.h | 4 --- > 2 files changed, 40 insertions(+), 41 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index c2ef2db9c84c..c78b66e678dd 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -591,11 +591,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu) > void > put_nfs4_file(struct nfs4_file *fi) > { > - might_lock(&state_lock); > - > - if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) { > + if (refcount_dec_and_test(&fi->fi_ref)) { > nfsd4_file_hash_remove(fi); > - spin_unlock(&state_lock); > WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); > WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); > call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); > @@ -709,20 +706,6 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) > return ret & OWNER_HASH_MASK; > } > > -/* hash table for nfs4_file */ > -#define FILE_HASH_BITS 8 > -#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) > - > -static unsigned int file_hashval(const struct svc_fh *fh) > -{ > - struct inode *inode = d_inode(fh->fh_dentry); > - > - /* XXX: why not (here & in file cache) use inode? */ > - return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS); > -} > - > -static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; > - > static struct rhltable nfs4_file_rhltable ____cacheline_aligned_in_smp; > > static const struct rhashtable_params nfs4_file_rhash_params = { > @@ -4687,12 +4670,14 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) > static noinline_for_stack struct nfs4_file * > nfsd4_file_hash_lookup(const struct svc_fh *fhp) > { > - unsigned int hashval = file_hashval(fhp); > + struct inode *inode = d_inode(fhp->fh_dentry); > + struct rhlist_head *tmp, *list; > struct nfs4_file *fi; > > rcu_read_lock(); > - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, > - lockdep_is_held(&state_lock)) { > + list = rhltable_lookup(&nfs4_file_rhltable, &inode, > + nfs4_file_rhash_params); > + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { > if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { > if (refcount_inc_not_zero(&fi->fi_ref)) { > rcu_read_unlock(); > @@ -4706,40 +4691,56 @@ nfsd4_file_hash_lookup(const struct svc_fh *fhp) > > /* > * On hash insertion, identify entries with the same inode but > - * distinct filehandles. They will all be in the same hash bucket > - * because nfs4_file's are hashed by the address in the fi_inode > - * field. > + * distinct filehandles. They will all be on the list returned > + * by rhltable_lookup(). > + * > + * inode->i_lock prevents racing insertions from adding an entry > + * for the same inode/fhp pair twice. > */ > static noinline_for_stack struct nfs4_file * > nfsd4_file_hash_insert(struct nfs4_file *new, const struct svc_fh *fhp) > { > - unsigned int hashval = file_hashval(fhp); > + struct inode *inode = d_inode(fhp->fh_dentry); > + struct rhlist_head *tmp, *list; > struct nfs4_file *ret = NULL; > bool alias_found = false; > struct nfs4_file *fi; > + int err; > > - spin_lock(&state_lock); > - hlist_for_each_entry_rcu(fi, &file_hashtbl[hashval], fi_hash, > - lockdep_is_held(&state_lock)) { > + rcu_read_lock(); > + spin_lock(&inode->i_lock); > + > + list = rhltable_lookup(&nfs4_file_rhltable, &inode, > + nfs4_file_rhash_params); > + rhl_for_each_entry_rcu(fi, tmp, list, fi_rlist) { > if (fh_match(&fi->fi_fhandle, &fhp->fh_handle)) { > if (refcount_inc_not_zero(&fi->fi_ref)) > ret = fi; > - } else if (d_inode(fhp->fh_dentry) == fi->fi_inode) > + } else > fi->fi_aliased = alias_found = true; > } > - if (likely(ret == NULL)) { > - nfsd4_file_init(fhp, new); > - hlist_add_head_rcu(&new->fi_hash, &file_hashtbl[hashval]); > - new->fi_aliased = alias_found; > - ret = new; > - } > - spin_unlock(&state_lock); > + if (ret) > + goto out_unlock; > + > + nfsd4_file_init(fhp, new); > + err = rhltable_insert(&nfs4_file_rhltable, &new->fi_rlist, > + nfs4_file_rhash_params); > + if (err) > + goto out_unlock; > + > + new->fi_aliased = alias_found; > + ret = new; > + > +out_unlock: > + spin_unlock(&inode->i_lock); > + rcu_read_unlock(); > return ret; > } > > static noinline_for_stack void nfsd4_file_hash_remove(struct nfs4_file *fi) > { > - hlist_del_rcu(&fi->fi_hash); > + rhltable_remove(&nfs4_file_rhltable, &fi->fi_rlist, > + nfs4_file_rhash_params); > } > > /* > @@ -5629,6 +5630,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf > * If not found, create the nfs4_file struct > */ > fp = nfsd4_file_hash_insert(open->op_file, current_fh); > + if (unlikely(!fp)) > + return nfserr_jukebox; > if (fp != open->op_file) { > status = nfs4_check_deleg(cl, open, &dp); > if (status) > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 190fc7e418a4..eadd7f465bf5 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -536,16 +536,12 @@ struct nfs4_clnt_odstate { > * inode can have multiple filehandles associated with it, so there is > * (potentially) a many to one relationship between this struct and struct > * inode. > - * > - * These are hashed by filehandle in the file_hashtbl, which is protected by > - * the global state_lock spinlock. > */ > struct nfs4_file { > refcount_t fi_ref; > struct inode * fi_inode; > bool fi_aliased; > spinlock_t fi_lock; > - struct hlist_node fi_hash; /* hash on fi_fhandle */ > struct rhlist_head fi_rlist; > struct list_head fi_stateids; > union { > > You can add this to 13 and 14: Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> That said, I'd probably prefer to see the two squashed together, since you're adding unused infrastructure in 13.