Re: [PATCH RFC 7/9] NFSD: Use rhashtable for managing nfs4_file objects

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On Oct 6, 2022, at 12:12 PM, Jeff Layton <jlayton@xxxxxxxxxxxxxxx> wrote:
> 
> On Wed, 2022-10-05 at 15:11 +0000, Chuck Lever III wrote:
>> 
>>> On Oct 5, 2022, at 10:56 AM, Chuck Lever <chuck.lever@xxxxxxxxxx> wrote:
>>> 
>>> fh_match() is expensive to use for hash chains that contain more
>>> than a few objects. With common workloads, I see multiple thousands
>>> of objects stored in file_hashtbl[], which always has only 256
>>> buckets.
>>> 
>>> Replace it with an rhashtable, which dynamically resizes its bucket
>>> array to keep hash chains short.
>>> 
>>> This also enables the removal of the use of state_lock to serialize
>>> operations on the new rhashtable.
>>> 
>>> The result is an improvement in the latency of NFSv4 operations
>>> and a reduction in the nfsd CPU utilization that was caused by the
>>> cache misses incurred while walking long hash chains in file_hashtbl.
>>> 
>>> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
>>> Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>
>>> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
>>> ---
>>> fs/nfsd/nfs4state.c |  229 +++++++++++++++++++++++++++++++++++----------------
>>> fs/nfsd/state.h     |    5 -
>>> 2 files changed, 158 insertions(+), 76 deletions(-)
>>> 
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index 2b850de288cf..06499b9481a6 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -44,7 +44,9 @@
>>> #include <linux/jhash.h>
>>> #include <linux/string_helpers.h>
>>> #include <linux/fsnotify.h>
>>> +#include <linux/rhashtable.h>
>>> #include <linux/nfs_ssc.h>
>>> +
>>> #include "xdr4.h"
>>> #include "xdr4cb.h"
>>> #include "vfs.h"
>>> @@ -84,6 +86,7 @@ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
>>> static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
>>> void nfsd4_end_grace(struct nfsd_net *nn);
>>> static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
>>> +static void unhash_nfs4_file(struct nfs4_file *fp);
>>> 
>>> /* Locking: */
>>> 
>>> @@ -577,11 +580,8 @@ static void nfsd4_free_file_rcu(struct rcu_head *rcu)
>>> void
>>> put_nfs4_file(struct nfs4_file *fi)
>>> {
>>> -	might_lock(&state_lock);
>>> -
>>> -	if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
>>> -		hlist_del_rcu(&fi->fi_hash);
>>> -		spin_unlock(&state_lock);
>>> +	if (refcount_dec_and_test(&fi->fi_ref)) {
>>> +		unhash_nfs4_file(fi);
>>> 		WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
>>> 		WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
>>> 		call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
>>> @@ -695,19 +695,85 @@ static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
>>> 	return ret & OWNER_HASH_MASK;
>>> }
>>> 
>>> -/* hash table for nfs4_file */
>>> -#define FILE_HASH_BITS                   8
>>> -#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)
>>> +static struct rhashtable nfs4_file_rhashtbl ____cacheline_aligned_in_smp;
>>> 
>>> -static unsigned int file_hashval(struct svc_fh *fh)
>>> +/*
>>> + * The returned hash value is based solely on the address of an in-core
>>> + * inode, a pointer to a slab-allocated object. The entropy in such a
>>> + * pointer is concentrated in its middle bits.
>>> + */
>>> +static u32 nfs4_file_inode_hash(const struct inode *inode, u32 seed)
>>> +{
>>> +	unsigned long ptr = (unsigned long)inode;
>>> +	u32 k;
>>> +
>>> +	k = ptr >> L1_CACHE_SHIFT;
>>> +	k &= 0x00ffffff;
>>> +	return jhash2(&k, 1, seed);
>>> +}
>>> +
>>> +/**
>>> + * nfs4_file_key_hashfn - Compute the hash value of a lookup key
>>> + * @data: key on which to compute the hash value
>>> + * @len: rhash table's key_len parameter (unused)
>>> + * @seed: rhash table's random seed of the day
>>> + *
>>> + * Return value:
>>> + *   Computed 32-bit hash value
>>> + */
>>> +static u32 nfs4_file_key_hashfn(const void *data, u32 len, u32 seed)
>>> {
>>> -	struct inode *inode = d_inode(fh->fh_dentry);
>>> +	const struct svc_fh *fhp = data;
>>> 
>>> -	/* XXX: why not (here & in file cache) use inode? */
>>> -	return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
>>> +	return nfs4_file_inode_hash(d_inode(fhp->fh_dentry), seed);
>>> }
>>> 
>>> -static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
>>> +/**
>>> + * nfs4_file_obj_hashfn - Compute the hash value of an nfs4_file object
>>> + * @data: object on which to compute the hash value
>>> + * @len: rhash table's key_len parameter (unused)
>>> + * @seed: rhash table's random seed of the day
>>> + *
>>> + * Return value:
>>> + *   Computed 32-bit hash value
>>> + */
>>> +static u32 nfs4_file_obj_hashfn(const void *data, u32 len, u32 seed)
>>> +{
>>> +	const struct nfs4_file *fi = data;
>>> +
>>> +	return nfs4_file_inode_hash(fi->fi_inode, seed);
>>> +}
>>> +
>>> +/**
>>> + * nfs4_file_obj_cmpfn - Match a cache item against search criteria
>>> + * @arg: search criteria
>>> + * @ptr: cache item to check
>>> + *
>>> + * Return values:
>>> + *   %0 - Item matches search criteria
>>> + *   %1 - Item does not match search criteria
>>> + */
>>> +static int nfs4_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
>>> +			       const void *ptr)
>>> +{
>>> +	const struct svc_fh *fhp = arg->key;
>>> +	const struct nfs4_file *fi = ptr;
>>> +
>>> +	return fh_match(&fi->fi_fhandle, &fhp->fh_handle) ? 0 : 1;
>>> +}
>>> +
>>> +static const struct rhashtable_params nfs4_file_rhash_params = {
>>> +	.key_len		= sizeof_field(struct nfs4_file, fi_inode),
>>> +	.key_offset		= offsetof(struct nfs4_file, fi_inode),
>>> +	.head_offset		= offsetof(struct nfs4_file, fi_rhash),
>>> +	.hashfn			= nfs4_file_key_hashfn,
>>> +	.obj_hashfn		= nfs4_file_obj_hashfn,
>>> +	.obj_cmpfn		= nfs4_file_obj_cmpfn,
>>> +
>>> +	/* Reduce resizing churn on light workloads */
>>> +	.min_size		= 512,		/* buckets */
>>> +	.automatic_shrinking	= true,
>>> +};
>>> 
>>> /*
>>> * Check if courtesy clients have conflicting access and resolve it if possible
>>> @@ -4251,11 +4317,8 @@ static struct nfs4_file *nfsd4_alloc_file(void)
>>> }
>>> 
>>> /* OPEN Share state helper functions */
>>> -static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
>>> -				struct nfs4_file *fp)
>>> +static void init_nfs4_file(const struct svc_fh *fh, struct nfs4_file *fp)
>>> {
>>> -	lockdep_assert_held(&state_lock);
>>> -
>>> 	refcount_set(&fp->fi_ref, 1);
>>> 	spin_lock_init(&fp->fi_lock);
>>> 	INIT_LIST_HEAD(&fp->fi_stateids);
>>> @@ -4273,7 +4336,6 @@ static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
>>> 	INIT_LIST_HEAD(&fp->fi_lo_states);
>>> 	atomic_set(&fp->fi_lo_recalls, 0);
>>> #endif
>>> -	hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
>>> }
>>> 
>>> void
>>> @@ -4626,71 +4688,84 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
>>> 		nfs4_put_stid(&last->st_stid);
>>> }
>>> 
>>> -/* search file_hashtbl[] for file */
>>> -static struct nfs4_file *
>>> -find_file_locked(struct svc_fh *fh, unsigned int hashval)
>>> +static struct nfs4_file *find_nfs4_file(const struct svc_fh *fhp)
>>> {
>>> -	struct nfs4_file *fp;
>>> +	struct nfs4_file *fi;
>>> 
>>> -	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
>>> -				lockdep_is_held(&state_lock)) {
>>> -		if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
>>> -			if (refcount_inc_not_zero(&fp->fi_ref))
>>> -				return fp;
>>> -		}
>>> -	}
>>> -	return NULL;
>>> +	rcu_read_lock();
>>> +	fi = rhashtable_lookup(&nfs4_file_rhashtbl, fhp,
>>> +			       nfs4_file_rhash_params);
>>> +	if (fi)
>>> +		if (!refcount_inc_not_zero(&fi->fi_ref))
>>> +			fi = NULL;
>>> +	rcu_read_unlock();
>>> +	return fi;
>>> }
>>> 
>>> -static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
>>> -				     unsigned int hashval)
>>> +static void check_nfs4_file_aliases_locked(struct nfs4_file *new,
>>> +					   const struct svc_fh *fhp)
>>> {
>>> -	struct nfs4_file *fp;
>>> -	struct nfs4_file *ret = NULL;
>>> -	bool alias_found = false;
>>> +	struct rhashtable *ht = &nfs4_file_rhashtbl;
>>> +	struct rhash_lock_head __rcu *const *bkt;
>>> +	struct rhashtable_compare_arg arg = {
>>> +		.ht	= ht,
>>> +		.key	= fhp,
>>> +	};
>>> +	struct bucket_table *tbl;
>>> +	struct rhash_head *he;
>>> +	unsigned int hash;
>>> 
>>> -	spin_lock(&state_lock);
>>> -	hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
>>> -				 lockdep_is_held(&state_lock)) {
>>> -		if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
>>> -			if (refcount_inc_not_zero(&fp->fi_ref))
>>> -				ret = fp;
>>> -		} else if (d_inode(fh->fh_dentry) == fp->fi_inode)
>>> -			fp->fi_aliased = alias_found = true;
>>> -	}
>>> -	if (likely(ret == NULL)) {
>>> -		nfsd4_init_file(fh, hashval, new);
>>> -		new->fi_aliased = alias_found;
>>> -		ret = new;
>>> +	/*
>>> +	 * rhashtable guarantees small buckets, thus this loop stays
>>> +	 * efficient.
>>> +	 */
>>> +	rcu_read_lock();
>>> +	tbl = rht_dereference_rcu(ht->tbl, ht);
>>> +	hash = rht_key_hashfn(ht, tbl, fhp, nfs4_file_rhash_params);
>>> +	bkt = rht_bucket(tbl, hash);
>>> +	rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
>>> +		struct nfs4_file *fi;
>>> +
>>> +		fi = rht_obj(ht, he);
>>> +		if (nfs4_file_obj_cmpfn(&arg, fi) == 0)
>>> +			continue;
>>> +		if (d_inode(fhp->fh_dentry) == fi->fi_inode) {
>>> +			fi->fi_aliased = true;
>>> +			new->fi_aliased = true;
>>> +		}
>>> 	}
>>> -	spin_unlock(&state_lock);
>>> -	return ret;
>>> +	rcu_read_unlock();
>>> }
>>> 
>>> -static struct nfs4_file * find_file(struct svc_fh *fh)
>>> +static noinline struct nfs4_file *
>>> +find_or_hash_nfs4_file(struct nfs4_file *new, const struct svc_fh *fhp)
>>> {
>>> -	struct nfs4_file *fp;
>>> -	unsigned int hashval = file_hashval(fh);
>>> +	struct nfs4_file *fi;
>>> 
>>> -	rcu_read_lock();
>>> -	fp = find_file_locked(fh, hashval);
>>> -	rcu_read_unlock();
>>> -	return fp;
>>> -}
>>> +	init_nfs4_file(fhp, new);
>>> 
>>> -static struct nfs4_file *
>>> -find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
>>> -{
>>> -	struct nfs4_file *fp;
>>> -	unsigned int hashval = file_hashval(fh);
>>> +	fi = rhashtable_lookup_get_insert_key(&nfs4_file_rhashtbl,
>>> +					      fhp, &new->fi_rhash,
>>> +					      nfs4_file_rhash_params);
>>> +	if (!fi) {
>>> +		fi = new;
>>> +		goto check_aliases;
>>> +	}
>>> +	if (IS_ERR(fi))		/* or BUG? */
>>> +		return NULL;
>>> +	if (!refcount_inc_not_zero(&fi->fi_ref))
>>> +		fi = new;
>> 
>> Ah, hrm. Given what we just had to do to nfsd_file_do_acquire(),
>> maybe this needs the same fix to hang onto the RCU read lock
>> while dicking with the nfs4_file object's reference count?
>> 
>> 
> 
> Yes. Probably we should just merge this patch if you want a fix for
> mainline:
> 
>    nfsd: rework hashtable handling in nfsd_do_file_acquire

It's queued up. I intend to submit it before leaving for Westford.

As for the file_hashtbl, I have fixed that up to be consistent
with the approach in nfsd_file_do_acquire(), and will post a
refresh in a moment.


>>> -	rcu_read_lock();
>>> -	fp = find_file_locked(fh, hashval);
>>> -	rcu_read_unlock();
>>> -	if (fp)
>>> -		return fp;
>>> +check_aliases:
>>> +	check_nfs4_file_aliases_locked(fi, fhp);
>>> +
>>> +	return fi;
>>> +}
>>> 
>>> -	return insert_file(new, fh, hashval);
>>> +static void unhash_nfs4_file(struct nfs4_file *fi)
>>> +{
>>> +	rhashtable_remove_fast(&nfs4_file_rhashtbl, &fi->fi_rhash,
>>> +			       nfs4_file_rhash_params);
>>> }
>>> 
>>> /*
>>> @@ -4703,9 +4778,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
>>> 	struct nfs4_file *fp;
>>> 	__be32 ret = nfs_ok;
>>> 
>>> -	fp = find_file(current_fh);
>>> +	fp = find_nfs4_file(current_fh);
>>> 	if (!fp)
>>> 		return ret;
>>> +
>>> 	/* Check for conflicting share reservations */
>>> 	spin_lock(&fp->fi_lock);
>>> 	if (fp->fi_share_deny & deny_type)
>>> @@ -5548,7 +5624,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>>> 	 * and check for delegations in the process of being recalled.
>>> 	 * If not found, create the nfs4_file struct
>>> 	 */
>>> -	fp = find_or_add_file(open->op_file, current_fh);
>>> +	fp = find_or_hash_nfs4_file(open->op_file, current_fh);
>>> +	if (unlikely(!fp))
>>> +		return nfserr_jukebox;
>>> 	if (fp != open->op_file) {
>>> 		status = nfs4_check_deleg(cl, open, &dp);
>>> 		if (status)
>>> @@ -7905,10 +7983,16 @@ nfs4_state_start(void)
>>> {
>>> 	int ret;
>>> 
>>> -	ret = nfsd4_create_callback_queue();
>>> +	ret = rhashtable_init(&nfs4_file_rhashtbl, &nfs4_file_rhash_params);
>>> 	if (ret)
>>> 		return ret;
>>> 
>>> +	ret = nfsd4_create_callback_queue();
>>> +	if (ret) {
>>> +		rhashtable_destroy(&nfs4_file_rhashtbl);
>>> +		return ret;
>>> +	}
>>> +
>>> 	set_max_delegations();
>>> 	return 0;
>>> }
>>> @@ -7939,6 +8023,7 @@ nfs4_state_shutdown_net(struct net *net)
>>> 
>>> 	nfsd4_client_tracking_exit(net);
>>> 	nfs4_state_destroy_net(net);
>>> +	rhashtable_destroy(&nfs4_file_rhashtbl);
>>> #ifdef CONFIG_NFSD_V4_2_INTER_SSC
>>> 	nfsd4_ssc_shutdown_umount(nn);
>>> #endif
>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>> index ae596dbf8667..879f085bc39e 100644
>>> --- a/fs/nfsd/state.h
>>> +++ b/fs/nfsd/state.h
>>> @@ -536,16 +536,13 @@ struct nfs4_clnt_odstate {
>>> * inode can have multiple filehandles associated with it, so there is
>>> * (potentially) a many to one relationship between this struct and struct
>>> * inode.
>>> - *
>>> - * These are hashed by filehandle in the file_hashtbl, which is protected by
>>> - * the global state_lock spinlock.
>>> */
>>> struct nfs4_file {
>>> 	refcount_t		fi_ref;
>>> 	struct inode *		fi_inode;
>>> 	bool			fi_aliased;
>>> 	spinlock_t		fi_lock;
>>> -	struct hlist_node       fi_hash;	/* hash on fi_fhandle */
>>> +	struct rhash_head	fi_rhash;
>>> 	struct list_head        fi_stateids;
>>> 	union {
>>> 		struct list_head	fi_delegations;
>>> 
>>> 
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@xxxxxxxxxxxxxxx>

--
Chuck Lever







[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux