Re: [PATCH RFC] NFSD: Convert filecache to rhltable

Chuck Lever III <chuck.lever@xxxxxxxxxx> · Mon, 30 Jan 2023 18:10:02 +0000

> On Jan 23, 2023, at 4:38 PM, Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> 
> On Mon, 2023-01-23 at 20:34 +0000, Chuck Lever III wrote:
>> 
>>> On Jan 23, 2023, at 3:15 PM, Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>>> 
>>> On Thu, 2023-01-05 at 09:58 -0500, Chuck Lever wrote:
>>>> From: Chuck Lever <chuck.lever@xxxxxxxxxx>
>>>> 
>>>> While we were converting the nfs4_file hashtable to use the kernel's
>>>> resizable hashtable data structure, Neil Brown observed that the
>>>> list variant (rhltable) would be better for managing nfsd_file items
>>>> as well. The nfsd_file hash table will contain multiple entries for
>>>> the same inode -- these should be kept together on a list. And, it
>>>> could be possible for exotic or malicious client behavior to cause
>>>> the hash table to resize itself on every insertion.
>>>> 
>>>> A nice simplification is that rhltable_lookup() can return a list
>>>> that contains only nfsd_file items that match a given inode, which
>>>> enables us to eliminate specialized hash table helper functions and
>>>> use the default functions provided by the rhashtable implementation).
>>>> 
>>>> Since we are now storing nfsd_file items for the same inode on a
>>>> single list, that effectively reduces the number of hash entries
>>>> that have to be tracked in the hash table. The mininum bucket count
>>>> is therefore lowered.
>>>> 
>>>> Suggested-by: Neil Brown <neilb@xxxxxxx>
>>>> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
>>>> ---
>>>> fs/nfsd/filecache.c |  289 +++++++++++++++++++--------------------------------
>>>> fs/nfsd/filecache.h |    9 +-
>>>> 2 files changed, 115 insertions(+), 183 deletions(-)
>>>> 
>>>> Just to note that this work is still in the wings. It would need to
>>>> be rebased on Jeff's recent fixes and clean-ups. I'm reluctant to
>>>> apply this until there is more evidence that the instability in v6.0
>>>> has been duly addressed.
>>>> 
>>>> 
>>>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>>>> index 45b2c9e3f636..f04e722bb6bc 100644
>>>> --- a/fs/nfsd/filecache.c
>>>> +++ b/fs/nfsd/filecache.c
>>>> @@ -74,70 +74,9 @@ static struct list_lru			nfsd_file_lru;
>>>> static unsigned long			nfsd_file_flags;
>>>> static struct fsnotify_group		*nfsd_file_fsnotify_group;
>>>> static struct delayed_work		nfsd_filecache_laundrette;
>>>> -static struct rhashtable		nfsd_file_rhash_tbl
>>>> +static struct rhltable			nfsd_file_rhltable
>>>> 						____cacheline_aligned_in_smp;
>>>> 
>>>> -enum nfsd_file_lookup_type {
>>>> -	NFSD_FILE_KEY_INODE,
>>>> -	NFSD_FILE_KEY_FULL,
>>>> -};
>>>> -
>>>> -struct nfsd_file_lookup_key {
>>>> -	struct inode			*inode;
>>>> -	struct net			*net;
>>>> -	const struct cred		*cred;
>>>> -	unsigned char			need;
>>>> -	bool				gc;
>>>> -	enum nfsd_file_lookup_type	type;
>>>> -};
>>>> -
>>>> -/*
>>>> - * The returned hash value is based solely on the address of an in-code
>>>> - * inode, a pointer to a slab-allocated object. The entropy in such a
>>>> - * pointer is concentrated in its middle bits.
>>>> - */
>>>> -static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
>>>> -{
>>>> -	unsigned long ptr = (unsigned long)inode;
>>>> -	u32 k;
>>>> -
>>>> -	k = ptr >> L1_CACHE_SHIFT;
>>>> -	k &= 0x00ffffff;
>>>> -	return jhash2(&k, 1, seed);
>>>> -}
>>>> -
>>>> -/**
>>>> - * nfsd_file_key_hashfn - Compute the hash value of a lookup key
>>>> - * @data: key on which to compute the hash value
>>>> - * @len: rhash table's key_len parameter (unused)
>>>> - * @seed: rhash table's random seed of the day
>>>> - *
>>>> - * Return value:
>>>> - *   Computed 32-bit hash value
>>>> - */
>>>> -static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
>>>> -{
>>>> -	const struct nfsd_file_lookup_key *key = data;
>>>> -
>>>> -	return nfsd_file_inode_hash(key->inode, seed);
>>>> -}
>>>> -
>>>> -/**
>>>> - * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
>>>> - * @data: object on which to compute the hash value
>>>> - * @len: rhash table's key_len parameter (unused)
>>>> - * @seed: rhash table's random seed of the day
>>>> - *
>>>> - * Return value:
>>>> - *   Computed 32-bit hash value
>>>> - */
>>>> -static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
>>>> -{
>>>> -	const struct nfsd_file *nf = data;
>>>> -
>>>> -	return nfsd_file_inode_hash(nf->nf_inode, seed);
>>>> -}
>>>> -
>>>> static bool
>>>> nfsd_match_cred(const struct cred *c1, const struct cred *c2)
>>>> {
>>>> @@ -158,53 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
>>>> 	return true;
>>>> }
>>>> 
>>>> -/**
>>>> - * nfsd_file_obj_cmpfn - Match a cache item against search criteria
>>>> - * @arg: search criteria
>>>> - * @ptr: cache item to check
>>>> - *
>>>> - * Return values:
>>>> - *   %0 - Item matches search criteria
>>>> - *   %1 - Item does not match search criteria
>>>> - */
>>>> -static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
>>>> -			       const void *ptr)
>>>> -{
>>>> -	const struct nfsd_file_lookup_key *key = arg->key;
>>>> -	const struct nfsd_file *nf = ptr;
>>>> -
>>>> -	switch (key->type) {
>>>> -	case NFSD_FILE_KEY_INODE:
>>>> -		if (nf->nf_inode != key->inode)
>>>> -			return 1;
>>>> -		break;
>>>> -	case NFSD_FILE_KEY_FULL:
>>>> -		if (nf->nf_inode != key->inode)
>>>> -			return 1;
>>>> -		if (nf->nf_may != key->need)
>>>> -			return 1;
>>>> -		if (nf->nf_net != key->net)
>>>> -			return 1;
>>>> -		if (!nfsd_match_cred(nf->nf_cred, key->cred))
>>>> -			return 1;
>>>> -		if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
>>>> -			return 1;
>>>> -		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
>>>> -			return 1;
>>>> -		break;
>>>> -	}
>>>> -	return 0;
>>>> -}
>>>> -
>>>> static const struct rhashtable_params nfsd_file_rhash_params = {
>>>> 	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
>>>> 	.key_offset		= offsetof(struct nfsd_file, nf_inode),
>>>> -	.head_offset		= offsetof(struct nfsd_file, nf_rhash),
>>>> -	.hashfn			= nfsd_file_key_hashfn,
>>>> -	.obj_hashfn		= nfsd_file_obj_hashfn,
>>>> -	.obj_cmpfn		= nfsd_file_obj_cmpfn,
>>>> -	/* Reduce resizing churn on light workloads */
>>>> -	.min_size		= 512,		/* buckets */
>>>> +	.head_offset		= offsetof(struct nfsd_file, nf_rlist),
>>>> +
>>>> +	/*
>>>> +	 * Start with a single page hash table to reduce resizing churn
>>>> +	 * on light workloads.
>>>> +	 */
>>>> +	.min_size		= 256,
>>>> 	.automatic_shrinking	= true,
>>>> };
>>>> 
>>>> @@ -307,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
>>>> }
>>>> 
>>>> static struct nfsd_file *
>>>> -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
>>>> +nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
>>>> +		bool want_gc)
>>>> {
>>>> 	struct nfsd_file *nf;
>>>> 
>>>> 	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
>>>> -	if (nf) {
>>>> -		INIT_LIST_HEAD(&nf->nf_lru);
>>>> -		nf->nf_birthtime = ktime_get();
>>>> -		nf->nf_file = NULL;
>>>> -		nf->nf_cred = get_current_cred();
>>>> -		nf->nf_net = key->net;
>>>> -		nf->nf_flags = 0;
>>>> -		__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
>>>> -		__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
>>>> -		if (key->gc)
>>>> -			__set_bit(NFSD_FILE_GC, &nf->nf_flags);
>>>> -		nf->nf_inode = key->inode;
>>>> -		refcount_set(&nf->nf_ref, 1);
>>>> -		nf->nf_may = key->need;
>>>> -		nf->nf_mark = NULL;
>>>> -	}
>>>> +	if (unlikely(!nf))
>>>> +		return NULL;
>>>> +
>>>> +	INIT_LIST_HEAD(&nf->nf_lru);
>>>> +	nf->nf_birthtime = ktime_get();
>>>> +	nf->nf_file = NULL;
>>>> +	nf->nf_cred = get_current_cred();
>>>> +	nf->nf_net = net;
>>>> +	nf->nf_flags = want_gc ?
>>>> +		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
>>>> +		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
>>>> +	nf->nf_inode = inode;
>>>> +	refcount_set(&nf->nf_ref, 1);
>>>> +	nf->nf_may = need;
>>>> +	nf->nf_mark = NULL;
>>>> 	return nf;
>>>> }
>>>> 
>>>> @@ -362,8 +264,8 @@ nfsd_file_hash_remove(struct nfsd_file *nf)
>>>> 
>>>> 	if (nfsd_file_check_write_error(nf))
>>>> 		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
>>>> -	rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
>>>> -			       nfsd_file_rhash_params);
>>>> +	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
>>>> +			nfsd_file_rhash_params);
>>>> }
>>>> 
>>>> static bool
>>>> @@ -680,21 +582,19 @@ static struct shrinker	nfsd_file_shrinker = {
>>>> static void
>>>> nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
>>>> {
>>>> -	struct nfsd_file_lookup_key key = {
>>>> -		.type	= NFSD_FILE_KEY_INODE,
>>>> -		.inode	= inode,
>>>> -	};
>>>> -	struct nfsd_file *nf;
>>>> -
>>>> 	rcu_read_lock();
>>>> 	do {
>>>> +		struct rhlist_head *list;
>>>> +		struct nfsd_file *nf;
>>>> 		int decrement = 1;
>>>> 
>>>> -		nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
>>>> +		list = rhltable_lookup(&nfsd_file_rhltable, &inode,
>>>> 				       nfsd_file_rhash_params);
>>>> -		if (!nf)
>>>> +		if (!list)
>>>> 			break;
>>>> 
>>> 
>>> Rather than redriving the lookup multiple times in the loop, would it be
>>> better to just search once and then walk the resulting list with
>>> rhl_for_each_entry_rcu, all while holding the rcu_read_lock?
>> 
>> That would be nice, but as you and I have discussed before:
>> 
>> On every iteration, we're possibly calling nfsd_file_unhash(), which
>> changes the list. So we have to invoke rhltable_lookup() again to get
>> the updated version of that list.
>> 
>> As far as I can see there's no "_safe" version of rhl_for_each_entry.
>> 
>> I think the best we can do is not redrive the lookup if we didn't
>> unhash anything. I'm not sure that will fit easily with the
>> nfsd_file_cond_queue thingie you just added in nfsd-fixes.
>> 
>> Perhaps it should also drop the RCU read lock on each iteration in
>> case it finds a lot of inodes that match the lookup criteria.
>> 
> 
> I could be wrong, but it looks like you're safe to traverse the list
> even in the case of removals, assuming the objects themselves are
> rcu-freed. AFAICT, the object's ->next pointer is not changed when it's
> removed from the table. After all, we're not holding a "real" lock here
> so the object could be removed by another task at any time.
> 
> It would be nice if this were documented though.

Given Herbert's confirmation that we can continue to use the
returned list after an item has been deleted from it, I will
update this patch to do that.

I had some concerns that, because we don't control the maximum
length of these lists, it could result in indeterminately long
periods where synchronize_rcu() would not make progress, so I
had proposed releasing the RCU read lock and performing the
lookup again during each loop iteration.

I think the usual Linux philosophy of "optimize the common case"
should apply here, and that case is that the list will contain
only a few items in nearly all instances.

Also, there are two or three other instances in the filecache
where we traverse a looked-up list without releasing the read
lock. If long lists is a problem, it will likely be an issue
in those cases first and more often.

Thus I'm going with grabbing the lock once and deleting all the
nfsd_files that are ready before releasing it.

This patch is probably going into v6.4, and I intend to post
this again for review at least once before then.

>>>> +		nf = container_of(list, struct nfsd_file, nf_rlist);
>>>> +
>>>> 		/* If we raced with someone else unhashing, ignore it */
>>>> 		if (!nfsd_file_unhash(nf))
>>>> 			continue;
>>>> @@ -836,7 +736,7 @@ nfsd_file_cache_init(void)
>>>> 	if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
>>>> 		return 0;
>>>> 
>>>> -	ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
>>>> +	ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
>>>> 	if (ret)
>>>> 		return ret;
>>>> 
>>>> @@ -904,7 +804,7 @@ nfsd_file_cache_init(void)
>>>> 	nfsd_file_mark_slab = NULL;
>>>> 	destroy_workqueue(nfsd_filecache_wq);
>>>> 	nfsd_filecache_wq = NULL;
>>>> -	rhashtable_destroy(&nfsd_file_rhash_tbl);
>>>> +	rhltable_destroy(&nfsd_file_rhltable);
>>>> 	goto out;
>>>> }
>>>> 
>>>> @@ -922,7 +822,7 @@ __nfsd_file_cache_purge(struct net *net)
>>>> 	struct nfsd_file *nf;
>>>> 	LIST_HEAD(dispose);
>>>> 
>>>> -	rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
>>>> +	rhltable_walk_enter(&nfsd_file_rhltable, &iter);
>>>> 	do {
>>>> 		rhashtable_walk_start(&iter);
>>>> 
>>>> @@ -1031,7 +931,7 @@ nfsd_file_cache_shutdown(void)
>>>> 	nfsd_file_mark_slab = NULL;
>>>> 	destroy_workqueue(nfsd_filecache_wq);
>>>> 	nfsd_filecache_wq = NULL;
>>>> -	rhashtable_destroy(&nfsd_file_rhash_tbl);
>>>> +	rhltable_destroy(&nfsd_file_rhltable);
>>>> 
>>>> 	for_each_possible_cpu(i) {
>>>> 		per_cpu(nfsd_file_cache_hits, i) = 0;
>>>> @@ -1042,6 +942,36 @@ nfsd_file_cache_shutdown(void)
>>>> 	}
>>>> }
>>>> 
>>>> +static struct nfsd_file *
>>>> +nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
>>>> +			struct inode *inode, unsigned char need,
>>>> +			bool want_gc)
>>>> +{
>>>> +	struct rhlist_head *tmp, *list;
>>>> +	struct nfsd_file *nf;
>>>> +
>>>> +	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
>>>> +			       nfsd_file_rhash_params);
>>>> +	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
>>>> +		if (nf->nf_may != need)
>>>> +			continue;
>>>> +		if (nf->nf_net != net)
>>>> +			continue;
>>>> +		if (!nfsd_match_cred(nf->nf_cred, cred))
>>>> +			continue;
>>>> +		if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
>>>> +			continue;
>>>> +		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
>>>> +			continue;
>>>> +
>>>> +		/* If it was on the LRU, reuse that reference. */
>>>> +		if (nfsd_file_lru_remove(nf))
>>>> +			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
>>>> +		return nf;
>>>> +	}
>>>> +	return NULL;
>>>> +}
>>>> +
>>>> /**
>>>> * nfsd_file_is_cached - are there any cached open files for this inode?
>>>> * @inode: inode to check
>>>> @@ -1056,15 +986,12 @@ nfsd_file_cache_shutdown(void)
>>>> bool
>>>> nfsd_file_is_cached(struct inode *inode)
>>>> {
>>>> -	struct nfsd_file_lookup_key key = {
>>>> -		.type	= NFSD_FILE_KEY_INODE,
>>>> -		.inode	= inode,
>>>> -	};
>>>> -	bool ret = false;
>>>> -
>>>> -	if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
>>>> -				   nfsd_file_rhash_params) != NULL)
>>>> -		ret = true;
>>>> +	bool ret;
>>>> +
>>>> +	rcu_read_lock();
>>>> +	ret = (rhltable_lookup(&nfsd_file_rhltable, &inode,
>>>> +			       nfsd_file_rhash_params) != NULL);
>>>> +	rcu_read_unlock();
>>>> 	trace_nfsd_file_is_cached(inode, (int)ret);
>>>> 	return ret;
>>>> }
>>>> @@ -1074,14 +1001,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> 		     unsigned int may_flags, struct nfsd_file **pnf,
>>>> 		     bool open, bool want_gc)
>>>> {
>>>> -	struct nfsd_file_lookup_key key = {
>>>> -		.type	= NFSD_FILE_KEY_FULL,
>>>> -		.need	= may_flags & NFSD_FILE_MAY_MASK,
>>>> -		.net	= SVC_NET(rqstp),
>>>> -		.gc	= want_gc,
>>>> -	};
>>>> +	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
>>>> +	struct net *net = SVC_NET(rqstp);
>>>> +	struct nfsd_file *new, *nf;
>>>> +	const struct cred *cred;
>>>> 	bool open_retry = true;
>>>> -	struct nfsd_file *nf;
>>>> +	struct inode *inode;
>>>> 	__be32 status;
>>>> 	int ret;
>>>> 
>>>> @@ -1089,32 +1014,38 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> 				may_flags|NFSD_MAY_OWNER_OVERRIDE);
>>>> 	if (status != nfs_ok)
>>>> 		return status;
>>>> -	key.inode = d_inode(fhp->fh_dentry);
>>>> -	key.cred = get_current_cred();
>>>> +	inode = d_inode(fhp->fh_dentry);
>>>> +	cred = get_current_cred();
>>>> 
>>>> retry:
>>>> -	rcu_read_lock();
>>>> -	nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
>>>> -			       nfsd_file_rhash_params);
>>>> -	if (nf)
>>>> -		nf = nfsd_file_get(nf);
>>>> -	rcu_read_unlock();
>>>> -
>>>> -	if (nf) {
>>>> -		if (nfsd_file_lru_remove(nf))
>>>> -			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
>>>> -		goto wait_for_construction;
>>>> +	if (open) {
>>>> +		rcu_read_lock();
>>>> +		nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
>>>> +		rcu_read_unlock();
>>>> +		if (nf)
>>>> +			goto wait_for_construction;
>>>> 	}
>>>> 
>>>> -	nf = nfsd_file_alloc(&key, may_flags);
>>>> -	if (!nf) {
>>>> +	new = nfsd_file_alloc(net, inode, need, want_gc);
>>>> +	if (!new) {
>>>> 		status = nfserr_jukebox;
>>>> 		goto out_status;
>>>> 	}
>>>> +	rcu_read_lock();
>>>> +	spin_lock(&inode->i_lock);
>>>> +	nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
>>>> +	if (unlikely(nf)) {
>>>> +		spin_unlock(&inode->i_lock);
>>>> +		rcu_read_unlock();
>>>> +		nfsd_file_slab_free(&new->nf_rcu);
>>>> +		goto wait_for_construction;
>>>> +	}
>>>> +	nf = new;
>>>> +	ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
>>>> +			      nfsd_file_rhash_params);
>>>> +	spin_unlock(&inode->i_lock);
>>>> +	rcu_read_unlock();
>>>> 
>>>> -	ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
>>>> -					   &key, &nf->nf_rhash,
>>>> -					   nfsd_file_rhash_params);
>>>> 	if (likely(ret == 0))
>>>> 		goto open_file;
>>>> 
>>>> @@ -1122,7 +1053,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> 	nf = NULL;
>>>> 	if (ret == -EEXIST)
>>>> 		goto retry;
>>>> -	trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
>>>> +	trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
>>>> 	status = nfserr_jukebox;
>>>> 	goto out_status;
>>>> 
>>>> @@ -1131,7 +1062,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> 
>>>> 	/* Did construction of this file fail? */
>>>> 	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
>>>> -		trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
>>>> +		trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
>>>> 		if (!open_retry) {
>>>> 			status = nfserr_jukebox;
>>>> 			goto out;
>>>> @@ -1157,14 +1088,14 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> 	}
>>>> 
>>>> out_status:
>>>> -	put_cred(key.cred);
>>>> +	put_cred(cred);
>>>> 	if (open)
>>>> -		trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
>>>> +		trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
>>>> 	return status;
>>>> 
>>>> open_file:
>>>> 	trace_nfsd_file_alloc(nf);
>>>> -	nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
>>>> +	nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
>>>> 	if (nf->nf_mark) {
>>>> 		if (open) {
>>>> 			status = nfsd_open_verified(rqstp, fhp, may_flags,
>>>> @@ -1178,7 +1109,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> 	 * If construction failed, or we raced with a call to unlink()
>>>> 	 * then unhash.
>>>> 	 */
>>>> -	if (status == nfs_ok && key.inode->i_nlink == 0)
>>>> +	if (status != nfs_ok || inode->i_nlink == 0)
>>>> 		status = nfserr_jukebox;
>>>> 	if (status != nfs_ok)
>>>> 		nfsd_file_unhash(nf);
>>>> @@ -1273,7 +1204,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
>>>> 		lru = list_lru_count(&nfsd_file_lru);
>>>> 
>>>> 		rcu_read_lock();
>>>> -		ht = &nfsd_file_rhash_tbl;
>>>> +		ht = &nfsd_file_rhltable.ht;
>>>> 		count = atomic_read(&ht->nelems);
>>>> 		tbl = rht_dereference_rcu(ht->tbl, ht);
>>>> 		buckets = tbl->size;
>>>> @@ -1289,7 +1220,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
>>>> 		evictions += per_cpu(nfsd_file_evictions, i);
>>>> 	}
>>>> 
>>>> -	seq_printf(m, "total entries: %u\n", count);
>>>> +	seq_printf(m, "total inodes: %u\n", count);
>>>> 	seq_printf(m, "hash buckets:  %u\n", buckets);
>>>> 	seq_printf(m, "lru entries:   %lu\n", lru);
>>>> 	seq_printf(m, "cache hits:    %lu\n", hits);
>>>> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
>>>> index b7efb2c3ddb1..7d3b35771565 100644
>>>> --- a/fs/nfsd/filecache.h
>>>> +++ b/fs/nfsd/filecache.h
>>>> @@ -29,9 +29,8 @@ struct nfsd_file_mark {
>>>> * never be dereferenced, only used for comparison.
>>>> */
>>>> struct nfsd_file {
>>>> -	struct rhash_head	nf_rhash;
>>>> -	struct list_head	nf_lru;
>>>> -	struct rcu_head		nf_rcu;
>>>> +	struct rhlist_head	nf_rlist;
>>>> +	void			*nf_inode;
>>>> 	struct file		*nf_file;
>>>> 	const struct cred	*nf_cred;
>>>> 	struct net		*nf_net;
>>>> @@ -40,10 +39,12 @@ struct nfsd_file {
>>>> #define NFSD_FILE_REFERENCED	(2)
>>>> #define NFSD_FILE_GC		(3)
>>>> 	unsigned long		nf_flags;
>>>> -	struct inode		*nf_inode;	/* don't deref */
>>>> 	refcount_t		nf_ref;
>>>> 	unsigned char		nf_may;
>>>> +
>>>> 	struct nfsd_file_mark	*nf_mark;
>>>> +	struct list_head	nf_lru;
>>>> +	struct rcu_head		nf_rcu;
>>>> 	ktime_t			nf_birthtime;
>>>> };
>>>> 
>>>> 
>>>> 
>>> 
>>> -- 
>>> Jeff Layton <jlayton@xxxxxxxxxx>
>> 
>> --
>> Chuck Lever
>> 
>> 
>> 
> 
> -- 
> Jeff Layton <jlayton@xxxxxxxxxx>

--
Chuck Lever