Re: [PATCH RFC] NFSD: Convert filecache to rhltable

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2023-01-05 at 09:58 -0500, Chuck Lever wrote:
> From: Chuck Lever <chuck.lever@xxxxxxxxxx>
> 
> While we were converting the nfs4_file hashtable to use the kernel's
> resizable hashtable data structure, Neil Brown observed that the
> list variant (rhltable) would be better for managing nfsd_file items
> as well. The nfsd_file hash table will contain multiple entries for
> the same inode -- these should be kept together on a list. And, it
> could be possible for exotic or malicious client behavior to cause
> the hash table to resize itself on every insertion.
> 
> A nice simplification is that rhltable_lookup() can return a list
> that contains only nfsd_file items that match a given inode, which
> enables us to eliminate specialized hash table helper functions and
> use the default functions provided by the rhashtable implementation).
> 
> Since we are now storing nfsd_file items for the same inode on a
> single list, that effectively reduces the number of hash entries
> that have to be tracked in the hash table. The mininum bucket count
> is therefore lowered.
> 
> Suggested-by: Neil Brown <neilb@xxxxxxx>
> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx>
> ---
>  fs/nfsd/filecache.c |  289 +++++++++++++++++++--------------------------------
>  fs/nfsd/filecache.h |    9 +-
>  2 files changed, 115 insertions(+), 183 deletions(-)
> 
> Just to note that this work is still in the wings. It would need to
> be rebased on Jeff's recent fixes and clean-ups. I'm reluctant to
> apply this until there is more evidence that the instability in v6.0
> has been duly addressed.
> 
> 
> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
> index 45b2c9e3f636..f04e722bb6bc 100644
> --- a/fs/nfsd/filecache.c
> +++ b/fs/nfsd/filecache.c
> @@ -74,70 +74,9 @@ static struct list_lru			nfsd_file_lru;
>  static unsigned long			nfsd_file_flags;
>  static struct fsnotify_group		*nfsd_file_fsnotify_group;
>  static struct delayed_work		nfsd_filecache_laundrette;
> -static struct rhashtable		nfsd_file_rhash_tbl
> +static struct rhltable			nfsd_file_rhltable
>  						____cacheline_aligned_in_smp;
>  
> -enum nfsd_file_lookup_type {
> -	NFSD_FILE_KEY_INODE,
> -	NFSD_FILE_KEY_FULL,
> -};
> -
> -struct nfsd_file_lookup_key {
> -	struct inode			*inode;
> -	struct net			*net;
> -	const struct cred		*cred;
> -	unsigned char			need;
> -	bool				gc;
> -	enum nfsd_file_lookup_type	type;
> -};
> -
> -/*
> - * The returned hash value is based solely on the address of an in-code
> - * inode, a pointer to a slab-allocated object. The entropy in such a
> - * pointer is concentrated in its middle bits.
> - */
> -static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
> -{
> -	unsigned long ptr = (unsigned long)inode;
> -	u32 k;
> -
> -	k = ptr >> L1_CACHE_SHIFT;
> -	k &= 0x00ffffff;
> -	return jhash2(&k, 1, seed);
> -}
> -
> -/**
> - * nfsd_file_key_hashfn - Compute the hash value of a lookup key
> - * @data: key on which to compute the hash value
> - * @len: rhash table's key_len parameter (unused)
> - * @seed: rhash table's random seed of the day
> - *
> - * Return value:
> - *   Computed 32-bit hash value
> - */
> -static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
> -{
> -	const struct nfsd_file_lookup_key *key = data;
> -
> -	return nfsd_file_inode_hash(key->inode, seed);
> -}
> -
> -/**
> - * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
> - * @data: object on which to compute the hash value
> - * @len: rhash table's key_len parameter (unused)
> - * @seed: rhash table's random seed of the day
> - *
> - * Return value:
> - *   Computed 32-bit hash value
> - */
> -static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
> -{
> -	const struct nfsd_file *nf = data;
> -
> -	return nfsd_file_inode_hash(nf->nf_inode, seed);
> -}
> -
>  static bool
>  nfsd_match_cred(const struct cred *c1, const struct cred *c2)
>  {
> @@ -158,53 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
>  	return true;
>  }
>  
> -/**
> - * nfsd_file_obj_cmpfn - Match a cache item against search criteria
> - * @arg: search criteria
> - * @ptr: cache item to check
> - *
> - * Return values:
> - *   %0 - Item matches search criteria
> - *   %1 - Item does not match search criteria
> - */
> -static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
> -			       const void *ptr)
> -{
> -	const struct nfsd_file_lookup_key *key = arg->key;
> -	const struct nfsd_file *nf = ptr;
> -
> -	switch (key->type) {
> -	case NFSD_FILE_KEY_INODE:
> -		if (nf->nf_inode != key->inode)
> -			return 1;
> -		break;
> -	case NFSD_FILE_KEY_FULL:
> -		if (nf->nf_inode != key->inode)
> -			return 1;
> -		if (nf->nf_may != key->need)
> -			return 1;
> -		if (nf->nf_net != key->net)
> -			return 1;
> -		if (!nfsd_match_cred(nf->nf_cred, key->cred))
> -			return 1;
> -		if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
> -			return 1;
> -		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
> -			return 1;
> -		break;
> -	}
> -	return 0;
> -}
> -
>  static const struct rhashtable_params nfsd_file_rhash_params = {
>  	.key_len		= sizeof_field(struct nfsd_file, nf_inode),
>  	.key_offset		= offsetof(struct nfsd_file, nf_inode),
> -	.head_offset		= offsetof(struct nfsd_file, nf_rhash),
> -	.hashfn			= nfsd_file_key_hashfn,
> -	.obj_hashfn		= nfsd_file_obj_hashfn,
> -	.obj_cmpfn		= nfsd_file_obj_cmpfn,
> -	/* Reduce resizing churn on light workloads */
> -	.min_size		= 512,		/* buckets */
> +	.head_offset		= offsetof(struct nfsd_file, nf_rlist),
> +
> +	/*
> +	 * Start with a single page hash table to reduce resizing churn
> +	 * on light workloads.
> +	 */
> +	.min_size		= 256,
>  	.automatic_shrinking	= true,
>  };
>  
> @@ -307,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
>  }
>  
>  static struct nfsd_file *
> -nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
> +nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
> +		bool want_gc)
>  {
>  	struct nfsd_file *nf;
>  
>  	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
> -	if (nf) {
> -		INIT_LIST_HEAD(&nf->nf_lru);
> -		nf->nf_birthtime = ktime_get();
> -		nf->nf_file = NULL;
> -		nf->nf_cred = get_current_cred();
> -		nf->nf_net = key->net;
> -		nf->nf_flags = 0;
> -		__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
> -		__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
> -		if (key->gc)
> -			__set_bit(NFSD_FILE_GC, &nf->nf_flags);
> -		nf->nf_inode = key->inode;
> -		refcount_set(&nf->nf_ref, 1);
> -		nf->nf_may = key->need;
> -		nf->nf_mark = NULL;
> -	}
> +	if (unlikely(!nf))
> +		return NULL;
> +
> +	INIT_LIST_HEAD(&nf->nf_lru);
> +	nf->nf_birthtime = ktime_get();
> +	nf->nf_file = NULL;
> +	nf->nf_cred = get_current_cred();
> +	nf->nf_net = net;
> +	nf->nf_flags = want_gc ?
> +		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
> +		BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
> +	nf->nf_inode = inode;
> +	refcount_set(&nf->nf_ref, 1);
> +	nf->nf_may = need;
> +	nf->nf_mark = NULL;
>  	return nf;
>  }
>  
> @@ -362,8 +264,8 @@ nfsd_file_hash_remove(struct nfsd_file *nf)
>  
>  	if (nfsd_file_check_write_error(nf))
>  		nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id));
> -	rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
> -			       nfsd_file_rhash_params);
> +	rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
> +			nfsd_file_rhash_params);
>  }
>  
>  static bool
> @@ -680,21 +582,19 @@ static struct shrinker	nfsd_file_shrinker = {
>  static void
>  nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
>  {
> -	struct nfsd_file_lookup_key key = {
> -		.type	= NFSD_FILE_KEY_INODE,
> -		.inode	= inode,
> -	};
> -	struct nfsd_file *nf;
> -
>  	rcu_read_lock();
>  	do {
> +		struct rhlist_head *list;
> +		struct nfsd_file *nf;
>  		int decrement = 1;
>  
> -		nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
> +		list = rhltable_lookup(&nfsd_file_rhltable, &inode,
>  				       nfsd_file_rhash_params);
> -		if (!nf)
> +		if (!list)
>  			break;
>  

Rather than redriving the lookup multiple times in the loop, would it be
better to just search once and then walk the resulting list with
rhl_for_each_entry_rcu, all while holding the rcu_read_lock?

> +		nf = container_of(list, struct nfsd_file, nf_rlist);
> +
>  		/* If we raced with someone else unhashing, ignore it */
>  		if (!nfsd_file_unhash(nf))
>  			continue;
> @@ -836,7 +736,7 @@ nfsd_file_cache_init(void)
>  	if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
>  		return 0;
>  
> -	ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
> +	ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
>  	if (ret)
>  		return ret;
>  
> @@ -904,7 +804,7 @@ nfsd_file_cache_init(void)
>  	nfsd_file_mark_slab = NULL;
>  	destroy_workqueue(nfsd_filecache_wq);
>  	nfsd_filecache_wq = NULL;
> -	rhashtable_destroy(&nfsd_file_rhash_tbl);
> +	rhltable_destroy(&nfsd_file_rhltable);
>  	goto out;
>  }
>  
> @@ -922,7 +822,7 @@ __nfsd_file_cache_purge(struct net *net)
>  	struct nfsd_file *nf;
>  	LIST_HEAD(dispose);
>  
> -	rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
> +	rhltable_walk_enter(&nfsd_file_rhltable, &iter);
>  	do {
>  		rhashtable_walk_start(&iter);
>  
> @@ -1031,7 +931,7 @@ nfsd_file_cache_shutdown(void)
>  	nfsd_file_mark_slab = NULL;
>  	destroy_workqueue(nfsd_filecache_wq);
>  	nfsd_filecache_wq = NULL;
> -	rhashtable_destroy(&nfsd_file_rhash_tbl);
> +	rhltable_destroy(&nfsd_file_rhltable);
>  
>  	for_each_possible_cpu(i) {
>  		per_cpu(nfsd_file_cache_hits, i) = 0;
> @@ -1042,6 +942,36 @@ nfsd_file_cache_shutdown(void)
>  	}
>  }
>  
> +static struct nfsd_file *
> +nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
> +			struct inode *inode, unsigned char need,
> +			bool want_gc)
> +{
> +	struct rhlist_head *tmp, *list;
> +	struct nfsd_file *nf;
> +
> +	list = rhltable_lookup(&nfsd_file_rhltable, &inode,
> +			       nfsd_file_rhash_params);
> +	rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
> +		if (nf->nf_may != need)
> +			continue;
> +		if (nf->nf_net != net)
> +			continue;
> +		if (!nfsd_match_cred(nf->nf_cred, cred))
> +			continue;
> +		if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
> +			continue;
> +		if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
> +			continue;
> +
> +		/* If it was on the LRU, reuse that reference. */
> +		if (nfsd_file_lru_remove(nf))
> +			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
> +		return nf;
> +	}
> +	return NULL;
> +}
> +
>  /**
>   * nfsd_file_is_cached - are there any cached open files for this inode?
>   * @inode: inode to check
> @@ -1056,15 +986,12 @@ nfsd_file_cache_shutdown(void)
>  bool
>  nfsd_file_is_cached(struct inode *inode)
>  {
> -	struct nfsd_file_lookup_key key = {
> -		.type	= NFSD_FILE_KEY_INODE,
> -		.inode	= inode,
> -	};
> -	bool ret = false;
> -
> -	if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
> -				   nfsd_file_rhash_params) != NULL)
> -		ret = true;
> +	bool ret;
> +
> +	rcu_read_lock();
> +	ret = (rhltable_lookup(&nfsd_file_rhltable, &inode,
> +			       nfsd_file_rhash_params) != NULL);
> +	rcu_read_unlock();
>  	trace_nfsd_file_is_cached(inode, (int)ret);
>  	return ret;
>  }
> @@ -1074,14 +1001,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  		     unsigned int may_flags, struct nfsd_file **pnf,
>  		     bool open, bool want_gc)
>  {
> -	struct nfsd_file_lookup_key key = {
> -		.type	= NFSD_FILE_KEY_FULL,
> -		.need	= may_flags & NFSD_FILE_MAY_MASK,
> -		.net	= SVC_NET(rqstp),
> -		.gc	= want_gc,
> -	};
> +	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
> +	struct net *net = SVC_NET(rqstp);
> +	struct nfsd_file *new, *nf;
> +	const struct cred *cred;
>  	bool open_retry = true;
> -	struct nfsd_file *nf;
> +	struct inode *inode;
>  	__be32 status;
>  	int ret;
>  
> @@ -1089,32 +1014,38 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  				may_flags|NFSD_MAY_OWNER_OVERRIDE);
>  	if (status != nfs_ok)
>  		return status;
> -	key.inode = d_inode(fhp->fh_dentry);
> -	key.cred = get_current_cred();
> +	inode = d_inode(fhp->fh_dentry);
> +	cred = get_current_cred();
>  
>  retry:
> -	rcu_read_lock();
> -	nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
> -			       nfsd_file_rhash_params);
> -	if (nf)
> -		nf = nfsd_file_get(nf);
> -	rcu_read_unlock();
> -
> -	if (nf) {
> -		if (nfsd_file_lru_remove(nf))
> -			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
> -		goto wait_for_construction;
> +	if (open) {
> +		rcu_read_lock();
> +		nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
> +		rcu_read_unlock();
> +		if (nf)
> +			goto wait_for_construction;
>  	}
>  
> -	nf = nfsd_file_alloc(&key, may_flags);
> -	if (!nf) {
> +	new = nfsd_file_alloc(net, inode, need, want_gc);
> +	if (!new) {
>  		status = nfserr_jukebox;
>  		goto out_status;
>  	}
> +	rcu_read_lock();
> +	spin_lock(&inode->i_lock);
> +	nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
> +	if (unlikely(nf)) {
> +		spin_unlock(&inode->i_lock);
> +		rcu_read_unlock();
> +		nfsd_file_slab_free(&new->nf_rcu);
> +		goto wait_for_construction;
> +	}
> +	nf = new;
> +	ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
> +			      nfsd_file_rhash_params);
> +	spin_unlock(&inode->i_lock);
> +	rcu_read_unlock();
>  
> -	ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
> -					   &key, &nf->nf_rhash,
> -					   nfsd_file_rhash_params);
>  	if (likely(ret == 0))
>  		goto open_file;
>  
> @@ -1122,7 +1053,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	nf = NULL;
>  	if (ret == -EEXIST)
>  		goto retry;
> -	trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
> +	trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
>  	status = nfserr_jukebox;
>  	goto out_status;
>  
> @@ -1131,7 +1062,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  
>  	/* Did construction of this file fail? */
>  	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
> -		trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
> +		trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
>  		if (!open_retry) {
>  			status = nfserr_jukebox;
>  			goto out;
> @@ -1157,14 +1088,14 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	}
>  
>  out_status:
> -	put_cred(key.cred);
> +	put_cred(cred);
>  	if (open)
> -		trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
> +		trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
>  	return status;
>  
>  open_file:
>  	trace_nfsd_file_alloc(nf);
> -	nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
> +	nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
>  	if (nf->nf_mark) {
>  		if (open) {
>  			status = nfsd_open_verified(rqstp, fhp, may_flags,
> @@ -1178,7 +1109,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>  	 * If construction failed, or we raced with a call to unlink()
>  	 * then unhash.
>  	 */
> -	if (status == nfs_ok && key.inode->i_nlink == 0)
> +	if (status != nfs_ok || inode->i_nlink == 0)
>  		status = nfserr_jukebox;
>  	if (status != nfs_ok)
>  		nfsd_file_unhash(nf);
> @@ -1273,7 +1204,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
>  		lru = list_lru_count(&nfsd_file_lru);
>  
>  		rcu_read_lock();
> -		ht = &nfsd_file_rhash_tbl;
> +		ht = &nfsd_file_rhltable.ht;
>  		count = atomic_read(&ht->nelems);
>  		tbl = rht_dereference_rcu(ht->tbl, ht);
>  		buckets = tbl->size;
> @@ -1289,7 +1220,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
>  		evictions += per_cpu(nfsd_file_evictions, i);
>  	}
>  
> -	seq_printf(m, "total entries: %u\n", count);
> +	seq_printf(m, "total inodes: %u\n", count);
>  	seq_printf(m, "hash buckets:  %u\n", buckets);
>  	seq_printf(m, "lru entries:   %lu\n", lru);
>  	seq_printf(m, "cache hits:    %lu\n", hits);
> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
> index b7efb2c3ddb1..7d3b35771565 100644
> --- a/fs/nfsd/filecache.h
> +++ b/fs/nfsd/filecache.h
> @@ -29,9 +29,8 @@ struct nfsd_file_mark {
>   * never be dereferenced, only used for comparison.
>   */
>  struct nfsd_file {
> -	struct rhash_head	nf_rhash;
> -	struct list_head	nf_lru;
> -	struct rcu_head		nf_rcu;
> +	struct rhlist_head	nf_rlist;
> +	void			*nf_inode;
>  	struct file		*nf_file;
>  	const struct cred	*nf_cred;
>  	struct net		*nf_net;
> @@ -40,10 +39,12 @@ struct nfsd_file {
>  #define NFSD_FILE_REFERENCED	(2)
>  #define NFSD_FILE_GC		(3)
>  	unsigned long		nf_flags;
> -	struct inode		*nf_inode;	/* don't deref */
>  	refcount_t		nf_ref;
>  	unsigned char		nf_may;
> +
>  	struct nfsd_file_mark	*nf_mark;
> +	struct list_head	nf_lru;
> +	struct rcu_head		nf_rcu;
>  	ktime_t			nf_birthtime;
>  };
>  
> 
> 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux