Re: [PATCH] nfsd: ensure the nfsd_serv pointer is cleared when svc is torn down

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Oct 27, 2023 at 07:53:44AM -0400, Jeff Layton wrote:
> Zhi Li reported a refcount_t use-after-free when bringing up nfsd.

Aw, Jeff. You promised you wouldn't post a bug fix during the last
weekend of -rc again. ;-)


> We set the nn->nfsd_serv pointer in nfsd_create_serv, but it's only ever
> cleared in nfsd_last_thread. When setting up a new socket, if there is
> an error, this can leave nfsd_serv pointer set after it has been freed.

Maybe missing a call to nfsd_last_thread in this case?


> We need to better couple the existence of the object with the value of
> the nfsd_serv pointer.
> 
> Since we always increment and decrement the svc_serv references under
> mutex, just test for whether the next put will destroy it in nfsd_put,
> and clear the pointer beforehand if so. Add a new nfsd_get function for

My memory isn't 100% reliable, but I seem to recall that Neil spent
some effort getting rid of the nfsd_get() helper in recent kernels.
So, nfsd_get() isn't especially new. I will wait for Neil's review.

Let's target the fix (when we've agreed upon one) for v6.7-rc.


> better clarity and so that we can enforce that the mutex is held via
> lockdep. Remove the clearing of the pointer from nfsd_last_thread.
> Finally, change all of the svc_get and svc_put calls to use the updated
> wrappers.

This seems like a good clean-up. If we need to deal with the set up
and tear down of per-net namespace metadata, I don't see a nicer
way to do it than nfsd_get/put.


> Reported-by: Zhi Li <yieli@xxxxxxxxxx>

Closes: ?


> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
> When using their test harness, the RHQA folks would sometimes see the
> nfsv3 portmapper registration fail with -ERESTARTSYS, and that would
> trigger this bug. I could never reproduce that easily on my own, but I
> was able to validate this by hacking some fault injection into
> svc_register.
> ---
>  fs/nfsd/nfsctl.c |  4 ++--
>  fs/nfsd/nfsd.h   |  8 ++-----
>  fs/nfsd/nfssvc.c | 72 ++++++++++++++++++++++++++++++++++++--------------------
>  3 files changed, 51 insertions(+), 33 deletions(-)
> 
> diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> index 7ed02fb88a36..f8c0fed99c7f 100644
> --- a/fs/nfsd/nfsctl.c
> +++ b/fs/nfsd/nfsctl.c
> @@ -706,7 +706,7 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net, const struct cred
>  
>  	if (err >= 0 &&
>  	    !nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
> -		svc_get(nn->nfsd_serv);
> +		nfsd_get(net);
>  
>  	nfsd_put(net);
>  	return err;
> @@ -745,7 +745,7 @@ static ssize_t __write_ports_addxprt(char *buf, struct net *net, const struct cr
>  		goto out_close;
>  
>  	if (!nn->nfsd_serv->sv_nrthreads && !xchg(&nn->keep_active, 1))
> -		svc_get(nn->nfsd_serv);
> +		nfsd_get(net);
>  
>  	nfsd_put(net);
>  	return 0;
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 11c14faa6c67..c9cb70bf2a6d 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -96,12 +96,8 @@ int		nfsd_pool_stats_open(struct inode *, struct file *);
>  int		nfsd_pool_stats_release(struct inode *, struct file *);
>  void		nfsd_shutdown_threads(struct net *net);
>  
> -static inline void nfsd_put(struct net *net)
> -{
> -	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> -
> -	svc_put(nn->nfsd_serv);
> -}
> +struct svc_serv	*nfsd_get(struct net *net);
> +void		nfsd_put(struct net *net);
>  
>  bool		i_am_nfsd(void);
>  
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index c7af1095f6b5..4c00478c28dd 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -66,7 +66,7 @@ static __be32			nfsd_init_request(struct svc_rqst *,
>   * ->sv_pools[].
>   *
>   * Each active thread holds a counted reference on nn->nfsd_serv, as does
> - * the nn->keep_active flag and various transient calls to svc_get().
> + * the nn->keep_active flag and various transient calls to nfsd_get().
>   *
>   * Finally, the nfsd_mutex also protects some of the global variables that are
>   * accessed when nfsd starts and that are settable via the write_* routines in
> @@ -477,6 +477,39 @@ static void nfsd_shutdown_net(struct net *net)
>  }
>  
>  static DEFINE_SPINLOCK(nfsd_notifier_lock);
> +
> +struct svc_serv *nfsd_get(struct net *net)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	struct svc_serv *serv = nn->nfsd_serv;
> +
> +	lockdep_assert_held(&nfsd_mutex);
> +	if (serv)
> +		svc_get(serv);
> +	return serv;
> +}
> +
> +void nfsd_put(struct net *net)
> +{
> +	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
> +	struct svc_serv *serv = nn->nfsd_serv;
> +
> +	/*
> +	 * The notifiers expect that if the nfsd_serv pointer is
> +	 * set that it's safe to access, so we must clear that
> +	 * pointer first before putting the last reference. Because
> +	 * we always increment and decrement the refcount under the
> +	 * mutex, it's safe to determine this via kref_read.
> +	 */

These two need kdoc comments. You could move this big block into
the kdoc comment for nfsd_put.


> +	lockdep_assert_held(&nfsd_mutex);
> +	if (kref_read(&serv->sv_refcnt) == 1) {
> +		spin_lock(&nfsd_notifier_lock);
> +		nn->nfsd_serv = NULL;
> +		spin_unlock(&nfsd_notifier_lock);
> +	}
> +	svc_put(serv);
> +}
> +
>  static int nfsd_inetaddr_event(struct notifier_block *this, unsigned long event,
>  	void *ptr)
>  {
> @@ -547,10 +580,6 @@ static void nfsd_last_thread(struct net *net)
>  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  	struct svc_serv *serv = nn->nfsd_serv;
>  
> -	spin_lock(&nfsd_notifier_lock);
> -	nn->nfsd_serv = NULL;
> -	spin_unlock(&nfsd_notifier_lock);
> -
>  	/* check if the notifier still has clients */
>  	if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
>  		unregister_inetaddr_notifier(&nfsd_inetaddr_notifier);
> @@ -638,21 +667,19 @@ static int nfsd_get_default_max_blksize(void)
>  
>  void nfsd_shutdown_threads(struct net *net)
>  {
> -	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  	struct svc_serv *serv;
>  
>  	mutex_lock(&nfsd_mutex);
> -	serv = nn->nfsd_serv;
> +	serv = nfsd_get(net);
>  	if (serv == NULL) {
>  		mutex_unlock(&nfsd_mutex);
>  		return;
>  	}
>  
> -	svc_get(serv);
>  	/* Kill outstanding nfsd threads */
>  	svc_set_num_threads(serv, NULL, 0);
>  	nfsd_last_thread(net);
> -	svc_put(serv);
> +	nfsd_put(net);
>  	mutex_unlock(&nfsd_mutex);
>  }
>  
> @@ -663,15 +690,13 @@ bool i_am_nfsd(void)
>  
>  int nfsd_create_serv(struct net *net)
>  {
> -	int error;
>  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  	struct svc_serv *serv;
> +	int error;
>  
> -	WARN_ON(!mutex_is_locked(&nfsd_mutex));
> -	if (nn->nfsd_serv) {
> -		svc_get(nn->nfsd_serv);
> +	serv = nfsd_get(net);
> +	if (serv)
>  		return 0;
> -	}
>  	if (nfsd_max_blksize == 0)
>  		nfsd_max_blksize = nfsd_get_default_max_blksize();
>  	nfsd_reset_versions(nn);
> @@ -731,8 +756,6 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
>  	int err = 0;
>  	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
>  
> -	WARN_ON(!mutex_is_locked(&nfsd_mutex));
> -
>  	if (nn->nfsd_serv == NULL || n <= 0)
>  		return 0;
>  
> @@ -766,7 +789,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
>  		nthreads[0] = 1;
>  
>  	/* apply the new numbers */
> -	svc_get(nn->nfsd_serv);
> +	nfsd_get(net);
>  	for (i = 0; i < n; i++) {
>  		err = svc_set_num_threads(nn->nfsd_serv,
>  					  &nn->nfsd_serv->sv_pools[i],
> @@ -774,7 +797,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
>  		if (err)
>  			break;
>  	}
> -	svc_put(nn->nfsd_serv);
> +	nfsd_put(net);
>  	return err;
>  }
>  
> @@ -826,8 +849,8 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
>  out_put:
>  	/* Threads now hold service active */
>  	if (xchg(&nn->keep_active, 0))
> -		svc_put(serv);
> -	svc_put(serv);
> +		nfsd_put(net);
> +	nfsd_put(net);
>  out:
>  	mutex_unlock(&nfsd_mutex);
>  	return error;
> @@ -1067,14 +1090,14 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
>  int nfsd_pool_stats_open(struct inode *inode, struct file *file)
>  {
>  	int ret;
> +	struct net *net = inode->i_sb->s_fs_info;
>  	struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
>  
>  	mutex_lock(&nfsd_mutex);
> -	if (nn->nfsd_serv == NULL) {
> +	if (nfsd_get(net) == NULL) {
>  		mutex_unlock(&nfsd_mutex);
>  		return -ENODEV;
>  	}
> -	svc_get(nn->nfsd_serv);
>  	ret = svc_pool_stats_open(nn->nfsd_serv, file);
>  	mutex_unlock(&nfsd_mutex);
>  	return ret;
> @@ -1082,12 +1105,11 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file)
>  
>  int nfsd_pool_stats_release(struct inode *inode, struct file *file)
>  {
> -	struct seq_file *seq = file->private_data;
> -	struct svc_serv *serv = seq->private;
> +	struct net *net = inode->i_sb->s_fs_info;
>  	int ret = seq_release(inode, file);
>  
>  	mutex_lock(&nfsd_mutex);
> -	svc_put(serv);
> +	nfsd_put(net);
>  	mutex_unlock(&nfsd_mutex);
>  	return ret;
>  }
> 
> ---
> base-commit: 80eea12811ab8b32e3eac355adff695df5b4ba8e
> change-id: 20231026-kdevops-3c18d260bf7c
> 
> Best regards,
> -- 
> Jeff Layton <jlayton@xxxxxxxxxx>
> 

-- 
Chuck Lever



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux