Re: [PATCH] nfs: fix a deadlock in nfs client initialization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 2017-11-20 at 16:41 -0500, Scott Mayhew wrote:
> The following deadlock can occur between a process waiting for a
> client
> to initialize in while walking the client list and another process
> waiting for the nfs_clid_init_mutex so it can initialize that client:
> 
> Process 1                               Process 2
> ---------                               ---------
> spin_lock(&nn->nfs_client_lock);
> list_add_tail(&CLIENTA->cl_share_link,
>         &nn->nfs_client_list);
> spin_unlock(&nn->nfs_client_lock);
>                                         spin_lock(&nn-
> >nfs_client_lock);
>                                         list_add_tail(&CLIENTB-
> >cl_share_link,
>                                                 &nn-
> >nfs_client_list);
>                                         spin_unlock(&nn-
> >nfs_client_lock);
>                                         mutex_lock(&nfs_clid_init_mut
> ex);
>                                         nfs41_walk_client_list(clp,
> result, cred);
>                                         nfs_wait_client_init_complete
> (CLIENTA);
> (waiting for nfs_clid_init_mutex)
> 
> Add and initilize the client with the nfs_clid_init_mutex held in
> order to prevent that deadlock.
> 
> Signed-off-by: Scott Mayhew <smayhew@xxxxxxxxxx>
> ---
>  fs/nfs/client.c    | 21 +++++++++++++++++++--
>  fs/nfs/nfs4state.c |  4 ----
>  2 files changed, 19 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfs/client.c b/fs/nfs/client.c
> index 0ac2fb1..db38c47 100644
> --- a/fs/nfs/client.c
> +++ b/fs/nfs/client.c
> @@ -60,6 +60,7 @@ static
> DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
>  static DEFINE_SPINLOCK(nfs_version_lock);
>  static DEFINE_MUTEX(nfs_version_mutex);
>  static LIST_HEAD(nfs_versions);
> +static DEFINE_MUTEX(nfs_clid_init_mutex);
>  
>  /*
>   * RPC cruft for NFS
> @@ -386,7 +387,7 @@ nfs_found_client(const struct nfs_client_initdata
> *cl_init,
>   */
>  struct nfs_client *nfs_get_client(const struct nfs_client_initdata
> *cl_init)
>  {
> -	struct nfs_client *clp, *new = NULL;
> +	struct nfs_client *clp, *new = NULL, *result = NULL;
>  	struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
>  	const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod-
> >rpc_ops;
>  
> @@ -407,11 +408,27 @@ struct nfs_client *nfs_get_client(const struct
> nfs_client_initdata *cl_init)
>  			return nfs_found_client(cl_init, clp);
>  		}
>  		if (new) {
> +			/* add and initialize the client with the
> +			 * nfs_clid_init_mutex held to prevent a
> deadlock
> +			 * with the server trunking detection
> +			 */
> +			spin_unlock(&nn->nfs_client_lock);
> +			mutex_lock(&nfs_clid_init_mutex);
> +			spin_lock(&nn->nfs_client_lock);
> +			clp = nfs_match_client(cl_init);
> +			if (clp) {
> +				spin_unlock(&nn->nfs_client_lock);
> +				mutex_unlock(&nfs_clid_init_mutex);
> +				new->rpc_ops->free_client(new);
> +				return nfs_found_client(cl_init,
> clp);
> +			}
>  			list_add_tail(&new->cl_share_link,
>  					&nn->nfs_client_list);
>  			spin_unlock(&nn->nfs_client_lock);
>  			new->cl_flags = cl_init->init_flags;
> -			return rpc_ops->init_client(new, cl_init);
> +			result = rpc_ops->init_client(new, cl_init);
> +			mutex_unlock(&nfs_clid_init_mutex);
> +			return result;
>  		}
>  
>  		spin_unlock(&nn->nfs_client_lock);
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 54fd56d..668164e 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -77,8 +77,6 @@ const nfs4_stateid invalid_stateid = {
>  	.type = NFS4_INVALID_STATEID_TYPE,
>  };
>  
> -static DEFINE_MUTEX(nfs_clid_init_mutex);
> -
>  int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred
> *cred)
>  {
>  	struct nfs4_setclientid_res clid = {
> @@ -2164,7 +2162,6 @@ int nfs4_discover_server_trunking(struct
> nfs_client *clp,
>  	clnt = clp->cl_rpcclient;
>  	i = 0;
>  
> -	mutex_lock(&nfs_clid_init_mutex);
>  again:
>  	status  = -ENOENT;
>  	cred = nfs4_get_clid_cred(clp);
> @@ -2232,7 +2229,6 @@ int nfs4_discover_server_trunking(struct
> nfs_client *clp,
>  	}
>  
>  out_unlock:
> -	mutex_unlock(&nfs_clid_init_mutex);
>  	dprintk("NFS: %s: status = %d\n", __func__, status);
>  	return status;
>  }

Your initial fix was fine. It just needed 2 changes:

1) Remove the test for 'clp->cl_minorversion > 0'.
2) Refcount the struct nfs_client while you are waiting for it to be
initialised.

-- 
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@xxxxxxxxxxxxxxx
��.n��������+%������w��{.n�����{��w���jg��������ݢj����G�������j:+v���w�m������w�������h�����٥




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux