Re: [PATCH 2/2] NFSv4: Fix a state manager thread deadlock regression

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, 18 Sep 2023, trondmy@xxxxxxxxxx wrote:
> From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> 
> Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit
> aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it
> prevents the setup of new threads to handle reboot recovery, while the
> older recovery thread is stuck returning delegations.

I hadn't realised that the state manager thread did these two distinct
tasks and might need to be doing both at once - requiring two threads.
Thanks for highlighting that.

It seems to me that even with the new code, we can still get a deadlock
when swap is enabled, as we only ever run one thread in that case.
Is that correct, or did I miss something?

Maybe we need two threads - a state manager and a delegation recall
handler.  And when swap is enabled, both need to be running permanently
??

Thanks,
NeilBrown


> 
> Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled")
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> ---
>  fs/nfs/nfs4proc.c  |  4 +++-
>  fs/nfs/nfs4state.c | 38 ++++++++++++++++++++++++++------------
>  2 files changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index 5deeaea8026e..a19e809cad16 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -10652,7 +10652,9 @@ static void nfs4_disable_swap(struct inode *inode)
>  	 */
>  	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
>  
> -	nfs4_schedule_state_manager(clp);
> +	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
> +	clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
> +	wake_up_var(&clp->cl_state);
>  }
>  
>  static const struct inode_operations nfs4_dir_inode_operations = {
> diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> index 0bc160fbabec..5751a6886da4 100644
> --- a/fs/nfs/nfs4state.c
> +++ b/fs/nfs/nfs4state.c
> @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
>  {
>  	struct task_struct *task;
>  	char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
> +	struct rpc_clnt *clnt = clp->cl_rpcclient;
> +	bool swapon = false;
>  
> -	if (clp->cl_rpcclient->cl_shutdown)
> +	if (clnt->cl_shutdown)
>  		return;
>  
>  	set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
> -	if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) {
> -		wake_up_var(&clp->cl_state);
> -		return;
> +
> +	if (atomic_read(&clnt->cl_swapper)) {
> +		swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
> +					   &clp->cl_state);
> +		if (!swapon) {
> +			wake_up_var(&clp->cl_state);
> +			return;
> +		}
>  	}
> -	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
> +
> +	if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
> +		return;
> +
>  	__module_get(THIS_MODULE);
>  	refcount_inc(&clp->cl_count);
>  
> @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
>  			__func__, PTR_ERR(task));
>  		if (!nfs_client_init_is_complete(clp))
>  			nfs_mark_client_ready(clp, PTR_ERR(task));
> +		if (swapon)
> +			clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
>  		nfs4_clear_state_manager_bit(clp);
> -		clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
>  		nfs_put_client(clp);
>  		module_put(THIS_MODULE);
>  	}
> @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void *ptr)
>  
>  	allow_signal(SIGKILL);
>  again:
> -	set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
>  	nfs4_state_manager(clp);
> -	if (atomic_read(&cl->cl_swapper)) {
> +
> +	if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
> +	    !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) {
>  		wait_var_event_interruptible(&clp->cl_state,
>  					     test_bit(NFS4CLNT_RUN_MANAGER,
>  						      &clp->cl_state));
> -		if (atomic_read(&cl->cl_swapper) &&
> -		    test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
> +		if (!atomic_read(&cl->cl_swapper))
> +			clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
> +		if (refcount_read(&clp->cl_count) > 1 && !signalled())
>  			goto again;
>  		/* Either no longer a swapper, or were signalled */
> +		clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
> +		nfs4_clear_state_manager_bit(clp);
>  	}
> -	clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
>  
>  	if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
>  	    test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
> -	    !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state))
> +	    !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state))
>  		goto again;
>  
>  	nfs_put_client(clp);
> -- 
> 2.41.0
> 
> 





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux