On Mon, 18 Sep 2023, trondmy@xxxxxxxxxx wrote: > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > Commit 4dc73c679114 reintroduces the deadlock that was fixed by commit > aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because it > prevents the setup of new threads to handle reboot recovery, while the > older recovery thread is stuck returning delegations. I hadn't realised that the state manager thread did these two distinct tasks and might need to be doing both at once - requiring two threads. Thanks for highlighting that. It seems to me that even with the new code, we can still get a deadlock when swap is enabled, as we only ever run one thread in that case. Is that correct, or did I miss something? Maybe we need two threads - a state manager and a delegation recall handler. And when swap is enabled, both need to be running permanently ?? Thanks, NeilBrown > > Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if swap is enabled") > Cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > --- > fs/nfs/nfs4proc.c | 4 +++- > fs/nfs/nfs4state.c | 38 ++++++++++++++++++++++++++------------ > 2 files changed, 29 insertions(+), 13 deletions(-) > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > index 5deeaea8026e..a19e809cad16 100644 > --- a/fs/nfs/nfs4proc.c > +++ b/fs/nfs/nfs4proc.c > @@ -10652,7 +10652,9 @@ static void nfs4_disable_swap(struct inode *inode) > */ > struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; > > - nfs4_schedule_state_manager(clp); > + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > + wake_up_var(&clp->cl_state); > } > > static const struct inode_operations nfs4_dir_inode_operations = { > diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c > index 0bc160fbabec..5751a6886da4 100644 > --- a/fs/nfs/nfs4state.c > +++ b/fs/nfs/nfs4state.c > @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) > { > struct task_struct *task; > char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; > + struct rpc_clnt *clnt = clp->cl_rpcclient; > + bool swapon = false; > > - if (clp->cl_rpcclient->cl_shutdown) > + if (clnt->cl_shutdown) > return; > > set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); > - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { > - wake_up_var(&clp->cl_state); > - return; > + > + if (atomic_read(&clnt->cl_swapper)) { > + swapon = !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, > + &clp->cl_state); > + if (!swapon) { > + wake_up_var(&clp->cl_state); > + return; > + } > } > - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); > + > + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) > + return; > + > __module_get(THIS_MODULE); > refcount_inc(&clp->cl_count); > > @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) > __func__, PTR_ERR(task)); > if (!nfs_client_init_is_complete(clp)) > nfs_mark_client_ready(clp, PTR_ERR(task)); > + if (swapon) > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > nfs4_clear_state_manager_bit(clp); > - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > nfs_put_client(clp); > module_put(THIS_MODULE); > } > @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void *ptr) > > allow_signal(SIGKILL); > again: > - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); > nfs4_state_manager(clp); > - if (atomic_read(&cl->cl_swapper)) { > + > + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && > + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) { > wait_var_event_interruptible(&clp->cl_state, > test_bit(NFS4CLNT_RUN_MANAGER, > &clp->cl_state)); > - if (atomic_read(&cl->cl_swapper) && > - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) > + if (!atomic_read(&cl->cl_swapper)) > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > + if (refcount_read(&clp->cl_count) > 1 && !signalled()) > goto again; > /* Either no longer a swapper, or were signalled */ > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > + nfs4_clear_state_manager_bit(clp); > } > - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > > if (refcount_read(&clp->cl_count) > 1 && !signalled() && > test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && > - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state)) > + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state)) > goto again; > > nfs_put_client(clp); > -- > 2.41.0 > >