On Wed, 2023-09-20 at 15:38 -0400, Anna Schumaker wrote: > Hi Trond, > > On Sun, Sep 17, 2023 at 7:12 PM <trondmy@xxxxxxxxxx> wrote: > > > > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > > > Commit 4dc73c679114 reintroduces the deadlock that was fixed by > > commit > > aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because > > it > > prevents the setup of new threads to handle reboot recovery, while > > the > > older recovery thread is stuck returning delegations. > > I'm seeing a possible deadlock with xfstests generic/472 on NFS v4.x > after applying this patch. The test itself checks for various > swapfile > edge cases, so it seems likely something is going on there. > > Let me know if you need more info > Anna > Did you turn off delegations on your server? If you don't, then swap will deadlock itself under various scenarios. > > > > Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if > > swap is enabled") > > Cc: stable@xxxxxxxxxxxxxxx > > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > --- > > fs/nfs/nfs4proc.c | 4 +++- > > fs/nfs/nfs4state.c | 38 ++++++++++++++++++++++++++------------ > > 2 files changed, 29 insertions(+), 13 deletions(-) > > > > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c > > index 5deeaea8026e..a19e809cad16 100644 > > --- a/fs/nfs/nfs4proc.c > > +++ b/fs/nfs/nfs4proc.c > > @@ -10652,7 +10652,9 @@ static void nfs4_disable_swap(struct inode > > *inode) > > */ > > struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; > > > > - nfs4_schedule_state_manager(clp); > > + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); > > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > > + wake_up_var(&clp->cl_state); > > } > > > > static const struct inode_operations nfs4_dir_inode_operations = { > > diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c > > index 0bc160fbabec..5751a6886da4 100644 > > --- a/fs/nfs/nfs4state.c > > +++ b/fs/nfs/nfs4state.c > > @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct > > nfs_client *clp) > > { > > struct task_struct *task; > > char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; > > + struct rpc_clnt *clnt = clp->cl_rpcclient; > > + bool swapon = false; > > > > - if (clp->cl_rpcclient->cl_shutdown) > > + if (clnt->cl_shutdown) > > return; > > > > set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); > > - if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp- > > >cl_state) != 0) { > > - wake_up_var(&clp->cl_state); > > - return; > > + > > + if (atomic_read(&clnt->cl_swapper)) { > > + swapon = > > !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, > > + &clp->cl_state); > > + if (!swapon) { > > + wake_up_var(&clp->cl_state); > > + return; > > + } > > } > > - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); > > + > > + if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp- > > >cl_state) != 0) > > + return; > > + > > __module_get(THIS_MODULE); > > refcount_inc(&clp->cl_count); > > > > @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct > > nfs_client *clp) > > __func__, PTR_ERR(task)); > > if (!nfs_client_init_is_complete(clp)) > > nfs_mark_client_ready(clp, PTR_ERR(task)); > > + if (swapon) > > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp- > > >cl_state); > > nfs4_clear_state_manager_bit(clp); > > - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp- > > >cl_state); > > nfs_put_client(clp); > > module_put(THIS_MODULE); > > } > > @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void > > *ptr) > > > > allow_signal(SIGKILL); > > again: > > - set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state); > > nfs4_state_manager(clp); > > - if (atomic_read(&cl->cl_swapper)) { > > + > > + if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) && > > + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp- > > >cl_state)) { > > wait_var_event_interruptible(&clp->cl_state, > > > > test_bit(NFS4CLNT_RUN_MANAGER, > > &clp- > > >cl_state)); > > - if (atomic_read(&cl->cl_swapper) && > > - test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state)) > > + if (!atomic_read(&cl->cl_swapper)) > > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp- > > >cl_state); > > + if (refcount_read(&clp->cl_count) > 1 && > > !signalled()) > > goto again; > > /* Either no longer a swapper, or were signalled */ > > + clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp- > > >cl_state); > > + nfs4_clear_state_manager_bit(clp); > > } > > - clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state); > > > > if (refcount_read(&clp->cl_count) > 1 && !signalled() && > > test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) && > > - !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp- > > >cl_state)) > > + !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp- > > >cl_state)) > > goto again; > > > > nfs_put_client(clp); > > -- > > 2.41.0 > > -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@xxxxxxxxxxxxxxx