Re: [PATCH 2/2] NFSv4: Fix a state manager thread deadlock regression

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 2023-09-20 at 15:38 -0400, Anna Schumaker wrote:
> Hi Trond,
> 
> On Sun, Sep 17, 2023 at 7:12 PM <trondmy@xxxxxxxxxx> wrote:
> > 
> > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> > 
> > Commit 4dc73c679114 reintroduces the deadlock that was fixed by
> > commit
> > aeabb3c96186 ("NFSv4: Fix a NFSv4 state manager deadlock") because
> > it
> > prevents the setup of new threads to handle reboot recovery, while
> > the
> > older recovery thread is stuck returning delegations.
> 
> I'm seeing a possible deadlock with xfstests generic/472 on NFS v4.x
> after applying this patch. The test itself checks for various
> swapfile
> edge cases, so it seems likely something is going on there.
> 
> Let me know if you need more info
> Anna
> 

Did you turn off delegations on your server? If you don't, then swap
will deadlock itself under various scenarios.

> > 
> > Fixes: 4dc73c679114 ("NFSv4: keep state manager thread active if
> > swap is enabled")
> > Cc: stable@xxxxxxxxxxxxxxx
> > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> > ---
> >  fs/nfs/nfs4proc.c  |  4 +++-
> >  fs/nfs/nfs4state.c | 38 ++++++++++++++++++++++++++------------
> >  2 files changed, 29 insertions(+), 13 deletions(-)
> > 
> > diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> > index 5deeaea8026e..a19e809cad16 100644
> > --- a/fs/nfs/nfs4proc.c
> > +++ b/fs/nfs/nfs4proc.c
> > @@ -10652,7 +10652,9 @@ static void nfs4_disable_swap(struct inode
> > *inode)
> >          */
> >         struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
> > 
> > -       nfs4_schedule_state_manager(clp);
> > +       set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
> > +       clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
> > +       wake_up_var(&clp->cl_state);
> >  }
> > 
> >  static const struct inode_operations nfs4_dir_inode_operations = {
> > diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
> > index 0bc160fbabec..5751a6886da4 100644
> > --- a/fs/nfs/nfs4state.c
> > +++ b/fs/nfs/nfs4state.c
> > @@ -1209,16 +1209,26 @@ void nfs4_schedule_state_manager(struct
> > nfs_client *clp)
> >  {
> >         struct task_struct *task;
> >         char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
> > +       struct rpc_clnt *clnt = clp->cl_rpcclient;
> > +       bool swapon = false;
> > 
> > -       if (clp->cl_rpcclient->cl_shutdown)
> > +       if (clnt->cl_shutdown)
> >                 return;
> > 
> >         set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state);
> > -       if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp-
> > >cl_state) != 0) {
> > -               wake_up_var(&clp->cl_state);
> > -               return;
> > +
> > +       if (atomic_read(&clnt->cl_swapper)) {
> > +               swapon =
> > !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE,
> > +                                          &clp->cl_state);
> > +               if (!swapon) {
> > +                       wake_up_var(&clp->cl_state);
> > +                       return;
> > +               }
> >         }
> > -       set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
> > +
> > +       if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp-
> > >cl_state) != 0)
> > +               return;
> > +
> >         __module_get(THIS_MODULE);
> >         refcount_inc(&clp->cl_count);
> > 
> > @@ -1235,8 +1245,9 @@ void nfs4_schedule_state_manager(struct
> > nfs_client *clp)
> >                         __func__, PTR_ERR(task));
> >                 if (!nfs_client_init_is_complete(clp))
> >                         nfs_mark_client_ready(clp, PTR_ERR(task));
> > +               if (swapon)
> > +                       clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp-
> > >cl_state);
> >                 nfs4_clear_state_manager_bit(clp);
> > -               clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp-
> > >cl_state);
> >                 nfs_put_client(clp);
> >                 module_put(THIS_MODULE);
> >         }
> > @@ -2748,22 +2759,25 @@ static int nfs4_run_state_manager(void
> > *ptr)
> > 
> >         allow_signal(SIGKILL);
> >  again:
> > -       set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
> >         nfs4_state_manager(clp);
> > -       if (atomic_read(&cl->cl_swapper)) {
> > +
> > +       if (test_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) &&
> > +           !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp-
> > >cl_state)) {
> >                 wait_var_event_interruptible(&clp->cl_state,
> >                                             
> > test_bit(NFS4CLNT_RUN_MANAGER,
> >                                                       &clp-
> > >cl_state));
> > -               if (atomic_read(&cl->cl_swapper) &&
> > -                   test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state))
> > +               if (!atomic_read(&cl->cl_swapper))
> > +                       clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp-
> > >cl_state);
> > +               if (refcount_read(&clp->cl_count) > 1 &&
> > !signalled())
> >                         goto again;
> >                 /* Either no longer a swapper, or were signalled */
> > +               clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp-
> > >cl_state);
> > +               nfs4_clear_state_manager_bit(clp);
> >         }
> > -       clear_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state);
> > 
> >         if (refcount_read(&clp->cl_count) > 1 && !signalled() &&
> >             test_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state) &&
> > -           !test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp-
> > >cl_state))
> > +           !test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp-
> > >cl_state))
> >                 goto again;
> > 
> >         nfs_put_client(clp);
> > --
> > 2.41.0
> > 

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@xxxxxxxxxxxxxxx






[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux