On Fri, 2023-12-15 at 11:15 -0800, Dai Ngo wrote:
> Under some load conditions the callback work request can not be queued
> and nfsd4_run_cb returns 0 to caller. When this happens, the sc_count
> of the delegation state was left with an extra reference count preventing
> the state to be freed later.
>
> Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
> ---
> fs/nfsd/nfs4state.c | 17 +++++++++++++----
> 1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 40415929e2ae..175f3e9f5822 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2947,8 +2947,14 @@ void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
>
> if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
> return;
> +
> refcount_inc(&dp->dl_stid.sc_count);
> - nfsd4_run_cb(&ncf->ncf_getattr);
> + if (!nfsd4_run_cb(&ncf->ncf_getattr)) {
> + refcount_dec(&dp->dl_stid.sc_count);
> + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
> + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
> + WARN_ON_ONCE(1);
> + }
> }
>
> static struct nfs4_client *create_client(struct xdr_netobj name,
> @@ -4967,7 +4973,10 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
> * we know it's safe to take a reference.
> */
> refcount_inc(&dp->dl_stid.sc_count);
> - WARN_ON_ONCE(!nfsd4_run_cb(&dp->dl_recall));
> + if (!nfsd4_run_cb(&dp->dl_recall)) {
> + refcount_dec(&dp->dl_stid.sc_count);
> + WARN_ON_ONCE(1);
> + }
> }
>
> /* Called from break_lease() with flc_lock held. */
> @@ -8543,12 +8552,12 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
> return 0;
> }
> break_lease:
> - spin_unlock(&ctx->flc_lock);
> nfsd_stats_wdeleg_getattr_inc();
> -
> dp = fl->fl_owner;
> ncf = &dp->dl_cb_fattr;
> nfs4_cb_getattr(&dp->dl_cb_fattr);
> + spin_unlock(&ctx->flc_lock);
> +

The other hunks in this patch make sense, but what's going on here with moving the lock down? Do we really need to hold the spinlock there?
If so, I would have expected to see an explanation in the changelog.

> wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
> if (ncf->ncf_cb_status) {
> status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));

-- 
Jeff Layton <jlayton@xxxxxxxxxx>