> On Aug 3, 2022, at 3:47 PM, Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > On Wed, 2022-08-03 at 10:37 -0400, Chuck Lever wrote: >> nfsd_setattr() can kick off a CB_RECALL (via >> notify_change() -> break_lease()) if a delegation is present. Before >> returning NFS4ERR_DELAY, give the client holding that delegation a >> chance to return it and then retry the nfsd_setattr() again, once. >> >> Signed-off-by: Chuck Lever <chuck.lever@xxxxxxxxxx> >> --- >> fs/nfsd/nfs4proc.c | 18 +++++++++++++++--- >> fs/nfsd/nfs4state.c | 17 +++++++++++++++++ >> fs/nfsd/nfsd.h | 1 + >> fs/nfsd/trace.h | 19 +++++++++++++++++++ >> fs/nfsd/xdr4.h | 2 ++ >> 5 files changed, 54 insertions(+), 3 deletions(-) >> >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index 42bfe0d769ec..62a267bb2ce5 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -1142,7 +1142,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> { >> struct nfsd4_setattr *setattr = &u->setattr; >> __be32 status = nfs_ok; >> - int err; >> + int err, retries; >> >> if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { >> status = nfs4_preprocess_stateid_op(rqstp, cstate, >> @@ -1173,8 +1173,20 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> &setattr->sa_label); >> if (status) >> goto out; >> - status = nfsd_setattr(rqstp, &cstate->current_fh, &setattr->sa_iattr, >> - 0, (time64_t)0); >> + >> + retries = 1; >> + do { >> + status = nfsd_setattr(rqstp, &cstate->current_fh, >> + &setattr->sa_iattr, 0, (time64_t)0); >> + if (status != nfserr_jukebox) >> + break; >> + if (!retries--) >> + break; >> + >> + fh_clear_pre_post_attrs(&cstate->current_fh); >> + nfsd4_wait_for_delegreturn(rqstp, &cstate->current_fh); >> + } while (1); >> + >> out: >> fh_drop_write(&cstate->current_fh); >> return status; >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index 0cf5a4bb36df..e3ac89d4a859 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> 
@@ -4689,6 +4689,23 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) >> return ret; >> } >> >> +/** >> + * nfsd4_wait_for_delegreturn - wait for delegations to be returned >> + * @rqstp: the RPC transaction being executed >> + * @fhp: filehandle of file being waited for >> + * >> + * A better approach would wait for the DELEGRETURN operation, and >> + * retry just as soon as it was done. >> + * >> + * The timeout prevents deadlock if all nfsd threads happen to be >> + * tied up waiting for returning delegations. >> + */ >> +void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, struct svc_fh *fhp) >> +{ >> + trace_nfsd_delegreturn_wait(rqstp, fhp); >> + msleep(NFSD_DELEGRETURN_TIMEOUT); > > As you mentioned in the cover letter, this is pretty nasty. Right, it's proof-of-concept stuff. > You could use wait_var_event_timeout here on the inode, paired with a > wake_up_var when a delegation is returned. I was looking for an NFSD-specific data structure to add a completion to, but yeah, I guess the inode itself could work. I'll have a look at that for the next version of this series. Thanks for the suggestion! > For the condition, you could use something like this: > > !inode->i_flctx || list_empty(&inode->i_flctx->flc_lease) > > Maybe even a lockless check similar to the one in break_deleg? 
> >> +} >> + >> static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) >> { >> struct nfs4_delegation *dp = cb_to_delegation(cb); >> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h >> index 9a8b09afc173..0b800a154828 100644 >> --- a/fs/nfsd/nfsd.h >> +++ b/fs/nfsd/nfsd.h >> @@ -341,6 +341,7 @@ void nfsd_lockd_shutdown(void); >> >> #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ >> #define NFSD_COURTESY_CLIENT_TIMEOUT (24 * 60 * 60) /* seconds */ >> +#define NFSD_DELEGRETURN_TIMEOUT (30) /* milliseconds */ >> >> /* >> * The following attributes are currently not supported by the NFSv4 server: >> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h >> index 8c3d5f88072f..dd2654cac132 100644 >> --- a/fs/nfsd/trace.h >> +++ b/fs/nfsd/trace.h >> @@ -443,6 +443,25 @@ DEFINE_NFSD_COPY_ERR_EVENT(clone_file_range_err); >> #include "filecache.h" >> #include "vfs.h" >> >> +TRACE_EVENT(nfsd_delegreturn_wait, >> + TP_PROTO( >> + const struct svc_rqst *rqstp, >> + const struct svc_fh *fhp >> + ), >> + TP_ARGS(rqstp, fhp), >> + TP_STRUCT__entry( >> + __field(u32, xid) >> + __field(u32, fh_hash) >> + ), >> + TP_fast_assign( >> + __entry->xid = be32_to_cpu(rqstp->rq_xid); >> + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); >> + ), >> + TP_printk("xid=0x%08x fh_hash=0x%08x", >> + __entry->xid, __entry->fh_hash >> + ) >> +); >> + >> DECLARE_EVENT_CLASS(nfsd_stateid_class, >> TP_PROTO(stateid_t *stp), >> TP_ARGS(stp), >> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h >> index 7b744011f2d3..5b9213076e95 100644 >> --- a/fs/nfsd/xdr4.h >> +++ b/fs/nfsd/xdr4.h >> @@ -788,6 +788,8 @@ extern __be32 nfsd4_destroy_clientid(struct svc_rqst *, struct nfsd4_compound_st >> union nfsd4_op_u *u); >> __be32 nfsd4_reclaim_complete(struct svc_rqst *, struct nfsd4_compound_state *, >> union nfsd4_op_u *u); >> +extern void nfsd4_wait_for_delegreturn(struct svc_rqst *rqstp, >> + struct svc_fh *fhp); >> extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, >> struct nfsd4_open *open, struct 
nfsd_net *nn); >> extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, >> >> > > -- > Jeff Layton <jlayton@xxxxxxxxxx> -- Chuck Lever