On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: > Based on a patch from Dai Ngo, allow NFSv4 client to use write delegation > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > Stateid and Locking. > > In addition, for anonymous stateids, check for pending delegations by > the filehandle and client_id, and if a conflict is found, recall the delegation > before allowing the read to take place. > > Suggested-by: Dai Ngo <dai.ngo@xxxxxxxxxx> > Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx> > --- > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/nfs4xdr.c | 9 +++++++++ > fs/nfsd/state.h | 2 ++ > fs/nfsd/xdr4.h | 2 ++ > 5 files changed, 80 insertions(+), 2 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index 7b70309ad8fb..324984ec70c6 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > /* check stateid */ > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > &read->rd_stateid, RD_STATE, > - &read->rd_nf, NULL); > - > + &read->rd_nf, &read->rd_wd_stid); > + /* > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > + * delegation stateid used for read. Its refcount is decremented > + * by nfsd4_read_release when read is done. > + */ > + if (!status) { > + if (!read->rd_wd_stid) { > + /* special stateid? 
*/ > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > + &cstate->current_fh); > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > + delegstateid(read->rd_wd_stid)->dl_type != > + NFS4_OPEN_DELEGATE_WRITE) { > + nfs4_put_stid(read->rd_wd_stid); > + read->rd_wd_stid = NULL; > + } > + } > read->rd_rqstp = rqstp; > read->rd_fhp = &cstate->current_fh; > return status; > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > static void > nfsd4_read_release(union nfsd4_op_u *u) > { > + if (u->read.rd_wd_stid) > + nfs4_put_stid(u->read.rd_wd_stid); > if (u->read.rd_nf) > nfsd_file_put(u->read.rd_nf); > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index dc61a8adfcd4..7e6b9fb31a4c 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > get_stateid(cstate, &u->write.wr_stateid); > } > > +/** > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > + * @rqstp: RPC transaction context > + * @clp: nfs client > + * @fhp: nfs file handle > + * @inode: file to be checked for a conflict > + * @modified: return true if file was modified > + * @size: new size of file if modified is true > + * > + * This function is called when there is a conflict between a write > + * delegation and a read that is using a special stateid where > + * we cannot derive the client stateid existence. The server > + * must recall a conflicting delegation before allowing the read > + * to continue. > + * > + * Returns 0 if there is no conflict; otherwise an nfs_stat > + * code is returned. 
> + */ > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp) > +{ > + struct nfs4_file *fp; > + __be32 status = 0; > + > + fp = nfsd4_file_hash_lookup(fhp); > + if (!fp) > + return nfs_ok; > + > + spin_lock(&state_lock); > + spin_lock(&fp->fi_lock); > + if (!list_empty(&fp->fi_delegations) && > + !nfs4_delegation_exists(clp, fp)) { > + /* conflict, recall deleg */ > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > + NFSD_MAY_READ)); > + if (status) > + goto out; > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > + status = nfserr_jukebox; > + } > +out: > + spin_unlock(&fp->fi_lock); > + spin_unlock(&state_lock); > + return status; > +} > + > + > /** > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > * @rqstp: RPC transaction context > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index c7bfd2180e3f..f0fe526fac3c 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > unsigned long maxcount; > struct file *file; > __be32 *p; > + fmode_t o_fmode = 0; > > if (nfserr) > return nfserr; > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > maxcount = min_t(unsigned long, read->rd_length, > (xdr->buf->buflen - xdr->buf->len)); > > + if (read->rd_wd_stid) { > + /* allow READ using write delegation stateid */ > + o_fmode = file->f_mode; > + file->f_mode |= FMODE_READ; > + } nfsd4_encode_read_plus() needs to handle write delegation stateids as well. I'm not too sure about modifying the f_mode on an nfsd_file you just got from a cache of shared nfsd_files. I think I'd prefer if preprocess_stateid returned an nfsd_file that was already open for read. IIUC that would mean that no changes would be needed here or in nfsd4_encode_read_plus(). Would that be difficult? 
> if (file->f_op->splice_read && splice_ok) > nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount); > else > nfserr = nfsd4_encode_readv(resp, read, file, maxcount); > + if (o_fmode) > + file->f_mode = o_fmode; > + > if (nfserr) { > xdr_truncate_encode(xdr, starting_len); > return nfserr; > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index ffc217099d19..c1f13b5877c6 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > return clp->cl_state == NFSD4_EXPIRABLE; > } > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > + struct nfs4_client *clp, struct svc_fh *fhp); > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > struct inode *inode, bool *file_modified, u64 *size); > #endif /* NFSD4_STATE_H */ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index fbdd42cde1fa..434973a6a8b1 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -426,6 +426,8 @@ struct nfsd4_read { > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh *rd_fhp; /* response */ > u32 rd_eof; /* response */ > + > + struct nfs4_stid *rd_wd_stid; /* internal */ > }; > > struct nfsd4_readdir { > -- > 2.43.0 > -- Chuck Lever