On Sat, Jul 27, 2024 at 03:53:41PM -0400, Chuck Lever wrote: > On Sat, Jul 27, 2024 at 10:26:24PM +0300, Sagi Grimberg wrote: > > > > > > > > On 26/07/2024 17:06, Chuck Lever wrote: > > > On Wed, Jul 24, 2024 at 10:01:38AM -0700, Sagi Grimberg wrote: > > > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation > > > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > > > > Stateid and Locking. > > > > > > > > In addition, for anonymous stateids, check for pending delegations by > > > > the filehandle and client_id, and if a conflict found, recall the delegation > > > > before allowing the read to take place. > > > > > > > > Suggested-by: Dai Ngo <dai.ngo@xxxxxxxxxx> > > > > Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx> > > > > --- > > > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > > > fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ > > > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > > > fs/nfsd/state.h | 2 ++ > > > > fs/nfsd/xdr4.h | 2 ++ > > > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > > > index 7b70309ad8fb..324984ec70c6 100644 > > > > --- a/fs/nfsd/nfs4proc.c > > > > +++ b/fs/nfsd/nfs4proc.c > > > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > > /* check stateid */ > > > > status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > > > &read->rd_stateid, RD_STATE, > > > > - &read->rd_nf, NULL); > > > > - > > > > + &read->rd_nf, &read->rd_wd_stid); > > > > + /* > > > > + * rd_wd_stid is needed for nfsd4_encode_read to allow write > > > > + * delegation stateid used for read. Its refcount is decremented > > > > + * by nfsd4_read_release when read is done. > > > > + */ > > > > + if (!status) { > > > > + if (!read->rd_wd_stid) { > > > > + /* special stateid? */ > > > > + status = nfsd4_deleg_read_conflict(rqstp, cstate->clp, > > > > + &cstate->current_fh); > > > > + } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG || > > > > + delegstateid(read->rd_wd_stid)->dl_type != > > > > + NFS4_OPEN_DELEGATE_WRITE) { > > > > + nfs4_put_stid(read->rd_wd_stid); > > > > + read->rd_wd_stid = NULL; > > > > + } > > > > + } > > > > read->rd_rqstp = rqstp; > > > > read->rd_fhp = &cstate->current_fh; > > > > return status; > > > > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > > > static void > > > > nfsd4_read_release(union nfsd4_op_u *u) > > > > { > > > > + if (u->read.rd_wd_stid) > > > > + nfs4_put_stid(u->read.rd_wd_stid); > > > > if (u->read.rd_nf) > > > > nfsd_file_put(u->read.rd_nf); > > > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > > > index dc61a8adfcd4..7e6b9fb31a4c 100644 > > > > --- a/fs/nfsd/nfs4state.c > > > > +++ b/fs/nfsd/nfs4state.c > > > > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > > > > get_stateid(cstate, &u->write.wr_stateid); > > > > } > > > > +/** > > > > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > > > > + * @rqstp: RPC transaction context > > > > + * @clp: nfs client > > > > + * @fhp: nfs file handle > > > > + * @inode: file to be checked for a conflict > > > > + * @modified: return true if file was modified > > > > + * @size: new size of file if modified is true > > > > + * > > > > + * This function is called when there is a conflict between a write > > > > + * delegation and a read that is using a special stateid where the > > > > + * we cannot derive the client stateid exsistence. The server > > > > + * must recall a conflicting delegation before allowing the read > > > > + * to continue. > > > > + * > > > > + * Returns 0 if there is no conflict; otherwise an nfs_stat > > > > + * code is returned. > > > > + */ > > > > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > > > + struct nfs4_client *clp, struct svc_fh *fhp) > > > > +{ > > > > + struct nfs4_file *fp; > > > > + __be32 status = 0; > > > > + > > > > + fp = nfsd4_file_hash_lookup(fhp); > > > > + if (!fp) > > > > + return nfs_ok; > > > > + > > > > + spin_lock(&state_lock); > > > > + spin_lock(&fp->fi_lock); > > > > + if (!list_empty(&fp->fi_delegations) && > > > > + !nfs4_delegation_exists(clp, fp)) { > > > > + /* conflict, recall deleg */ > > > > + status = nfserrno(nfsd_open_break_lease(fp->fi_inode, > > > > + NFSD_MAY_READ)); > > > > + if (status) > > > > + goto out; > > > > + if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode)) > > > > + status = nfserr_jukebox; > > > > + } > > > > +out: > > > > + spin_unlock(&fp->fi_lock); > > > > + spin_unlock(&state_lock); > > > > + return status; > > > > +} > > > > + > > > > + > > > > /** > > > > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > > > > * @rqstp: RPC transaction context > > > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > > > index c7bfd2180e3f..f0fe526fac3c 100644 > > > > --- a/fs/nfsd/nfs4xdr.c > > > > +++ b/fs/nfsd/nfs4xdr.c > > > > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > > unsigned long maxcount; > > > > struct file *file; > > > > __be32 *p; > > > > + fmode_t o_fmode = 0; > > > > if (nfserr) > > > > return nfserr; > > > > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > > > maxcount = min_t(unsigned long, read->rd_length, > > > > (xdr->buf->buflen - xdr->buf->len)); > > > > + if (read->rd_wd_stid) { > > > > + /* allow READ using write delegation stateid */ > > > > + o_fmode = file->f_mode; > > > > + file->f_mode |= FMODE_READ; > > > > + } > > > nfsd4_encode_read_plus() needs to handle write delegation stateids > > > as well. > > > > Yes, missed that one. > > > > > > > > I'm not too sure about modifying the f_mode on an nfsd_file you > > > just got from a cache of shared nfsd_files. > > > > If that is a problem, nfsd can open the file with rw access to begin with > > if a write deleg was given? > > IMO, it's not a question of whether there is a write delegation, but > rather what intent the client told the server during the OPEN. The > server would need to open the local file for R/W when it gets a > O_WRONLY open. Not saying this is the right thing to do, just > thinking out loud... OK, thinking about this again... The nfsd_file that is associated with a write delegation is probably going to need to be a local O_RDWR open, if a write delegation state ID can be used for READ or WRITE. -- Chuck Lever