On Fri, 2024-07-26 at 09:54 -0400, Jeff Layton wrote: > On Wed, 2024-07-24 at 10:01 -0700, Sagi Grimberg wrote: > > Based on a patch fom Dai Ngo, allow NFSv4 client to use write > > delegation > > stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the > > Stateid and Locking. > > > > In addition, for anonymous stateids, check for pending delegations > > by > > the filehandle and client_id, and if a conflict found, recall the > > delegation > > before allowing the read to take place. > > > > Suggested-by: Dai Ngo <dai.ngo@xxxxxxxxxx> > > Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx> > > --- > > fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++-- > > fs/nfsd/nfs4state.c | 47 > > +++++++++++++++++++++++++++++++++++++++++++++ > > fs/nfsd/nfs4xdr.c | 9 +++++++++ > > fs/nfsd/state.h | 2 ++ > > fs/nfsd/xdr4.h | 2 ++ > > 5 files changed, 80 insertions(+), 2 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index 7b70309ad8fb..324984ec70c6 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct > > nfsd4_compound_state *cstate, > > /* check stateid */ > > status = nfs4_preprocess_stateid_op(rqstp, cstate, > > &cstate->current_fh, > > &read->rd_stateid, > > RD_STATE, > > - &read->rd_nf, NULL); > > - > > + &read->rd_nf, &read- > > >rd_wd_stid); > > In the case where we have multiple stateids for this client, are we > guaranteed to get back the delegation stateid here? What if we get > back > the open stateid for the O_WRONLY open instead? > Oh, duh -- disregard this. We're looking for a specific stateid here, so if the client sends a delegation stateid, that's what we'll find. > > > + /* > > + * rd_wd_stid is needed for nfsd4_encode_read to allow > > write > > + * delegation stateid used for read. Its refcount is > > decremented > > + * by nfsd4_read_release when read is done. > > + */ > > + if (!status) { > > + if (!read->rd_wd_stid) { > > + /* special stateid? */ > > + status = nfsd4_deleg_read_conflict(rqstp, > > cstate->clp, > > + &cstate->current_fh); > > + } else if (read->rd_wd_stid->sc_type != > > SC_TYPE_DELEG || > > + delegstateid(read->rd_wd_stid)->dl_type > > != > > + NFS4_OPEN_DELEGATE > > _WRITE) { > > + nfs4_put_stid(read->rd_wd_stid); > > + read->rd_wd_stid = NULL; > > + } > > + } > > read->rd_rqstp = rqstp; > > read->rd_fhp = &cstate->current_fh; > > return status; > > @@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct > > nfsd4_compound_state *cstate, > > static void > > nfsd4_read_release(union nfsd4_op_u *u) > > { > > + if (u->read.rd_wd_stid) > > + nfs4_put_stid(u->read.rd_wd_stid); > > if (u->read.rd_nf) > > nfsd_file_put(u->read.rd_nf); > > trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index dc61a8adfcd4..7e6b9fb31a4c 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct > > nfsd4_compound_state *cstate, > > get_stateid(cstate, &u->write.wr_stateid); > > } > > > > +/** > > + * nfsd4_deleg_read_conflict - Recall if read causes conflict > > + * @rqstp: RPC transaction context > > + * @clp: nfs client > > + * @fhp: nfs file handle > > + * @inode: file to be checked for a conflict > > + * @modified: return true if file was modified > > + * @size: new size of file if modified is true > > + * > > + * This function is called when there is a conflict between a > > write > > + * delegation and a read that is using a special stateid where the > > + * we cannot derive the client stateid exsistence. The server > > + * must recall a conflicting delegation before allowing the read > > + * to continue. > > + * > > + * Returns 0 if there is no conflict; otherwise an nfs_stat > > + * code is returned. > > + */ > > +__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > + struct nfs4_client *clp, struct svc_fh *fhp) > > +{ > > + struct nfs4_file *fp; > > + __be32 status = 0; > > + > > + fp = nfsd4_file_hash_lookup(fhp); > > + if (!fp) > > + return nfs_ok; > > + > > + spin_lock(&state_lock); > > + spin_lock(&fp->fi_lock); > > + if (!list_empty(&fp->fi_delegations) && > > + !nfs4_delegation_exists(clp, fp)) { > > + /* conflict, recall deleg */ > > + status = nfserrno(nfsd_open_break_lease(fp- > > >fi_inode, > > + NFSD_MAY_READ)); > > + if (status) > > + goto out; > > + if (!nfsd_wait_for_delegreturn(rqstp, fp- > > >fi_inode)) > > + status = nfserr_jukebox; > > + } > > +out: > > + spin_unlock(&fp->fi_lock); > > + spin_unlock(&state_lock); > > + return status; > > +} > > + > > + > > /** > > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes > > conflict > > * @rqstp: RPC transaction context > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index c7bfd2180e3f..f0fe526fac3c 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres > > *resp, __be32 nfserr, > > unsigned long maxcount; > > struct file *file; > > __be32 *p; > > + fmode_t o_fmode = 0; > > > > if (nfserr) > > return nfserr; > > @@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres > > *resp, __be32 nfserr, > > maxcount = min_t(unsigned long, read->rd_length, > > (xdr->buf->buflen - xdr->buf->len)); > > > > + if (read->rd_wd_stid) { > > + /* allow READ using write delegation stateid */ > > + o_fmode = file->f_mode; > > + file->f_mode |= FMODE_READ; > > + } ^^^ I do agree with Chuck that this looks sketchy. > > if (file->f_op->splice_read && splice_ok) > > nfserr = nfsd4_encode_splice_read(resp, read, > > file, maxcount); > > else > > nfserr = nfsd4_encode_readv(resp, read, file, > > maxcount); > > + if (o_fmode) > > + file->f_mode = o_fmode; > > + > > if (nfserr) { > > xdr_truncate_encode(xdr, starting_len); > > return nfserr; > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > > index ffc217099d19..c1f13b5877c6 100644 > > --- a/fs/nfsd/state.h > > +++ b/fs/nfsd/state.h > > @@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct > > nfs4_client *clp) > > return clp->cl_state == NFSD4_EXPIRABLE; > > } > > > > +extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp, > > + struct nfs4_client *clp, struct svc_fh *fhp); > > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > > struct inode *inode, bool *file_modified, u64 > > *size); > > #endif /* NFSD4_STATE_H */ > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index fbdd42cde1fa..434973a6a8b1 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -426,6 +426,8 @@ struct nfsd4_read { > > struct svc_rqst *rd_rqstp; /* > > response */ > > struct svc_fh *rd_fhp; /* response */ > > u32 rd_eof; /* response */ > > + > > + struct nfs4_stid *rd_wd_stid; /* > > internal */ > > }; > > > > struct nfsd4_readdir { > -- Jeff Layton <jlayton@xxxxxxxxxx>