On 7/24/24 10:01 AM, Sagi Grimberg wrote:
Based on a patch fom Dai Ngo, allow NFSv4 client to use write delegation
stateid for READ operation. Per RFC 8881 section 9.1.2. Use of the
Stateid and Locking.
In addition, for anonymous stateids, check for pending delegations by
the filehandle and client_id, and if a conflict found, recall the delegation
before allowing the read to take place.
Suggested-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx>
---
fs/nfsd/nfs4proc.c | 22 +++++++++++++++++++--
fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4xdr.c | 9 +++++++++
fs/nfsd/state.h | 2 ++
fs/nfsd/xdr4.h | 2 ++
5 files changed, 80 insertions(+), 2 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 7b70309ad8fb..324984ec70c6 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -979,8 +979,24 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* check stateid */
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&read->rd_stateid, RD_STATE,
- &read->rd_nf, NULL);
-
+ &read->rd_nf, &read->rd_wd_stid);
+ /*
+ * rd_wd_stid is needed for nfsd4_encode_read to allow write
+ * delegation stateid used for read. Its refcount is decremented
+ * by nfsd4_read_release when read is done.
+ */
+ if (!status) {
+ if (!read->rd_wd_stid) {
+ /* special stateid? */
+ status = nfsd4_deleg_read_conflict(rqstp, cstate->clp,
+ &cstate->current_fh);
+ } else if (read->rd_wd_stid->sc_type != SC_TYPE_DELEG ||
+ delegstateid(read->rd_wd_stid)->dl_type !=
+ NFS4_OPEN_DELEGATE_WRITE) {
+ nfs4_put_stid(read->rd_wd_stid);
+ read->rd_wd_stid = NULL;
+ }
+ }
read->rd_rqstp = rqstp;
read->rd_fhp = &cstate->current_fh;
return status;
@@ -990,6 +1006,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
+ if (u->read.rd_wd_stid)
+ nfs4_put_stid(u->read.rd_wd_stid);
if (u->read.rd_nf)
nfsd_file_put(u->read.rd_nf);
trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index dc61a8adfcd4..7e6b9fb31a4c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -8805,6 +8805,53 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
get_stateid(cstate, &u->write.wr_stateid);
}
+/**
+ * nfsd4_deleg_read_conflict - Recall if read causes conflict
+ * @rqstp: RPC transaction context
+ * @clp: nfs client
+ * @fhp: nfs file handle
+ * @inode: file to be checked for a conflict
+ * @modified: return true if file was modified
+ * @size: new size of file if modified is true
+ *
+ * This function is called when there is a conflict between a write
+ * delegation and a read that is using a special stateid where the
+ * we cannot derive the client stateid exsistence. The server
+ * must recall a conflicting delegation before allowing the read
+ * to continue.
+ *
+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned.
+ */
+__be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp,
+ struct nfs4_client *clp, struct svc_fh *fhp)
+{
+ struct nfs4_file *fp;
+ __be32 status = 0;
+
+ fp = nfsd4_file_hash_lookup(fhp);
+ if (!fp)
+ return nfs_ok;
+
+ spin_lock(&state_lock);
+ spin_lock(&fp->fi_lock);
+ if (!list_empty(&fp->fi_delegations) &&
+ !nfs4_delegation_exists(clp, fp)) {
+ /* conflict, recall deleg */
I don't see how we can have a delegation conflict here. If a client
has a write delegation then there should not be any delegations from
other clients.
-Dai
+ status = nfserrno(nfsd_open_break_lease(fp->fi_inode,
+ NFSD_MAY_READ));
+ if (status)
+ goto out;
+ if (!nfsd_wait_for_delegreturn(rqstp, fp->fi_inode))
+ status = nfserr_jukebox;
+ }
+out:
+ spin_unlock(&fp->fi_lock);
+ spin_unlock(&state_lock);
+ return status;
+}
+
+
/**
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c7bfd2180e3f..f0fe526fac3c 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4418,6 +4418,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
unsigned long maxcount;
struct file *file;
__be32 *p;
+ fmode_t o_fmode = 0;
if (nfserr)
return nfserr;
@@ -4437,10 +4438,18 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));
+ if (read->rd_wd_stid) {
+ /* allow READ using write delegation stateid */
+ o_fmode = file->f_mode;
+ file->f_mode |= FMODE_READ;
+ }
if (file->f_op->splice_read && splice_ok)
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+ if (o_fmode)
+ file->f_mode = o_fmode;
+
if (nfserr) {
xdr_truncate_encode(xdr, starting_len);
return nfserr;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ffc217099d19..c1f13b5877c6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -780,6 +780,8 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
return clp->cl_state == NFSD4_EXPIRABLE;
}
+extern __be32 nfsd4_deleg_read_conflict(struct svc_rqst *rqstp,
+ struct nfs4_client *clp, struct svc_fh *fhp);
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
struct inode *inode, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index fbdd42cde1fa..434973a6a8b1 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -426,6 +426,8 @@ struct nfsd4_read {
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh *rd_fhp; /* response */
u32 rd_eof; /* response */
+
+ struct nfs4_stid *rd_wd_stid; /* internal */
};
struct nfsd4_readdir {