Re: [PATCH 1/2] NFSD: handle GETATTR conflict with write delegation

dai.ngo@xxxxxxxxxx · Fri, 26 May 2023 13:54:12 -0700

On 5/26/23 12:40 PM, Chuck Lever wrote:
On Fri, May 26, 2023 at 12:34:16PM -0700, dai.ngo@xxxxxxxxxx wrote:
On 5/26/23 11:38 AM, Chuck Lever wrote:
On Fri, May 26, 2023 at 10:38:41AM -0700, Dai Ngo wrote:
If a GETATTR request is made on a file that has a write delegation in
effect and the requested attributes include the change info or size
attribute, then the write delegation is recalled. The server waits a
maximum of 90ms for the delegation to be returned before replying
NFS4ERR_DELAY for the GETATTR.

Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
---
   fs/nfsd/nfs4state.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
   fs/nfsd/nfs4xdr.c   |  5 +++++
   fs/nfsd/state.h     |  3 +++
   3 files changed, 56 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b90b74a5e66e..9f551dbf50d6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -8353,3 +8353,51 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
   {
   	get_stateid(cstate, &u->write.wr_stateid);
   }
+
+/**
+ * nfsd4_deleg_getattr_conflict - Trigger recall if GETATTR causes conflict
+ * @rqstp: RPC transaction context
+ * @inode: file to be checked for a conflict
+ *
Let's have this comment explain why this is necessary. At the least,
it needs to cite RFC 8881 Section 18.7.4, which REQUIREs a conflicting
write delegation to be gone before the server can respond to a
change/size GETATTR request.
ok, will add the comment.


+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned.
+ */
+__be32
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+	__be32 status;
+	int cnt;
+	struct file_lock_context *ctx;
+	struct file_lock *fl;
+	struct nfs4_delegation *dp;
+
+	ctx = locks_inode_context(inode);
+	if (!ctx)
+		return 0;
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+		if (fl->fl_flags == FL_LAYOUT ||
+				fl->fl_lmops != &nfsd_lease_mng_ops)
+			continue;
+		if (fl->fl_type == F_WRLCK) {
+			dp = fl->fl_owner;
+			if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+				spin_unlock(&ctx->flc_lock);
+				return 0;
+			}
+			spin_unlock(&ctx->flc_lock);
+			status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+			if (status != nfserr_jukebox)
+				return status;
+			for (cnt = 3; cnt > 0; --cnt) {
+				if (!nfsd_wait_for_delegreturn(rqstp, inode))
+					continue;
+				return 0;
+			}
I'd rather not retry here. Can you say why a 30ms wait is not
sufficient for this case?
On my VMs, it takes about 80ms for the delegation return to complete.
I'd rather not tune for tiny VM guests. How long does it take for a
native client to handle CB_RECALL and return the delegation? It
shouldn't take longer to do so than it would for the other cases the
server already handles in under 30ms.

Even 30ms is a long time to hold up an nfsd thread, IMO.

If the client takes less than 30ms to return the delegation then the
server will reply to the GETATTR right away, it does not wait for the
whole 90ms.

The 90ms is for the worst case scenario where the client/network is slow
or under load. Even if the server waits for the whole 90ms it's still
faster to reply to the GETATTR than sending CB_RECALL and waiting for
DELEGRETURN before the server can reply to the GETATTR.

-Dai



+			return status;
+		}
+		break;
+	}
+	spin_unlock(&ctx->flc_lock);
+	return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b83954fc57e3..4590b893dbc8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2970,6 +2970,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
   		if (status)
   			goto out;
   	}
+	if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+		status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+		if (status)
+			goto out;
+	}
   	err = vfs_getattr(&path, &stat,
   			  STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d49d3060ed4f..cbddcf484dba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
   	cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
   	return clp->cl_state == NFSD4_EXPIRABLE;
   }
+
+extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
+				struct inode *inode);
   #endif   /* NFSD4_STATE_H */
--
2.9.5