Re: [PATCH 1/2] NFSD: handle GETATTR conflict with write delegation

dai.ngo@xxxxxxxxxx · Fri, 26 May 2023 12:36:27 -0700

On 5/26/23 12:34 PM, dai.ngo@xxxxxxxxxx wrote:

On 5/26/23 11:38 AM, Chuck Lever wrote:
On Fri, May 26, 2023 at 10:38:41AM -0700, Dai Ngo wrote:
If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the write delegation is recalled. The server waits a maximum of
90ms for the delegation to be returned before replying NFS4ERR_DELAY
for the GETATTR.

Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
---
  fs/nfsd/nfs4state.c | 48 
++++++++++++++++++++++++++++++++++++++++++++++++
  fs/nfsd/nfs4xdr.c   |  5 +++++
  fs/nfsd/state.h     |  3 +++
  3 files changed, 56 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b90b74a5e66e..9f551dbf50d6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -8353,3 +8353,51 @@ nfsd4_get_writestateid(struct 
nfsd4_compound_state *cstate,
  {
      get_stateid(cstate, &u->write.wr_stateid);
  }
+
+/**
+ * nfsd4_deleg_getattr_conflict - Trigger recall if GETATTR causes 
conflict
+ * @rqstp: RPC transaction context
+ * @inode: file to be checked for a conflict
+ *
Let's have this comment explain why this is necessary. At the least,
it needs to cite RFC 8881 Section 18.7.4, which REQUIREs a conflicting
write delegation to be gone before the server can respond to a
change/size GETATTR request.

ok, will add the comment.



+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned.
+ */
+__be32
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode 
*inode)
+{
+    __be32 status;
+    int cnt;
+    struct file_lock_context *ctx;
+    struct file_lock *fl;
+    struct nfs4_delegation *dp;
+
+    ctx = locks_inode_context(inode);
+    if (!ctx)
+        return 0;
+    spin_lock(&ctx->flc_lock);
+    list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+        if (fl->fl_flags == FL_LAYOUT ||
+                fl->fl_lmops != &nfsd_lease_mng_ops)
+            continue;
+        if (fl->fl_type == F_WRLCK) {
+            dp = fl->fl_owner;
+            if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+                spin_unlock(&ctx->flc_lock);
+                return 0;
+            }
+            spin_unlock(&ctx->flc_lock);
+            status = nfserrno(nfsd_open_break_lease(inode, 
NFSD_MAY_READ));
+            if (status != nfserr_jukebox)
+                return status;
+            for (cnt = 3; cnt > 0; --cnt) {
+                if (!nfsd_wait_for_delegreturn(rqstp, inode))
+                    continue;
+                return 0;
+            }
I'd rather not retry here. Can you can say why a 30ms wait is not
sufficient for this case?

on my VMs, it takes about 80ms for the the delegation return to complete.

Otherwise it takes about 180ms for the CB_RECALL and DELEGRETURN to complete
before the client can get a successful reply of the GETATTR.

-Dai


-Dai



+            return status;
+        }
+        break;
+    }
+    spin_unlock(&ctx->flc_lock);
+    return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b83954fc57e3..4590b893dbc8 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2970,6 +2970,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, 
struct svc_fh *fhp,
          if (status)
              goto out;
      }
+    if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+        status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+        if (status)
+            goto out;
+    }
        err = vfs_getattr(&path, &stat,
                STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d49d3060ed4f..cbddcf484dba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct 
nfs4_client *clp)
      cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
      return clp->cl_state == NFSD4_EXPIRABLE;
  }
+
+extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
+                struct inode *inode);
  #endif   /* NFSD4_STATE_H */
--
2.9.5