Re: [PATCH v2 4/4] NFSD: handle GETATTR conflict with write delegation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 5/15/23 11:58 AM, Jeff Layton wrote:
On Mon, 2023-05-15 at 11:26 -0700, dai.ngo@xxxxxxxxxx wrote:
On 5/15/23 11:14 AM, Olga Kornievskaia wrote:
On Sun, May 14, 2023 at 8:56 PM Dai Ngo <dai.ngo@xxxxxxxxxx> wrote:
If the GETATTR request on a file that has write delegation in effect
and the request attributes include the change info and size attribute
then the request is handled as below:

Server sends CB_GETATTR to client to get the latest change info and file
size. If these values are the same as the server's cached values then
the GETATTR proceeds as normal.

If either the change info or file size is different from the server's
cached values, or the file was already marked as modified, then:

     . update time_modify and time_metadata into file's metadata
       with current time

     . encode GETATTR as normal except the file size is encoded with
       the value returned from CB_GETATTR

     . mark the file as modified

If the CB_GETATTR fails for any reasons, the delegation is recalled
and NFS4ERR_DELAY is returned for the GETATTR.
Hi Dai,

I'm curious what does the server gain by implementing handling of
GETATTR with delegations? As far as I can tell it is not strictly
required by the RFC(s). When the file is being written any attempt at
querying its attribute is immediately stale.
Yes, you're right that handling of GETATTR with delegations is not
required by the spec. The only benefit I see is that the server
provides a more accurate state of the file as whether the file has
been changed/updated since the client's last GETATTR. This allows
the app on the client to take appropriate action (whatever that
might be) when sharing files among multiple clients.



 From RFC 8881 10.4.3:

"It should be noted that the server is under no obligation to use
CB_GETATTR, and therefore the server MAY simply recall the delegation to
avoid its use."

As I see it, the main benefit is that you avoid having to recall a write
delegation when someone does a drive-by stat() on the file (e.g. due to
a "ls -l" in its parent directory).

Yes, that's right I forgot to mention this. If we don't want to recall the
write delegation then we have to support CB_GETATTR.

Thanks,
-Dai


Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx>
---
   fs/nfsd/nfs4state.c | 58 ++++++++++++++++++++++++++++++++++++
   fs/nfsd/nfs4xdr.c   | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
   fs/nfsd/state.h     |  7 +++++
   3 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 09a9e16407f9..fb305b28a090 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *);

   static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
   static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;

   static struct workqueue_struct *laundry_wq;

@@ -1175,6 +1176,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
          dp->dl_recalled = false;
          nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
                        &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+       nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+                       &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+       dp->dl_cb_fattr.ncf_file_modified = false;
+       dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
          get_nfs4_file(fp);
          dp->dl_stid.sc_file = fp;
          return dp;
@@ -2882,11 +2887,49 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
          spin_unlock(&nn->client_lock);
   }

+static int
+nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+       struct nfs4_cb_fattr *ncf =
+               container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+       ncf->ncf_cb_status = task->tk_status;
+       switch (task->tk_status) {
+       case -NFS4ERR_DELAY:
+               rpc_delay(task, 2 * HZ);
+               return 0;
+       default:
+               return 1;
+       }
+}
+
+static void
+nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
+{
+       struct nfs4_cb_fattr *ncf =
+               container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+       clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
+       wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+}
+
   static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
          .done           = nfsd4_cb_recall_any_done,
          .release        = nfsd4_cb_recall_any_release,
   };

+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
+       .done           = nfsd4_cb_getattr_done,
+       .release        = nfsd4_cb_getattr_release,
+};
+
+void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
+{
+       if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+               return;
+       nfsd4_run_cb(&ncf->ncf_getattr);
+}
+
   static struct nfs4_client *create_client(struct xdr_netobj name,
                  struct svc_rqst *rqstp, nfs4_verifier *verf)
   {
@@ -5591,6 +5634,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
          int cb_up;
          int status = 0;
          u32 wdeleg = false;
+       struct kstat stat;
+       struct path path;

          cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
          open->op_recall = 0;
@@ -5626,6 +5671,19 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
          wdeleg = open->op_share_access & NFS4_SHARE_ACCESS_WRITE;
          open->op_delegate_type = wdeleg ?
                          NFS4_OPEN_DELEGATE_WRITE : NFS4_OPEN_DELEGATE_READ;
+       if (wdeleg) {
+               path.mnt = currentfh->fh_export->ex_path.mnt;
+               path.dentry = currentfh->fh_dentry;
+               if (vfs_getattr(&path, &stat, STATX_BASIC_STATS,
+                                               AT_STATX_SYNC_AS_STAT)) {
+                       nfs4_put_stid(&dp->dl_stid);
+                       destroy_delegation(dp);
+                       goto out_no_deleg;
+               }
+               dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
+               dp->dl_cb_fattr.ncf_initial_cinfo = nfsd4_change_attribute(&stat,
+                                                       d_inode(currentfh->fh_dentry));
+       }
          nfs4_put_stid(&dp->dl_stid);
          return;
   out_no_deleg:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 76db2fe29624..5d7e11db8ccf 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2920,6 +2920,77 @@ nfsd4_encode_bitmap(struct xdr_stream *xdr, u32 bmval0, u32 bmval1, u32 bmval2)
          return nfserr_resource;
   }

+static struct file_lock *
+nfs4_wrdeleg_filelock(struct svc_rqst *rqstp, struct inode *inode)
+{
+       struct file_lock_context *ctx;
+       struct file_lock *fl;
+
+       ctx = locks_inode_context(inode);
+       if (!ctx)
+               return NULL;
+       spin_lock(&ctx->flc_lock);
+       list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+               if (fl->fl_type == F_WRLCK) {
+                       spin_unlock(&ctx->flc_lock);
+                       return fl;
+               }
+       }
+       spin_unlock(&ctx->flc_lock);
+       return NULL;
+}
+
+static __be32
+nfs4_handle_wrdeleg_conflict(struct svc_rqst *rqstp, struct inode *inode,
+                       bool *modified, u64 *size)
+{
+       __be32 status;
+       struct file_lock *fl;
+       struct nfs4_delegation *dp;
+       struct nfs4_cb_fattr *ncf;
+       struct iattr attrs;
+
+       *modified = false;
+       fl = nfs4_wrdeleg_filelock(rqstp, inode);
+       if (!fl)
+               return 0;
+       dp = fl->fl_owner;
+       ncf = &dp->dl_cb_fattr;
+       if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker))
+               return 0;
+
+       refcount_inc(&dp->dl_stid.sc_count);
+       nfs4_cb_getattr(&dp->dl_cb_fattr);
+       wait_on_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, TASK_INTERRUPTIBLE);
+       if (ncf->ncf_cb_status) {
+               status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+               nfs4_put_stid(&dp->dl_stid);
+               return status;
+       }
+       ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+       if (!ncf->ncf_file_modified &&
+                       (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+                       ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) {
+               ncf->ncf_file_modified = true;
+       }
+
+       if (ncf->ncf_file_modified) {
+               /*
+                * The server would not update the file's metadata
+                * with the client's modified size.
+                * nfsd4 change attribute is constructed from ctime.
+                */
+               attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
+               attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
+               setattr_copy(&nop_mnt_idmap, inode, &attrs);
+               mark_inode_dirty(inode);
+               *size = ncf->ncf_cur_fsize;
+               *modified = true;
+       }
+       nfs4_put_stid(&dp->dl_stid);
+       return 0;
+}
+
   /*
    * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
    * ourselves.
@@ -2957,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                  .dentry = dentry,
          };
          struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+       bool file_modified;
+       u64 size = 0;

          BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
          BUG_ON(!nfsd_attrs_supported(minorversion, bmval));
@@ -2966,6 +3039,12 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                  if (status)
                          goto out;
          }
+       if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+               status = nfs4_handle_wrdeleg_conflict(rqstp, d_inode(dentry),
+                                               &file_modified, &size);
+               if (status)
+                       goto out;
+       }

          err = vfs_getattr(&path, &stat,
                            STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
@@ -3089,7 +3168,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                  p = xdr_reserve_space(xdr, 8);
                  if (!p)
                          goto out_resource;
-               p = xdr_encode_hyper(p, stat.size);
+               if (file_modified)
+                       p = xdr_encode_hyper(p, size);
+               else
+                       p = xdr_encode_hyper(p, stat.size);
          }
          if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
                  p = xdr_reserve_space(xdr, 4);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 9fb69ed8ae80..b20b65fe89b4 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -121,6 +121,10 @@ struct nfs4_cb_fattr {
          struct nfsd4_callback ncf_getattr;
          u32 ncf_cb_status;
          u32 ncf_cb_bmap[1];
+       unsigned long ncf_cb_flags;
+       bool ncf_file_modified;
+       u64 ncf_initial_cinfo;
+       u64 ncf_cur_fsize;

          /* from CB_GETATTR reply */
          u64 ncf_cb_change;
@@ -744,6 +748,9 @@ extern void nfsd4_client_record_remove(struct nfs4_client *clp);
   extern int nfsd4_client_record_check(struct nfs4_client *clp);
   extern void nfsd4_record_grace_done(struct nfsd_net *nn);

+/* CB_GETTTAR */
+extern void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf);
+
   static inline bool try_to_expire_client(struct nfs4_client *clp)
   {
          cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
--
2.9.5




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux