On Tue, Feb 13, 2024 at 09:47:27AM -0800, Dai Ngo wrote: > If the GETATTR request on a file that has write delegation in effect > and the request attributes include the change info and size attribute > then the request is handled as below: > > Server sends CB_GETATTR to client to get the latest change info and file > size. If these values are the same as the server's cached values then > the GETATTR proceeds as normal. > > If either the change info or file size is different from the server's > cached values, or the file was already marked as modified, then: > > . update time_modify and time_metadata into file's metadata > with current time > > . encode GETATTR as normal except the file size is encoded with > the value returned from CB_GETATTR > > . mark the file as modified > > The CB_GETATTR is given 30ms to complte. If the CB_GETATTR fails for > any reasons, the delegation is recalled and NFS4ERR_DELAY is returned > for the GETATTR from the second client. > > Signed-off-by: Dai Ngo <dai.ngo@xxxxxxxxxx> > --- > fs/nfsd/nfs4state.c | 119 ++++++++++++++++++++++++++++++++++++++++---- > fs/nfsd/nfs4xdr.c | 10 +++- > fs/nfsd/nfsd.h | 1 + > fs/nfsd/state.h | 10 +++- > 4 files changed, 127 insertions(+), 13 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index d9260e77ef2d..87987515e03d 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -127,6 +127,7 @@ static void free_session(struct nfsd4_session *); > > static const struct nfsd4_callback_ops nfsd4_cb_recall_ops; > static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops; > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops; > > static struct workqueue_struct *laundry_wq; > > @@ -1189,6 +1190,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp, > dp->dl_recalled = false; > nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, > &nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL); > + nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client, > + &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR); > + dp->dl_cb_fattr.ncf_file_modified = false; > + dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE; > get_nfs4_file(fp); > dp->dl_stid.sc_file = fp; > return dp; > @@ -3044,11 +3049,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb) > spin_unlock(&nn->client_lock); > } > > +static int > +nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task) > +{ > + struct nfs4_cb_fattr *ncf = > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); > + > + ncf->ncf_cb_status = task->tk_status; > + switch (task->tk_status) { > + case -NFS4ERR_DELAY: > + rpc_delay(task, 2 * HZ); > + return 0; > + default: > + return 1; > + } > +} > + > +static void > +nfsd4_cb_getattr_release(struct nfsd4_callback *cb) > +{ > + struct nfs4_cb_fattr *ncf = > + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); > + struct nfs4_delegation *dp = > + container_of(ncf, struct nfs4_delegation, dl_cb_fattr); > + > + nfs4_put_stid(&dp->dl_stid); > + clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags); > + wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY); > +} > + What happens if the client responds after the GETATTR timer elapses? Can nfsd4_cb_getattr_release over-write memory that is now being used for something else? > static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = { > .done = nfsd4_cb_recall_any_done, > .release = nfsd4_cb_recall_any_release, > }; > > +static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = { > + .done = nfsd4_cb_getattr_done, > + .release = nfsd4_cb_getattr_release, > +}; > + > +static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf) > +{ > + struct nfs4_delegation *dp = > + container_of(ncf, struct nfs4_delegation, dl_cb_fattr); > + > + if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags)) > + return; > + /* set to proper status when nfsd4_cb_getattr_done runs */ > + ncf->ncf_cb_status = NFS4ERR_IO; > + > + refcount_inc(&dp->dl_stid.sc_count); > + nfsd4_run_cb(&ncf->ncf_getattr); > +} > + > static struct nfs4_client *create_client(struct xdr_netobj name, > struct svc_rqst *rqstp, nfs4_verifier *verf) > { > @@ -5854,6 +5907,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, > struct svc_fh *parent = NULL; > int cb_up; > int status = 0; > + struct kstat stat; > + struct path path; > > cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client); > open->op_recall = false; > @@ -5891,6 +5946,18 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, > if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) { > open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE; > trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid); > + path.mnt = currentfh->fh_export->ex_path.mnt; > + path.dentry = currentfh->fh_dentry; > + if (vfs_getattr(&path, &stat, > + (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE), > + AT_STATX_SYNC_AS_STAT)) { > + nfs4_put_stid(&dp->dl_stid); > + destroy_delegation(dp); > + goto out_no_deleg; > + } > + dp->dl_cb_fattr.ncf_cur_fsize = stat.size; > + dp->dl_cb_fattr.ncf_initial_cinfo = > + nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry)); > } else { > open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; > trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid); > @@ -8720,6 +8787,8 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict > * @rqstp: RPC transaction context > * @inode: file to be checked for a conflict > + * @modified: return true if file was modified > + * @size: new size of file if modified is true > * > * This function is called when there is a conflict between a write > * delegation and a change/size GETATTR from another client. The server > @@ -8728,22 +8797,22 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, > * delegation before replying to the GETATTR. See RFC 8881 section > * 18.7.4. > * > - * The current implementation does not support CB_GETATTR yet. However > - * this can avoid recalling the delegation could be added in follow up > - * work. > - * > * Returns 0 if there is no conflict; otherwise an nfs_stat > * code is returned. > */ > __be32 > -nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) > +nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode, > + bool *modified, u64 *size) > { > __be32 status; > struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); > struct file_lock_context *ctx; > struct file_lock *fl; > struct nfs4_delegation *dp; > + struct iattr attrs; > + struct nfs4_cb_fattr *ncf; > > + *modified = false; > ctx = locks_inode_context(inode); > if (!ctx) > return 0; > @@ -8768,12 +8837,42 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) > return 0; > } > break_lease: > - spin_unlock(&ctx->flc_lock); > nfsd_stats_wdeleg_getattr_inc(nn); > - status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); > - if (status != nfserr_jukebox || > - !nfsd_wait_for_delegreturn(rqstp, inode)) > - return status; > + dp = fl->fl_owner; > + ncf = &dp->dl_cb_fattr; > + nfs4_cb_getattr(&dp->dl_cb_fattr); > + spin_unlock(&ctx->flc_lock); > + /* > + * allow 30 ms for the callback to complete which should > + * take care most cases when everything works normally. > + * Otherwise just fall back to the normal mechanisnm to > + * recall the delegation. > + */ The code already says what the comment says, and if NFSD_CB_GETATTR_TIMEOUT is ever changed, the comment will become stale. I suggest removing this comment and adding just a one-liner something like below: > + wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY, > + TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT); > + if (ncf->ncf_cb_status) { /* Recall delegation only if client didn't respond */ > + status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); > + if (status != nfserr_jukebox || > + !nfsd_wait_for_delegreturn(rqstp, inode)) > + return status; > + } > + if (!ncf->ncf_file_modified && > + (ncf->ncf_initial_cinfo != ncf->ncf_cb_change || > + ncf->ncf_cur_fsize != ncf->ncf_cb_fsize)) > + ncf->ncf_file_modified = true; > + if (ncf->ncf_file_modified) { > + /* > + * The server would not update the file's metadata > + * with the client's modified size. > + */ I don't understand "The server would not update...". Does that mean the server is not supposed to update, or something failed? Can this comment be clarified? > + attrs.ia_mtime = attrs.ia_ctime = current_time(inode); > + attrs.ia_valid = ATTR_MTIME | ATTR_CTIME; > + setattr_copy(&nop_mnt_idmap, inode, &attrs); > + mark_inode_dirty(inode); > + ncf->ncf_cur_fsize = ncf->ncf_cb_fsize; > + *size = ncf->ncf_cur_fsize; > + *modified = true; > + } One of the lesser-known guidelines of coding style is that if the indent level grows too deep, that's a sign that you should move this code into another function. Up to you, but IMO that might be easier to read. > return 0; > } > break; > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index e3f761cd5ee7..9e8f230fc96e 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, > unsigned long mask[2]; > } u; > unsigned long bit; > + bool file_modified = false; > + u64 size = 0; > > WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1); > WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval)); > @@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, > } > args.size = 0; > if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) { > - status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry)); > + status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry), > + &file_modified, &size); > if (status) > goto out; > } > @@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, > AT_STATX_SYNC_AS_STAT); > if (err) > goto out_nfserr; > - args.size = args.stat.size; > + if (file_modified) > + args.size = size; > + else > + args.size = args.stat.size; > > if (!(args.stat.result_mask & STATX_BTIME)) > /* underlying FS does not offer btime so we can't share it */ > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 8daf22d766c6..16c5a05f340e 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -367,6 +367,7 @@ void nfsd_lockd_shutdown(void); > #define NFSD_CLIENT_MAX_TRIM_PER_RUN 128 > #define NFS4_CLIENTS_PER_GB 1024 > #define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */ > +#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT > > /* > * The following attributes are currently not supported by the NFSv4 server: > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 3bf418ee6c97..01c6f3445646 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -142,8 +142,16 @@ struct nfs4_cb_fattr { > /* from CB_GETATTR reply */ > u64 ncf_cb_change; > u64 ncf_cb_fsize; > + > + unsigned long ncf_cb_flags; > + bool ncf_file_modified; > + u64 ncf_initial_cinfo; > + u64 ncf_cur_fsize; > }; > > +/* bits for ncf_cb_flags */ > +#define CB_GETATTR_BUSY 0 > + > /* > * Represents a delegation stateid. The nfs4_client holds references to these > * and they are put when it is being destroyed or when the delegation is > @@ -773,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp) > } > > extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, > - struct inode *inode); > + struct inode *inode, bool *file_modified, u64 *size); > #endif /* NFSD4_STATE_H */ > -- > 2.39.3 > -- Chuck Lever