> On Jan 5, 2023, at 2:55 PM, Jeff Layton <jlayton@xxxxxxxxxx> wrote: > > Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a > regular NFSv4 file") added the ability to cache an open fd over a > compound. There are a couple of problems with the way this currently > works: > > It's racy, as a newly-created nfsd_file can end up with its PENDING bit > cleared while the nf is hashed, and the nf_file pointer is still zeroed > out. Other tasks can find it in this state and they expect to see a > valid nf_file, and can oops if nf_file is NULL. > > Also, there is no guarantee that we'll end up creating a new nfsd_file > if one is already in the hash. If an extant entry is in the hash with a > valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with > the value of op_file and the old nf_file will leak. > > Fix both issues by making a new nfsd_file_acquirei_opened variant that > takes an optional file pointer. If one is present when this is called, > we'll take a new reference to it instead of trying to open the file. If > the nfsd_file already has a valid nf_file, we'll just ignore the > optional file and pass the nfsd_file back as-is. > > Also rework the tracepoints a bit to allow for an "opened" variant and > don't try to avoid counting acquisitions in the case where we already > have a cached open file. > > Fixes: fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") > Cc: Trond Myklebust <trondmy@xxxxxxxxxxxxxxx> > Reported-by: Stanislav Saner <ssaner@xxxxxxxxxx> > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> > --- > fs/nfsd/filecache.c | 40 ++++++++++++++++++---------------- > fs/nfsd/filecache.h | 5 +++-- > fs/nfsd/nfs4state.c | 16 ++++---------- > fs/nfsd/trace.h | 52 ++++++++++++--------------------------------- > 4 files changed, 42 insertions(+), 71 deletions(-) > > v3: add new nfsd_file_acquire_opened variant instead of changing > nfsd_file_acquire prototype Hi Jeff, v3 applied to nfsd's for-rc. > diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c > index 45b2c9e3f636..0ef070349014 100644 > --- a/fs/nfsd/filecache.c > +++ b/fs/nfsd/filecache.c > @@ -1071,8 +1071,8 @@ nfsd_file_is_cached(struct inode *inode) > > static __be32 > nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > - unsigned int may_flags, struct nfsd_file **pnf, > - bool open, bool want_gc) > + unsigned int may_flags, struct file *file, > + struct nfsd_file **pnf, bool want_gc) > { > struct nfsd_file_lookup_key key = { > .type = NFSD_FILE_KEY_FULL, > @@ -1147,8 +1147,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); > out: > if (status == nfs_ok) { > - if (open) > - this_cpu_inc(nfsd_file_acquisitions); > + this_cpu_inc(nfsd_file_acquisitions); > *pnf = nf; > } else { > if (refcount_dec_and_test(&nf->nf_ref)) > @@ -1158,20 +1157,23 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > > out_status: > put_cred(key.cred); > - if (open) > - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); > + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); > return status; > > open_file: > trace_nfsd_file_alloc(nf); > nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); > if (nf->nf_mark) { > - if (open) { > + if (file) { > + get_file(file); > + nf->nf_file = file; > + status = nfs_ok; > + trace_nfsd_file_opened(nf, status); > + } else { > status = nfsd_open_verified(rqstp, fhp, may_flags, > &nf->nf_file); > trace_nfsd_file_open(nf, status); > - } else > - status = nfs_ok; > + } > } else > status = nfserr_jukebox; > /* > @@ -1207,7 +1209,7 @@ __be32 > nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **pnf) > { > - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); > + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); > } > > /** > @@ -1228,28 +1230,30 @@ __be32 > nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **pnf) > { > - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); > + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); > } > > /** > - * nfsd_file_create - Get a struct nfsd_file, do not open > + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file > * @rqstp: the RPC transaction being executed > * @fhp: the NFS filehandle of the file just created > * @may_flags: NFSD_MAY_ settings for the file > + * @file: cached, already-open file (may be NULL) > * @pnf: OUT: new or found "struct nfsd_file" object > * > - * The nfsd_file_object returned by this API is reference-counted > - * but not garbage-collected. The object is released immediately > - * one RCU grace period after the final nfsd_file_put(). > + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, > + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of > + * opening a new one. > * > * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in > * network byte order is returned. > */ > __be32 > -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > - unsigned int may_flags, struct nfsd_file **pnf) > +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, > + unsigned int may_flags, struct file *file, > + struct nfsd_file **pnf) > { > - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); > + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); > } > > /* > diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h > index b7efb2c3ddb1..41516a4263ea 100644 > --- a/fs/nfsd/filecache.h > +++ b/fs/nfsd/filecache.h > @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **nfp); > __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, > unsigned int may_flags, struct nfsd_file **nfp); > -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, > - unsigned int may_flags, struct nfsd_file **nfp); > +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, > + unsigned int may_flags, struct file *file, > + struct nfsd_file **nfp); > int nfsd_file_cache_stats_show(struct seq_file *m, void *v); > #endif /* _FS_NFSD_FILECACHE_H */ > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 7b2ee535ade8..4809ae0f0138 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, > if (!fp->fi_fds[oflag]) { > spin_unlock(&fp->fi_lock); > > - if (!open->op_filp) { > - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); > - if (status != nfs_ok) > - goto out_put_access; > - } else { > - status = nfsd_file_create(rqstp, cur_fh, access, &nf); > - if (status != nfs_ok) > - goto out_put_access; > - nf->nf_file = open->op_filp; > - open->op_filp = NULL; > - trace_nfsd_file_create(rqstp, access, nf); > - } > + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, > + open->op_filp, &nf); > + if (status != nfs_ok) > + goto out_put_access; > > spin_lock(&fp->fi_lock); > if (!fp->fi_fds[oflag]) { > diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h > index c852ae8eaf37..8f9c82d9e075 100644 > --- a/fs/nfsd/trace.h > +++ b/fs/nfsd/trace.h > @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire, > ) > ); > > -TRACE_EVENT(nfsd_file_create, > - TP_PROTO( > - const struct svc_rqst *rqstp, > - unsigned int may_flags, > - const struct nfsd_file *nf > - ), > - > - TP_ARGS(rqstp, may_flags, nf), > - > - TP_STRUCT__entry( > - __field(const void *, nf_inode) > - __field(const void *, nf_file) > - __field(unsigned long, may_flags) > - __field(unsigned long, nf_flags) > - __field(unsigned long, nf_may) > - __field(unsigned int, nf_ref) > - __field(u32, xid) > - ), > - > - TP_fast_assign( > - __entry->nf_inode = nf->nf_inode; > - __entry->nf_file = nf->nf_file; > - __entry->may_flags = may_flags; > - __entry->nf_flags = nf->nf_flags; > - __entry->nf_may = nf->nf_may; > - __entry->nf_ref = refcount_read(&nf->nf_ref); > - __entry->xid = be32_to_cpu(rqstp->rq_xid); > - ), > - > - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", > - __entry->xid, __entry->nf_inode, > - show_nfsd_may_flags(__entry->may_flags), > - __entry->nf_ref, show_nf_flags(__entry->nf_flags), > - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file > - ) > -); > - > TRACE_EVENT(nfsd_file_insert_err, > TP_PROTO( > const struct svc_rqst *rqstp, > @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err, > ) > ); > > -TRACE_EVENT(nfsd_file_open, > - TP_PROTO(struct nfsd_file *nf, __be32 status), > +DECLARE_EVENT_CLASS(nfsd_file_open_class, > + TP_PROTO(const struct nfsd_file *nf, __be32 status), > TP_ARGS(nf, status), > TP_STRUCT__entry( > __field(void *, nf_inode) /* cannot be dereferenced */ > @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open, > __entry->nf_file) > ) > > +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ > +DEFINE_EVENT(nfsd_file_open_class, name, \ > + TP_PROTO( \ > + const struct nfsd_file *nf, \ > + __be32 status \ > + ), \ > + TP_ARGS(nf, status)) > + > +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); > +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); > + > TRACE_EVENT(nfsd_file_is_cached, > TP_PROTO( > const struct inode *inode, > -- > 2.39.0 > -- Chuck Lever