Re: [PATCH v2] nfsd: fix handling of cached open files in nfsd4_open codepath

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> On Jan 5, 2023, at 9:02 AM, Chuck Lever III <chuck.lever@xxxxxxxxxx> wrote:
> 
> 
> 
>> On Jan 5, 2023, at 6:23 AM, Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>> 
>> Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a
>> regular NFSv4 file") added the ability to cache an open fd over a
>> compound. There are a couple of problems with the way this currently
>> works:
>> 
>> It's racy, as a newly-created nfsd_file can end up with its PENDING bit
>> cleared while the nf is hashed, and the nf_file pointer is still zeroed
>> out. Other tasks can find it in this state and they expect to see a
>> valid nf_file, and can oops if nf_file is NULL.
>> 
>> Also, there is no guarantee that we'll end up creating a new nfsd_file
>> if one is already in the hash. If an extant entry is in the hash with a
>> valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with
>> the value of op_file and the old nf_file will leak.
>> 
>> Fix both issues by changing nfsd_file_acquire to take an optional file
>> pointer. If one is present when this is called, we'll take a new
>> reference to it instead of trying to open the file. If the nfsd_file
>> already has a valid nf_file, we'll just ignore the optional file and
>> pass the nfsd_file back as-is.
>> 
>> Also rework the tracepoints a bit to allow for a cached open variant,
>> and don't try to avoid counting acquisitions in the case where we
>> already have a cached open file.
>> 
>> Cc: Trond Myklebust <trondmy@xxxxxxxxxxxxxxx>
>> Reported-by: Stanislav Saner <ssaner@xxxxxxxxxx>
>> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
>> ---
>> fs/nfsd/filecache.c | 49 ++++++++++++++----------------------------
>> fs/nfsd/filecache.h |  5 ++---
>> fs/nfsd/nfs4proc.c  |  2 +-
>> fs/nfsd/nfs4state.c | 20 ++++++-----------
>> fs/nfsd/trace.h     | 52 ++++++++++++---------------------------------
>> 5 files changed, 38 insertions(+), 90 deletions(-)
>> 
>> v2: rebased directly onto current master branch to fix up some
>>   contextual conflicts
> 
> Hi Jeff-
> 
> The basic race is that nf_file must be filled in before the PENDING
> bit is cleared. Got it.
> 
> Seems like -rc fodder, and needs a Fixes: tag.

In other words, maybe it should be rebased on for-rc, not for-next ?


> However, I'd prefer to keep the synopses of nfsd_file_acquire() and
> nfsd_file_acquire_gc() identical. nfs4_get_vfs_file() is the one
> spot that needs this special behavior, so it should continue to
> call nfsd_file_create(), or something like it. How about one of:
> 
> nfsd_file_acquire2
> nfsd_file_acquire_create
> nfsd_file_acquire_cached
> 
> 
>> diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
>> index 45b2c9e3f636..6674a86e1917 100644
>> --- a/fs/nfsd/filecache.c
>> +++ b/fs/nfsd/filecache.c
>> @@ -1071,8 +1071,8 @@ nfsd_file_is_cached(struct inode *inode)
>> 
>> static __be32
>> nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> -		     unsigned int may_flags, struct nfsd_file **pnf,
>> -		     bool open, bool want_gc)
>> +		     unsigned int may_flags, struct file *file,
>> +		     struct nfsd_file **pnf, bool want_gc)
>> {
>> 	struct nfsd_file_lookup_key key = {
>> 		.type	= NFSD_FILE_KEY_FULL,
>> @@ -1147,8 +1147,7 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> 	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
>> out:
>> 	if (status == nfs_ok) {
>> -		if (open)
>> -			this_cpu_inc(nfsd_file_acquisitions);
>> +		this_cpu_inc(nfsd_file_acquisitions);
>> 		*pnf = nf;
>> 	} else {
>> 		if (refcount_dec_and_test(&nf->nf_ref))
>> @@ -1158,20 +1157,23 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> 
>> out_status:
>> 	put_cred(key.cred);
>> -	if (open)
>> -		trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
>> +	trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
>> 	return status;
>> 
>> open_file:
>> 	trace_nfsd_file_alloc(nf);
>> 	nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
>> 	if (nf->nf_mark) {
>> -		if (open) {
>> +		if (file) {
>> +			get_file(file);
>> +			nf->nf_file = file;
>> +			status = nfs_ok;
>> +			trace_nfsd_file_open_cached(nf, status);
>> +		} else {
>> 			status = nfsd_open_verified(rqstp, fhp, may_flags,
>> 						    &nf->nf_file);
>> 			trace_nfsd_file_open(nf, status);
>> -		} else
>> -			status = nfs_ok;
>> +		}
>> 	} else
>> 		status = nfserr_jukebox;
>> 	/*
>> @@ -1207,7 +1209,7 @@ __be32
>> nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> 		     unsigned int may_flags, struct nfsd_file **pnf)
>> {
>> -	return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true);
>> +	return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true);
>> }
>> 
>> /**
>> @@ -1215,6 +1217,7 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> * @rqstp: the RPC transaction being executed
>> * @fhp: the NFS filehandle of the file to be opened
>> * @may_flags: NFSD_MAY_ settings for the file
>> + * @file: cached, already-open file (may be NULL)
>> * @pnf: OUT: new or found "struct nfsd_file" object
>> *
>> * The nfsd_file_object returned by this API is reference-counted
>> @@ -1226,30 +1229,10 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> */
>> __be32
>> nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> -		  unsigned int may_flags, struct nfsd_file **pnf)
>> -{
>> -	return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false);
>> -}
>> -
>> -/**
>> - * nfsd_file_create - Get a struct nfsd_file, do not open
>> - * @rqstp: the RPC transaction being executed
>> - * @fhp: the NFS filehandle of the file just created
>> - * @may_flags: NFSD_MAY_ settings for the file
>> - * @pnf: OUT: new or found "struct nfsd_file" object
>> - *
>> - * The nfsd_file_object returned by this API is reference-counted
>> - * but not garbage-collected. The object is released immediately
>> - * one RCU grace period after the final nfsd_file_put().
>> - *
>> - * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
>> - * network byte order is returned.
>> - */
>> -__be32
>> -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> -		 unsigned int may_flags, struct nfsd_file **pnf)
>> +		  unsigned int may_flags, struct file *file,
>> +		  struct nfsd_file **pnf)
>> {
>> -	return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false);
>> +	return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false);
>> }
>> 
>> /*
>> diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
>> index b7efb2c3ddb1..ef0083cd4ea9 100644
>> --- a/fs/nfsd/filecache.h
>> +++ b/fs/nfsd/filecache.h
>> @@ -59,8 +59,7 @@ bool nfsd_file_is_cached(struct inode *inode);
>> __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> 		  unsigned int may_flags, struct nfsd_file **nfp);
>> __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> -		  unsigned int may_flags, struct nfsd_file **nfp);
>> -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> -		  unsigned int may_flags, struct nfsd_file **nfp);
>> +		  unsigned int may_flags, struct file *file,
>> +		  struct nfsd_file **nfp);
>> int nfsd_file_cache_stats_show(struct seq_file *m, void *v);
>> #endif /* _FS_NFSD_FILECACHE_H */
>> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>> index bd880d55f565..6b09cdd4b067 100644
>> --- a/fs/nfsd/nfs4proc.c
>> +++ b/fs/nfsd/nfs4proc.c
>> @@ -735,7 +735,7 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> 	__be32 status;
>> 
>> 	status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE |
>> -				   NFSD_MAY_NOT_BREAK_LEASE, &nf);
>> +				   NFSD_MAY_NOT_BREAK_LEASE, NULL, &nf);
>> 	if (status != nfs_ok)
>> 		return status;
>> 
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index 7b2ee535ade8..b68238024e49 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -5262,18 +5262,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
>> 	if (!fp->fi_fds[oflag]) {
>> 		spin_unlock(&fp->fi_lock);
>> 
>> -		if (!open->op_filp) {
>> -			status = nfsd_file_acquire(rqstp, cur_fh, access, &nf);
>> -			if (status != nfs_ok)
>> -				goto out_put_access;
>> -		} else {
>> -			status = nfsd_file_create(rqstp, cur_fh, access, &nf);
>> -			if (status != nfs_ok)
>> -				goto out_put_access;
>> -			nf->nf_file = open->op_filp;
>> -			open->op_filp = NULL;
>> -			trace_nfsd_file_create(rqstp, access, nf);
>> -		}
>> +		status = nfsd_file_acquire(rqstp, cur_fh, access,
>> +					   open->op_filp, &nf);
>> +		if (status != nfs_ok)
>> +			goto out_put_access;
>> 
>> 		spin_lock(&fp->fi_lock);
>> 		if (!fp->fi_fds[oflag]) {
>> @@ -6472,7 +6464,7 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
>> 			goto out;
>> 		}
>> 	} else {
>> -		status = nfsd_file_acquire(rqstp, fhp, acc, &nf);
>> +		status = nfsd_file_acquire(rqstp, fhp, acc, NULL, &nf);
>> 		if (status)
>> 			return status;
>> 	}
>> @@ -7644,7 +7636,7 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
>> 	struct inode *inode;
>> 	__be32 err;
>> 
>> -	err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
>> +	err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, NULL, &nf);
>> 	if (err)
>> 		return err;
>> 	inode = fhp->fh_dentry->d_inode;
>> diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
>> index c852ae8eaf37..7c6cbc37c8c9 100644
>> --- a/fs/nfsd/trace.h
>> +++ b/fs/nfsd/trace.h
>> @@ -981,43 +981,6 @@ TRACE_EVENT(nfsd_file_acquire,
>> 	)
>> );
>> 
>> -TRACE_EVENT(nfsd_file_create,
>> -	TP_PROTO(
>> -		const struct svc_rqst *rqstp,
>> -		unsigned int may_flags,
>> -		const struct nfsd_file *nf
>> -	),
>> -
>> -	TP_ARGS(rqstp, may_flags, nf),
>> -
>> -	TP_STRUCT__entry(
>> -		__field(const void *, nf_inode)
>> -		__field(const void *, nf_file)
>> -		__field(unsigned long, may_flags)
>> -		__field(unsigned long, nf_flags)
>> -		__field(unsigned long, nf_may)
>> -		__field(unsigned int, nf_ref)
>> -		__field(u32, xid)
>> -	),
>> -
>> -	TP_fast_assign(
>> -		__entry->nf_inode = nf->nf_inode;
>> -		__entry->nf_file = nf->nf_file;
>> -		__entry->may_flags = may_flags;
>> -		__entry->nf_flags = nf->nf_flags;
>> -		__entry->nf_may = nf->nf_may;
>> -		__entry->nf_ref = refcount_read(&nf->nf_ref);
>> -		__entry->xid = be32_to_cpu(rqstp->rq_xid);
>> -	),
>> -
>> -	TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p",
>> -		__entry->xid, __entry->nf_inode,
>> -		show_nfsd_may_flags(__entry->may_flags),
>> -		__entry->nf_ref, show_nf_flags(__entry->nf_flags),
>> -		show_nfsd_may_flags(__entry->nf_may), __entry->nf_file
>> -	)
>> -);
>> -
>> TRACE_EVENT(nfsd_file_insert_err,
>> 	TP_PROTO(
>> 		const struct svc_rqst *rqstp,
>> @@ -1079,8 +1042,8 @@ TRACE_EVENT(nfsd_file_cons_err,
>> 	)
>> );
>> 
>> -TRACE_EVENT(nfsd_file_open,
>> -	TP_PROTO(struct nfsd_file *nf, __be32 status),
>> +DECLARE_EVENT_CLASS(nfsd_file_open_class,
>> +	TP_PROTO(const struct nfsd_file *nf, __be32 status),
>> 	TP_ARGS(nf, status),
>> 	TP_STRUCT__entry(
>> 		__field(void *, nf_inode)	/* cannot be dereferenced */
>> @@ -1104,6 +1067,17 @@ TRACE_EVENT(nfsd_file_open,
>> 		__entry->nf_file)
>> )
>> 
>> +#define DEFINE_NFSD_FILE_OPEN_EVENT(name)					\
>> +DEFINE_EVENT(nfsd_file_open_class, name,					\
>> +	TP_PROTO(							\
>> +		const struct nfsd_file *nf,				\
>> +		__be32 status						\
>> +	),								\
>> +	TP_ARGS(nf, status))
>> +
>> +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open);
>> +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open_cached);
>> +
>> TRACE_EVENT(nfsd_file_is_cached,
>> 	TP_PROTO(
>> 		const struct inode *inode,
>> -- 
>> 2.39.0
>> 
> 
> --
> Chuck Lever

--
Chuck Lever







[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux