Re: [PATCH v6 3/3] NFSD: add rpc_status entry in nfsd debug filesystem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 11 Aug 2023, Chuck Lever wrote:
> On Thu, Aug 10, 2023 at 10:39:21AM +0200, Lorenzo Bianconi wrote:
> > Introduce rpc_status entry in nfsd debug filesystem in order to dump
> > pending RPC requests debugging information.
> > 
> > Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366
> > Reviewed-by: NeilBrown <neilb@xxxxxxx>
> > Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx>
> > ---
> >  fs/nfsd/nfs4proc.c         |   4 +-
> >  fs/nfsd/nfsctl.c           |   9 +++
> >  fs/nfsd/nfsd.h             |   7 ++
> >  fs/nfsd/nfssvc.c           | 140 +++++++++++++++++++++++++++++++++++++
> >  include/linux/sunrpc/svc.h |   1 +
> >  net/sunrpc/svc.c           |   2 +-
> >  6 files changed, 159 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index aa4f21f92deb..ff5a1dddc0ed 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
> >  
> >  static const struct nfsd4_operation nfsd4_ops[];
> >  
> > -static const char *nfsd4_op_name(unsigned opnum);
> > -
> >  /*
> >   * Enforce NFSv4.1 COMPOUND ordering rules:
> >   *
> > @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op)
> >  	}
> >  }
> >  
> > -static const char *nfsd4_op_name(unsigned opnum)
> > +const char *nfsd4_op_name(unsigned int opnum)
> >  {
> >  	if (opnum < ARRAY_SIZE(nfsd4_ops))
> >  		return nfsd4_ops[opnum].op_name;
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index dad88bff3b9e..83eb5c6d894e 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -47,6 +47,7 @@ enum {
> >  	NFSD_MaxBlkSize,
> >  	NFSD_MaxConnections,
> >  	NFSD_Filecache,
> > +	NFSD_Rpc_Status,
> >  	/*
> >  	 * The below MUST come last.  Otherwise we leave a hole in nfsd_files[]
> >  	 * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
> > @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file)
> >  	return file_inode(file)->i_sb->s_fs_info;
> >  }
> >  
> > +static const struct file_operations nfsd_rpc_status_operations = {
> > +	.open		= nfsd_rpc_status_open,
> > +	.read		= seq_read,
> > +	.llseek		= seq_lseek,
> > +	.release	= nfsd_stats_release,
> > +};
> > +
> >  /*
> >   * write_unlock_ip - Release all locks used by a client
> >   *
> > @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
> >  		[NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
> >  		[NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO},
> >  		[NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO},
> > +		[NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO},
> >  #ifdef CONFIG_NFSD_V4
> >  		[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
> >  		[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index 55b9d85ed71b..3e8a47b93fd4 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -94,6 +94,7 @@ int		nfsd_get_nrthreads(int n, int *, struct net *);
> >  int		nfsd_set_nrthreads(int n, int *, struct net *);
> >  int		nfsd_pool_stats_open(struct inode *, struct file *);
> >  int		nfsd_stats_release(struct inode *, struct file *);
> > +int		nfsd_rpc_status_open(struct inode *inode, struct file *file);
> >  void		nfsd_shutdown_threads(struct net *net);
> >  
> >  static inline void nfsd_put(struct net *net)
> > @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn);
> >  
> >  extern void nfsd4_init_leases_net(struct nfsd_net *nn);
> >  
> > +const char *nfsd4_op_name(unsigned int opnum);
> >  #else /* CONFIG_NFSD_V4 */
> >  static inline int nfsd4_is_junction(struct dentry *dentry)
> >  {
> >  	return 0;
> >  }
> >  
> > +static inline const char *nfsd4_op_name(unsigned int opnum)
> > +{
> > +	return "unknown_operation";
> > +}
> > +
> >  static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { };
> >  
> >  #define register_cld_notifier() 0
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 460219030ce1..237be14d3a11 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> >  	if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> >  		goto out_decode_err;
> >  
> > +	/*
> > +	 * Release rq_status_counter setting it to an odd value after the rpc
> > +	 * request has been properly parsed. rq_status_counter is used to
> > +	 * notify the consumers if the rqstp fields are stable
> > +	 * (rq_status_counter is odd) or not meaningful (rq_status_counter
> > +	 * is even).
> > +	 */
> > +	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> > +
> >  	rp = NULL;
> >  	switch (nfsd_cache_lookup(rqstp, &rp)) {
> >  	case RC_DOIT:
> > @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> >  	if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> >  		goto out_encode_err;
> >  
> > +	/*
> > +	 * Release rq_status_counter setting it to an even value after the rpc
> > +	 * request has been properly processed.
> > +	 */
> > +	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> > +
> >  	nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> >  out_cached_reply:
> >  	return 1;
> > @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file)
> >  	mutex_unlock(&nfsd_mutex);
> >  	return ret;
> >  }
> > +
> > +static int nfsd_rpc_status_show(struct seq_file *m, void *v)
> > +{
> > +	struct inode *inode = file_inode(m->file);
> > +	struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id);
> > +	int i;
> > +
> > +	seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n");
> > +
> > +	rcu_read_lock();
> > +
> > +	for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> > +		struct svc_rqst *rqstp;
> > +
> > +		list_for_each_entry_rcu(rqstp,
> > +				&nn->nfsd_serv->sv_pools[i].sp_all_threads,
> > +				rq_all) {
> > +			struct {
> > +				struct sockaddr daddr;
> > +				struct sockaddr saddr;
> > +				unsigned long rq_flags;
> > +				const char *pc_name;
> > +				ktime_t rq_stime;
> > +				__be32 rq_xid;
> > +				u32 rq_vers;
> > +				/* NFSv4 compund */
> > +				u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> > +			} rqstp_info;
> > +			unsigned int status_counter;
> > +			char buf[RPC_MAX_ADDRBUFLEN];
> > +			int opcnt = 0;
> > +
> > +			/*
> > +			 * Acquire rq_status_counter before parsing the rqst
> > +			 * fields. rq_status_counter is set to an odd value in
> > +			 * order to notify the consumers the rqstp fields are
> > +			 * meaningful.
> > +			 */
> > +			status_counter = smp_load_acquire(&rqstp->rq_status_counter);
> > +			if (!(status_counter & 1))
> > +				continue;
> > +
> > +			rqstp_info.rq_xid = rqstp->rq_xid;
> > +			rqstp_info.rq_flags = rqstp->rq_flags;
> > +			rqstp_info.rq_vers = rqstp->rq_vers;
> > +			rqstp_info.pc_name = svc_proc_name(rqstp);
> > +			rqstp_info.rq_stime = rqstp->rq_stime;
> > +			memcpy(&rqstp_info.daddr, svc_daddr(rqstp),
> > +			       sizeof(struct sockaddr));
> > +			memcpy(&rqstp_info.saddr, svc_addr(rqstp),
> > +			       sizeof(struct sockaddr));
> > +
> > +#ifdef CONFIG_NFSD_V4
> > +			if (rqstp->rq_vers == NFS4_VERSION &&
> > +			    rqstp->rq_proc == NFSPROC4_COMPOUND) {
> > +				/* NFSv4 compund */
> > +				struct nfsd4_compoundargs *args = rqstp->rq_argp;
> > +				int j;
> > +
> > +				opcnt = args->opcnt;
> > +				for (j = 0; j < opcnt; j++) {
> > +					struct nfsd4_op *op = &args->ops[j];
> > +
> > +					rqstp_info.opnum[j] = op->opnum;
> > +				}
> > +			}
> > +#endif /* CONFIG_NFSD_V4 */
> > +
> > +			/*
> > +			 * Acquire rq_status_counter before reporting the rqst
> > +			 * fields to the user.
> > +			 */
> > +			if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter)
> > +				continue;
> > +
> > +			seq_printf(m,
> > +				   "%04u %04ld NFSv%d %s %016lld",
> > +				   be32_to_cpu(rqstp_info.rq_xid),
> 
> It's proper to display XIDs as 8-hexit hexadecimal values, as you
> did before. "0x%08x" is the correct format, as that matches the
> XID display format used by Wireshark and our tracepoints.
> 
> 
> > +				   rqstp_info.rq_flags,
> 
> I didn't mean for you to change the flags format to decimal. I was
> trying to point out that the content of this field will need to be
> displayed symbolically if we care about an easy user experience.
> 
> Let's stick with hex here. A clever user can read the bits directly
> from that. All others should have a tool that parses this field and
> prints the list of bits in it symbolically.
> 
> 
> > +				   rqstp_info.rq_vers,
> > +				   rqstp_info.pc_name,
> > +				   ktime_to_us(rqstp_info.rq_stime));
> > +			seq_printf(m, " %s",
> > +				   __svc_print_addr(&rqstp_info.saddr, buf,
> > +						    sizeof(buf), false));
> > +			seq_printf(m, " %s",
> > +				   __svc_print_addr(&rqstp_info.daddr, buf,
> > +						    sizeof(buf), false));
> > +			if (opcnt) {
> > +				int j;
> > +
> > +				seq_puts(m, " ");
> > +				for (j = 0; j < opcnt; j++)
> > +					seq_printf(m, "%s%s",
> > +						   nfsd4_op_name(rqstp_info.opnum[j]),
> > +						   j == opcnt - 1 ? "" : ":");
> > +			} else {
> > +				seq_puts(m, " -");
> > +			}
> 
> This looks correct to me.
> 
> I'm leaning towards moving this to a netlink API that can be
> extended over time to handle other stats and also act as an NFSD
> control plane, similar to other network subsystems.
> 
> Any comments, complaints or rotten fruit from anyone?

I think netlink is the best way forward.  'cat' is nice, but not
necessary.  We have an established path for distributing tools for
working with nfsd so we get easily get a suitable tool into the hands of
our users.

The only fruit I have relates to the name "rpc_status", and it probably
over-ripe rather than rotten :-)
In the context of RPC, "status" means the success/failure result of a
request.  That is not what this file provides.  It is a list of active
requests.  So maybe "active_rpc".
One advantage of netlink is that the API is based on numbers, not names!

NeilBrown


> 
> 
> > +			seq_puts(m, "\n");
> > +		}
> > +	}
> > +
> > +	rcu_read_unlock();
> > +
> > +	return 0;
> > +}
> > +
> > +/**
> > + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler
> > + * @inode: entry inode pointer.
> > + * @file: entry file pointer.
> > + *
> > + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler.
> > + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server.
> > + */
> > +int nfsd_rpc_status_open(struct inode *inode, struct file *file)
> > +{
> > +	int ret = nfsd_stats_open(inode);
> > +
> > +	if (ret)
> > +		return ret;
> > +
> > +	return single_open(file, nfsd_rpc_status_show, inode->i_private);
> > +}
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index 7838b37bcfa8..b49c0470b4fe 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -251,6 +251,7 @@ struct svc_rqst {
> >  						 * net namespace
> >  						 */
> >  	void **			rq_lease_breaker; /* The v4 client breaking a lease */
> > +	unsigned int		rq_status_counter; /* RPC processing counter */
> >  };
> >  
> >  /* bits for rq_flags */
> > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> > index af692bff44ab..83bee19df104 100644
> > --- a/net/sunrpc/svc.c
> > +++ b/net/sunrpc/svc.c
> > @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp)
> >  		return rqstp->rq_procinfo->pc_name;
> >  	return "unknown";
> >  }
> > -
> > +EXPORT_SYMBOL_GPL(svc_proc_name);
> >  
> >  /**
> >   * svc_encode_result_payload - mark a range of bytes as a result payload
> > -- 
> > 2.41.0
> > 
> 
> -- 
> Chuck Lever
> 





[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux