Re: [PATCH v7 3/3] NFSD: add rpc_status netlink support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> On Wed, Aug 30, 2023 at 03:05:46PM +0200, Lorenzo Bianconi wrote:
> > Introduce rpc_status netlink support for NFSD in order to dump pending
> > RPC requests debugging information from userspace.
> 
> Very good to see this update!
> 
> netdev has asked that all new netlink protocols start from a yaml
> spec that resides under Documentation/netlink/specs/. That spec is
> then used to generate netlink parser code for the kernel and for
> user space tooling. You can find this all described here:
> 
> https://docs.kernel.org/next/userspace-api/netlink/specs.html
> 
> and here is a weak example of how this might be done:
> 
> https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/net/handshake?id=3b3009ea8abb713b022d94fba95ec270cf6e7eae
> 
> I say weak because I did that work while the yaml spec tools were
> still under development. It might not completely reflect how this
> needs to be done today.
> 
> So the yaml file would be named something like:
> 
> Documentation/netlink/specs/nfsd.yaml
> 
> and it would generate files "fs/nfsd/netlink.[ch]". It should
> generate a lot of the parser boilerplate you've written below
> by hand, so just replace that code with calls to the generated
> code.

ack, I will look into it for v8

> 
> When you post the next revision of the series, cc: netdev.

ack, will do.

Regards,
Lorenzo

> 
> 
> > Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx>
> > ---
> >  fs/nfsd/nfsctl.c           | 275 +++++++++++++++++++++++++++++++++++++
> >  fs/nfsd/nfsd.h             |  19 +++
> >  fs/nfsd/nfssvc.c           |  15 ++
> >  fs/nfsd/state.h            |   2 -
> >  include/linux/sunrpc/svc.h |   1 +
> >  include/uapi/linux/nfs.h   |  54 ++++++++
> >  6 files changed, 364 insertions(+), 2 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
> > index 33f80d289d63..4626a0002ceb 100644
> > --- a/fs/nfsd/nfsctl.c
> > +++ b/fs/nfsd/nfsctl.c
> > @@ -17,6 +17,9 @@
> >  #include <linux/sunrpc/rpc_pipe_fs.h>
> >  #include <linux/module.h>
> >  #include <linux/fsnotify.h>
> > +#include <net/genetlink.h>
> > +#include <net/ip.h>
> > +#include <net/ipv6.h>
> >  
> >  #include "idmap.h"
> >  #include "nfsd.h"
> > @@ -1495,6 +1498,273 @@ static int create_proc_exports_entry(void)
> >  
> >  unsigned int nfsd_net_id;
> >  
> > +/* the netlink family */
> > +static struct genl_family nfsd_genl;
> > +
> > +static const struct nla_policy
> > +nfsd_rpc_status_compound_policy[NFS_ATTR_RPC_STATUS_COMPOUND_MAX + 1] = {
> > +	[NFS_ATTR_RPC_STATUS_COMPOUND_OP] = { .type = NLA_STRING },
> > +};
> > +
> > +static const struct nla_policy
> > +nfsd_rpc_status_policy[NFS_ATTR_RPC_STATUS_MAX + 1] = {
> > +	[NFS_ATTR_RPC_STATUS_XID] = { .type = NLA_U32 },
> > +	[NFS_ATTR_RPC_STATUS_FLAGS] = { .type = NLA_U32 },
> > +	[NFS_ATTR_RPC_STATUS_PC_NAME] = { .type = NLA_STRING },
> > +	[NFS_ATTR_RPC_STATUS_VERSION] = { .type = NLA_U8 },
> > +	[NFS_ATTR_RPC_STATUS_STIME] = { .type = NLA_S64 },
> > +	[NFS_ATTR_RPC_STATUS_SADDR4] = { .len = sizeof_field(struct iphdr, saddr) },
> > +	[NFS_ATTR_RPC_STATUS_DADDR4] = { .len = sizeof_field(struct iphdr, daddr) },
> > +	[NFS_ATTR_RPC_STATUS_SADDR6] = { .len = sizeof_field(struct ipv6hdr, saddr) },
> > +	[NFS_ATTR_RPC_STATUS_DADDR6] = { .len = sizeof_field(struct ipv6hdr, daddr) },
> > +	[NFS_ATTR_RPC_STATUS_SPORT] = { .type = NLA_U16 },
> > +	[NFS_ATTR_RPC_STATUS_DPORT] = { .type = NLA_U16 },
> > +	[NFS_ATTR_RPC_STATUS_COMPOUND] =
> > +		NLA_POLICY_NESTED_ARRAY(nfsd_rpc_status_compound_policy),
> > +};
> > +
> > +static const struct nla_policy
> > +nfsd_genl_policy[NFS_ATTR_MAX + 1] = {
> > +	[NFS_ATTR_RPC_STATUS] = NLA_POLICY_NESTED_ARRAY(nfsd_rpc_status_policy),
> > +};
> > +
> > +static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, int index,
> > +					    struct nfsd_genl_rqstp *rqstp)
> > +{
> > +	struct nlattr *rq_attr, *comp_attr;
> > +	int i;
> > +
> > +	rq_attr = nla_nest_start(skb, index);
> > +	if (!rq_attr)
> > +		return -ENOBUFS;
> > +
> > +	if (nla_put_be32(skb, NFS_ATTR_RPC_STATUS_XID, rqstp->rq_xid) ||
> > +	    nla_put_u32(skb, NFS_ATTR_RPC_STATUS_FLAGS, rqstp->rq_flags) ||
> > +	    nla_put_string(skb, NFS_ATTR_RPC_STATUS_PC_NAME, rqstp->pc_name) ||
> > +	    nla_put_u8(skb, NFS_ATTR_RPC_STATUS_VERSION, rqstp->rq_vers) ||
> > +	    nla_put_s64(skb, NFS_ATTR_RPC_STATUS_STIME,
> > +			ktime_to_us(rqstp->rq_stime), NFS_ATTR_RPC_STATUS_PAD))
> > +		return -ENOBUFS;
> > +
> > +	switch (rqstp->saddr.sa_family) {
> > +	case AF_INET: {
> > +		const struct sockaddr_in *s_in, *d_in;
> > +
> > +		s_in = (const struct sockaddr_in *)&rqstp->saddr;
> > +		d_in = (const struct sockaddr_in *)&rqstp->daddr;
> > +		if (nla_put_in_addr(skb, NFS_ATTR_RPC_STATUS_SADDR4,
> > +				    s_in->sin_addr.s_addr) ||
> > +		    nla_put_in_addr(skb, NFS_ATTR_RPC_STATUS_DADDR4,
> > +				    d_in->sin_addr.s_addr) ||
> > +		    nla_put_be16(skb, NFS_ATTR_RPC_STATUS_SPORT,
> > +				 s_in->sin_port) ||
> > +		    nla_put_be16(skb, NFS_ATTR_RPC_STATUS_DPORT,
> > +				 d_in->sin_port))
> > +			return -ENOBUFS;
> > +		break;
> > +	}
> > +	case AF_INET6: {
> > +		const struct sockaddr_in6 *s_in, *d_in;
> > +
> > +		s_in = (const struct sockaddr_in6 *)&rqstp->saddr;
> > +		d_in = (const struct sockaddr_in6 *)&rqstp->daddr;
> > +		if (nla_put_in6_addr(skb, NFS_ATTR_RPC_STATUS_SADDR6,
> > +				     &s_in->sin6_addr) ||
> > +		    nla_put_in6_addr(skb, NFS_ATTR_RPC_STATUS_DADDR6,
> > +				     &d_in->sin6_addr) ||
> > +		    nla_put_be16(skb, NFS_ATTR_RPC_STATUS_SPORT,
> > +				 s_in->sin6_port) ||
> > +		    nla_put_be16(skb, NFS_ATTR_RPC_STATUS_DPORT,
> > +				 d_in->sin6_port))
> > +			return -ENOBUFS;
> > +		break;
> > +	}
> > +	default:
> > +		break;
> > +	}
> > +
> > +	comp_attr = nla_nest_start(skb, NFS_ATTR_RPC_STATUS_COMPOUND);
> > +	if (!comp_attr)
> > +		return -ENOBUFS;
> > +
> > +	for (i = 0; i < rqstp->opcnt; i++) {
> > +		struct nlattr *op_attr;
> > +
> > +		op_attr = nla_nest_start(skb, i);
> > +		if (!op_attr)
> > +			return -ENOBUFS;
> > +
> > +		if (nla_put_string(skb, NFS_ATTR_RPC_STATUS_COMPOUND_OP,
> > +				   nfsd4_op_name(rqstp->opnum[i])))
> > +			return -ENOBUFS;
> > +
> > +		nla_nest_end(skb, op_attr);
> > +	}
> > +
> > +	nla_nest_end(skb, comp_attr);
> > +	nla_nest_end(skb, rq_attr);
> > +
> > +	return 0;
> > +}
> > +
> > +static int nfsd_genl_get_rpc_status(struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct nfsd_net *nn = net_generic(genl_info_net(info), nfsd_net_id);
> > +	struct nlattr *rpc_attr;
> > +	int i, rqstp_index = 0;
> > +	struct sk_buff *msg;
> > +	void *hdr;
> > +
> > +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
> > +	if (!msg)
> > +		return -ENOMEM;
> > +
> > +	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &nfsd_genl,
> > +			  0, NFS_CMD_NEW_RPC_STATUS);
> > +	if (!hdr) {
> > +		nlmsg_free(msg);
> > +		return -ENOBUFS;
> > +	}
> > +
> > +	rpc_attr = nla_nest_start(msg, NFS_ATTR_RPC_STATUS);
> > +	if (!rpc_attr)
> > +		goto nla_put_failure;
> > +
> > +	rcu_read_lock();
> > +
> > +	for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) {
> > +		struct svc_rqst *rqstp;
> > +
> > +		list_for_each_entry_rcu(rqstp,
> > +				&nn->nfsd_serv->sv_pools[i].sp_all_threads,
> > +				rq_all) {
> > +			struct nfsd_genl_rqstp genl_rqstp;
> > +			unsigned int status_counter;
> > +
> > +			/*
> > +			 * Acquire rq_status_counter before parsing the rqst
> > +			 * fields. rq_status_counter is set to an odd value in
> > +			 * order to notify the consumers the rqstp fields are
> > +			 * meaningful.
> > +			 */
> > +			status_counter =
> > +				smp_load_acquire(&rqstp->rq_status_counter);
> > +			if (!(status_counter & 1))
> > +				continue;
> > +
> > +			genl_rqstp.rq_xid = rqstp->rq_xid;
> > +			genl_rqstp.rq_flags = rqstp->rq_flags;
> > +			genl_rqstp.rq_vers = rqstp->rq_vers;
> > +			genl_rqstp.pc_name = svc_proc_name(rqstp);
> > +			genl_rqstp.rq_stime = rqstp->rq_stime;
> > +			genl_rqstp.opcnt = 0;
> > +			memcpy(&genl_rqstp.daddr, svc_daddr(rqstp),
> > +			       sizeof(struct sockaddr));
> > +			memcpy(&genl_rqstp.saddr, svc_addr(rqstp),
> > +			       sizeof(struct sockaddr));
> > +
> > +#ifdef CONFIG_NFSD_V4
> > +			if (rqstp->rq_vers == NFS4_VERSION &&
> > +			    rqstp->rq_proc == NFSPROC4_COMPOUND) {
> > +				/* NFSv4 compound */
> > +				struct nfsd4_compoundargs *args;
> > +				int j;
> > +
> > +				args = rqstp->rq_argp;
> > +				genl_rqstp.opcnt = args->opcnt;
> > +				for (j = 0; j < genl_rqstp.opcnt; j++)
> > +					genl_rqstp.opnum[j] =
> > +						args->ops[j].opnum;
> > +			}
> > +#endif /* CONFIG_NFSD_V4 */
> > +
> > +			/*
> > +			 * Acquire rq_status_counter before reporting the rqst
> > +			 * fields to the user.
> > +			 */
> > +			if (smp_load_acquire(&rqstp->rq_status_counter) !=
> > +			    status_counter)
> > +				continue;
> > +
> > +			if (nfsd_genl_rpc_status_compose_msg(msg,
> > +							     rqstp_index++,
> > +							     &genl_rqstp))
> > +				goto nla_put_failure_rcu;
> > +		}
> > +	}
> > +
> > +	rcu_read_unlock();
> > +
> > +	nla_nest_end(msg, rpc_attr);
> > +	genlmsg_end(msg, hdr);
> > +
> > +	return genlmsg_reply(msg, info);
> > +
> > +nla_put_failure_rcu:
> > +	rcu_read_unlock();
> > +nla_put_failure:
> > +	genlmsg_cancel(msg, hdr);
> > +	nlmsg_free(msg);
> > +
> > +	return -EMSGSIZE;
> > +}
> > +
> > +static int nfsd_genl_pre_doit(const struct genl_split_ops *ops,
> > +			      struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	struct nfsd_net *nn = net_generic(genl_info_net(info), nfsd_net_id);
> > +
> > +	if (ops->internal_flags & NFSD_FLAG_NEED_REF_COUNT) {
> > +		int ret = -ENODEV;
> > +
> > +		mutex_lock(&nfsd_mutex);
> > +		if (nn->nfsd_serv) {
> > +			svc_get(nn->nfsd_serv);
> > +			ret = 0;
> > +		}
> > +		mutex_unlock(&nfsd_mutex);
> > +
> > +		return ret;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static void nfsd_genl_post_doit(const struct genl_split_ops *ops,
> > +				struct sk_buff *skb, struct genl_info *info)
> > +{
> > +	if (ops->internal_flags & NFSD_FLAG_NEED_REF_COUNT) {
> > +		mutex_lock(&nfsd_mutex);
> > +		nfsd_put(genl_info_net(info));
> > +		mutex_unlock(&nfsd_mutex);
> > +	}
> > +}
> > +
> > +static struct genl_small_ops nfsd_genl_ops[] = {
> > +	{
> > +		.cmd = NFS_CMD_GET_RPC_STATUS,
> > +		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
> > +		.doit = nfsd_genl_get_rpc_status,
> > +		.internal_flags = NFSD_FLAG_NEED_REF_COUNT,
> > +	},
> > +};
> > +
> > +static struct genl_family nfsd_genl __ro_after_init = {
> > +	.name = "nfsd_server",
> > +	.version = 1,
> > +	.maxattr = NFS_ATTR_MAX,
> > +	.module = THIS_MODULE,
> > +	.netnsok = true,
> > +	.parallel_ops = true,
> > +	.hdrsize = 0,
> > +	.pre_doit = nfsd_genl_pre_doit,
> > +	.post_doit = nfsd_genl_post_doit,
> > +	.policy = nfsd_genl_policy,
> > +	.small_ops = nfsd_genl_ops,
> > +	.n_small_ops = ARRAY_SIZE(nfsd_genl_ops),
> > +	.resv_start_op = NFS_CMD_NEW_RPC_STATUS + 1,
> > +};
> > +
> >  /**
> >   * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
> >   * @net: a freshly-created network namespace
> > @@ -1589,6 +1859,10 @@ static int __init init_nfsd(void)
> >  	retval = register_filesystem(&nfsd_fs_type);
> >  	if (retval)
> >  		goto out_free_all;
> > +	retval = genl_register_family(&nfsd_genl);
> > +	if (retval)
> > +		goto out_free_all;
> > +
> >  	return 0;
> >  out_free_all:
> >  	nfsd4_destroy_laundry_wq();
> > @@ -1613,6 +1887,7 @@ static int __init init_nfsd(void)
> >  
> >  static void __exit exit_nfsd(void)
> >  {
> > +	genl_unregister_family(&nfsd_genl);
> >  	unregister_filesystem(&nfsd_fs_type);
> >  	nfsd4_destroy_laundry_wq();
> >  	unregister_cld_notifier();
> > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> > index e95c3322eb9b..749c871b3291 100644
> > --- a/fs/nfsd/nfsd.h
> > +++ b/fs/nfsd/nfsd.h
> > @@ -62,6 +62,25 @@ struct readdir_cd {
> >  	__be32			err;	/* 0, nfserr, or nfserr_eof */
> >  };
> >  
> > +enum nfsd_genl_internal_flag {
> > +	NFSD_FLAG_NEED_REF_COUNT = BIT(0),
> > +};
> > +
> > +/* Maximum number of operations per session compound */
> > +#define NFSD_MAX_OPS_PER_COMPOUND	50
> > +
> > +struct nfsd_genl_rqstp {
> > +	struct sockaddr daddr;
> > +	struct sockaddr saddr;
> > +	unsigned long rq_flags;
> > +	const char *pc_name;
> > +	ktime_t rq_stime;
> > +	__be32 rq_xid;
> > +	u32 rq_vers;
> > +	/* NFSv4 compound */
> > +	u32 opnum[NFSD_MAX_OPS_PER_COMPOUND];
> > +	u16 opcnt;
> > +};
> >  
> >  extern struct svc_program	nfsd_program;
> >  extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
> > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> > index 1582af33e204..fad34a7325b3 100644
> > --- a/fs/nfsd/nfssvc.c
> > +++ b/fs/nfsd/nfssvc.c
> > @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> >  	if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
> >  		goto out_decode_err;
> >  
> > +	/*
> > +	 * Release rq_status_counter setting it to an odd value after the rpc
> > +	 * request has been properly parsed. rq_status_counter is used to
> > +	 * notify the consumers if the rqstp fields are stable
> > +	 * (rq_status_counter is odd) or not meaningful (rq_status_counter
> > +	 * is even).
> > +	 */
> > +	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1);
> > +
> >  	rp = NULL;
> >  	switch (nfsd_cache_lookup(rqstp, &rp)) {
> >  	case RC_DOIT:
> > @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
> >  	if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
> >  		goto out_encode_err;
> >  
> > +	/*
> > +	 * Release rq_status_counter setting it to an even value after the rpc
> > +	 * request has been properly processed.
> > +	 */
> > +	smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
> > +
> >  	nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
> >  out_cached_reply:
> >  	return 1;
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index cbddcf484dba..41bdc913fa71 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -174,8 +174,6 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
> >  
> >  /* Maximum number of slots per session. 160 is useful for long haul TCP */
> >  #define NFSD_MAX_SLOTS_PER_SESSION     160
> > -/* Maximum number of operations per session compound */
> > -#define NFSD_MAX_OPS_PER_COMPOUND	50
> >  /* Maximum  session per slot cache size */
> >  #define NFSD_SLOT_CACHE_SIZE		2048
> >  /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index dbf5b21feafe..caa20defd255 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -251,6 +251,7 @@ struct svc_rqst {
> >  						 * net namespace
> >  						 */
> >  	void **			rq_lease_breaker; /* The v4 client breaking a lease */
> > +	unsigned int		rq_status_counter; /* RPC processing counter */
> >  };
> >  
> >  /* bits for rq_flags */
> > diff --git a/include/uapi/linux/nfs.h b/include/uapi/linux/nfs.h
> > index 946cb62d64b0..86a5daaaf9d9 100644
> > --- a/include/uapi/linux/nfs.h
> > +++ b/include/uapi/linux/nfs.h
> > @@ -132,4 +132,58 @@ enum nfs_ftype {
> >  	NFFIFO = 8
> >  };
> >  
> > +enum nfs_commands {
> > +	NFS_CMD_UNSPEC,
> > +
> > +	NFS_CMD_GET_RPC_STATUS,
> > +	NFS_CMD_NEW_RPC_STATUS,
> > +
> > +	/* add new commands above here */
> > +
> > +	__NFS_CMD_MAX,
> > +	NFS_CMD_MAX = __NFS_CMD_MAX - 1,
> > +};
> > +
> > +enum nfs_rcp_status_compound_attrs {
> > +	__NFS_ATTR_RPC_STATUS_COMPOUND_UNSPEC,
> > +	NFS_ATTR_RPC_STATUS_COMPOUND_OP,
> > +
> > +	/* keep it last */
> > +	NUM_NFS_ATTR_RPC_STATUS_COMPOUND,
> > +	NFS_ATTR_RPC_STATUS_COMPOUND_MAX = NUM_NFS_ATTR_RPC_STATUS_COMPOUND - 1,
> > +};
> > +
> > +enum nfs_rpc_status_attrs {
> > +	__NFS_ATTR_RPC_STATUS_UNSPEC,
> > +
> > +	NFS_ATTR_RPC_STATUS_XID,
> > +	NFS_ATTR_RPC_STATUS_FLAGS,
> > +	NFS_ATTR_RPC_STATUS_PC_NAME,
> > +	NFS_ATTR_RPC_STATUS_VERSION,
> > +	NFS_ATTR_RPC_STATUS_STIME,
> > +	NFS_ATTR_RPC_STATUS_SADDR4,
> > +	NFS_ATTR_RPC_STATUS_DADDR4,
> > +	NFS_ATTR_RPC_STATUS_SADDR6,
> > +	NFS_ATTR_RPC_STATUS_DADDR6,
> > +	NFS_ATTR_RPC_STATUS_SPORT,
> > +	NFS_ATTR_RPC_STATUS_DPORT,
> > +	NFS_ATTR_RPC_STATUS_PAD,
> > +	NFS_ATTR_RPC_STATUS_COMPOUND,
> > +
> > +	/* keep it last */
> > +	NUM_NFS_ATTR_RPC_STATUS,
> > +	NFS_ATTR_RPC_STATUS_MAX = NUM_NFS_ATTR_RPC_STATUS - 1,
> > +};
> > +
> > +enum nfs_attrs {
> > +	NFS_ATTR_UNSPEC,
> > +
> > +	NFS_ATTR_RPC_STATUS,
> > +
> > +	/* add new attributes above here */
> > +
> > +	__NFS_ATTR_MAX,
> > +	NFS_ATTR_MAX = __NFS_ATTR_MAX - 1
> > +};
> > +
> >  #endif /* _UAPI_LINUX_NFS_H */
> > -- 
> > 2.41.0
> > 
> 
> -- 
> Chuck Lever
> 

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux