On Thu, Aug 10, 2023 at 10:39:21AM +0200, Lorenzo Bianconi wrote: > Introduce rpc_status entry in nfsd debug filesystem in order to dump > pending RPC requests debugging information. > > Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=366 > Reviewed-by: NeilBrown <neilb@xxxxxxx> > Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> > Signed-off-by: Lorenzo Bianconi <lorenzo@xxxxxxxxxx> > --- > fs/nfsd/nfs4proc.c | 4 +- > fs/nfsd/nfsctl.c | 9 +++ > fs/nfsd/nfsd.h | 7 ++ > fs/nfsd/nfssvc.c | 140 +++++++++++++++++++++++++++++++++++++ > include/linux/sunrpc/svc.h | 1 + > net/sunrpc/svc.c | 2 +- > 6 files changed, 159 insertions(+), 4 deletions(-) > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > index aa4f21f92deb..ff5a1dddc0ed 100644 > --- a/fs/nfsd/nfs4proc.c > +++ b/fs/nfsd/nfs4proc.c > @@ -2496,8 +2496,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum) > > static const struct nfsd4_operation nfsd4_ops[]; > > -static const char *nfsd4_op_name(unsigned opnum); > - > /* > * Enforce NFSv4.1 COMPOUND ordering rules: > * > @@ -3627,7 +3625,7 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op) > } > } > > -static const char *nfsd4_op_name(unsigned opnum) > +const char *nfsd4_op_name(unsigned int opnum) > { > if (opnum < ARRAY_SIZE(nfsd4_ops)) > return nfsd4_ops[opnum].op_name; > diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c > index dad88bff3b9e..83eb5c6d894e 100644 > --- a/fs/nfsd/nfsctl.c > +++ b/fs/nfsd/nfsctl.c > @@ -47,6 +47,7 @@ enum { > NFSD_MaxBlkSize, > NFSD_MaxConnections, > NFSD_Filecache, > + NFSD_Rpc_Status, > /* > * The below MUST come last. Otherwise we leave a hole in nfsd_files[] > * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops > @@ -195,6 +196,13 @@ static inline struct net *netns(struct file *file) > return file_inode(file)->i_sb->s_fs_info; > } > > +static const struct file_operations nfsd_rpc_status_operations = { > + .open = nfsd_rpc_status_open, > + .read = seq_read, > + .llseek = seq_lseek, > + .release = nfsd_stats_release, > +}; > + > /* > * write_unlock_ip - Release all locks used by a client > * > @@ -1394,6 +1402,7 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) > [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, > [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, > [NFSD_Filecache] = {"filecache", &nfsd_file_cache_stats_fops, S_IRUGO}, > + [NFSD_Rpc_Status] = {"rpc_status", &nfsd_rpc_status_operations, S_IRUGO}, > #ifdef CONFIG_NFSD_V4 > [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, > [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 55b9d85ed71b..3e8a47b93fd4 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -94,6 +94,7 @@ int nfsd_get_nrthreads(int n, int *, struct net *); > int nfsd_set_nrthreads(int n, int *, struct net *); > int nfsd_pool_stats_open(struct inode *, struct file *); > int nfsd_stats_release(struct inode *, struct file *); > +int nfsd_rpc_status_open(struct inode *inode, struct file *file); > void nfsd_shutdown_threads(struct net *net); > > static inline void nfsd_put(struct net *net) > @@ -511,12 +512,18 @@ extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); > > extern void nfsd4_init_leases_net(struct nfsd_net *nn); > > +const char *nfsd4_op_name(unsigned int opnum); > #else /* CONFIG_NFSD_V4 */ > static inline int nfsd4_is_junction(struct dentry *dentry) > { > return 0; > } > > +static inline const char *nfsd4_op_name(unsigned int opnum) > +{ > + return "unknown_operation"; > +} > + > static inline void nfsd4_init_leases_net(struct nfsd_net *nn) { }; > > #define register_cld_notifier() 0 > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index 460219030ce1..237be14d3a11 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -998,6 +998,15 @@ int nfsd_dispatch(struct svc_rqst *rqstp) > if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream)) > goto out_decode_err; > > + /* > + * Release rq_status_counter setting it to an odd value after the rpc > + * request has been properly parsed. rq_status_counter is used to > + * notify the consumers if the rqstp fields are stable > + * (rq_status_counter is odd) or not meaningful (rq_status_counter > + * is even). > + */ > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter | 1); > + > rp = NULL; > switch (nfsd_cache_lookup(rqstp, &rp)) { > case RC_DOIT: > @@ -1015,6 +1024,12 @@ int nfsd_dispatch(struct svc_rqst *rqstp) > if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream)) > goto out_encode_err; > > + /* > + * Release rq_status_counter setting it to an even value after the rpc > + * request has been properly processed. > + */ > + smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); > + > nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1); > out_cached_reply: > return 1; > @@ -1101,3 +1116,128 @@ int nfsd_stats_release(struct inode *inode, struct file *file) > mutex_unlock(&nfsd_mutex); > return ret; > } > + > +static int nfsd_rpc_status_show(struct seq_file *m, void *v) > +{ > + struct inode *inode = file_inode(m->file); > + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); > + int i; > + > + seq_puts(m, "# XID FLAGS VERS PROC TIMESTAMP SADDR SPORT DADDR DPORT COMPOUND_OPS\n"); > + > + rcu_read_lock(); > + > + for (i = 0; i < nn->nfsd_serv->sv_nrpools; i++) { > + struct svc_rqst *rqstp; > + > + list_for_each_entry_rcu(rqstp, > + &nn->nfsd_serv->sv_pools[i].sp_all_threads, > + rq_all) { > + struct { > + struct sockaddr daddr; > + struct sockaddr saddr; > + unsigned long rq_flags; > + const char *pc_name; > + ktime_t rq_stime; > + __be32 rq_xid; > + u32 rq_vers; > + /* NFSv4 compund */ > + u32 opnum[NFSD_MAX_OPS_PER_COMPOUND]; > + } rqstp_info; > + unsigned int status_counter; > + char buf[RPC_MAX_ADDRBUFLEN]; > + int opcnt = 0; > + > + /* > + * Acquire rq_status_counter before parsing the rqst > + * fields. rq_status_counter is set to an odd value in > + * order to notify the consumers the rqstp fields are > + * meaningful. > + */ > + status_counter = smp_load_acquire(&rqstp->rq_status_counter); > + if (!(status_counter & 1)) > + continue; > + > + rqstp_info.rq_xid = rqstp->rq_xid; > + rqstp_info.rq_flags = rqstp->rq_flags; > + rqstp_info.rq_vers = rqstp->rq_vers; > + rqstp_info.pc_name = svc_proc_name(rqstp); > + rqstp_info.rq_stime = rqstp->rq_stime; > + memcpy(&rqstp_info.daddr, svc_daddr(rqstp), > + sizeof(struct sockaddr)); > + memcpy(&rqstp_info.saddr, svc_addr(rqstp), > + sizeof(struct sockaddr)); > + > +#ifdef CONFIG_NFSD_V4 > + if (rqstp->rq_vers == NFS4_VERSION && > + rqstp->rq_proc == NFSPROC4_COMPOUND) { > + /* NFSv4 compund */ > + struct nfsd4_compoundargs *args = rqstp->rq_argp; > + int j; > + > + opcnt = args->opcnt; > + for (j = 0; j < opcnt; j++) { > + struct nfsd4_op *op = &args->ops[j]; > + > + rqstp_info.opnum[j] = op->opnum; > + } > + } > +#endif /* CONFIG_NFSD_V4 */ > + > + /* > + * Acquire rq_status_counter before reporting the rqst > + * fields to the user. > + */ > + if (smp_load_acquire(&rqstp->rq_status_counter) != status_counter) > + continue; > + > + seq_printf(m, > + "%04u %04ld NFSv%d %s %016lld", > + be32_to_cpu(rqstp_info.rq_xid), It's proper to display XIDs as 8-hexit hexadecimal values, as you did before. "0x%08x" is the correct format, as that matches the XID display format used by Wireshark and our tracepoints. > + rqstp_info.rq_flags, I didn't mean for you to change the flags format to decimal. I was trying to point out that the content of this field will need to be displayed symbolically if we care about an easy user experience. Let's stick with hex here. A clever user can read the bits directly from that. All others should have a tool that parses this field and prints the list of bits in it symbolically. > + rqstp_info.rq_vers, > + rqstp_info.pc_name, > + ktime_to_us(rqstp_info.rq_stime)); > + seq_printf(m, " %s", > + __svc_print_addr(&rqstp_info.saddr, buf, > + sizeof(buf), false)); > + seq_printf(m, " %s", > + __svc_print_addr(&rqstp_info.daddr, buf, > + sizeof(buf), false)); > + if (opcnt) { > + int j; > + > + seq_puts(m, " "); > + for (j = 0; j < opcnt; j++) > + seq_printf(m, "%s%s", > + nfsd4_op_name(rqstp_info.opnum[j]), > + j == opcnt - 1 ? "" : ":"); > + } else { > + seq_puts(m, " -"); > + } This looks correct to me. I'm leaning towards moving this to a netlink API that can be extended over time to handle other stats and also act as an NFSD control plane, similar to other network subsystems. Any comments, complaints or rotten fruit from anyone? > + seq_puts(m, "\n"); > + } > + } > + > + rcu_read_unlock(); > + > + return 0; > +} > + > +/** > + * nfsd_rpc_status_open - open routine for nfsd_rpc_status handler > + * @inode: entry inode pointer. > + * @file: entry file pointer. > + * > + * nfsd_rpc_status_open is the open routine for nfsd_rpc_status procfs handler. > + * nfsd_rpc_status dumps pending RPC requests info queued into nfs server. > + */ > +int nfsd_rpc_status_open(struct inode *inode, struct file *file) > +{ > + int ret = nfsd_stats_open(inode); > + > + if (ret) > + return ret; > + > + return single_open(file, nfsd_rpc_status_show, inode->i_private); > +} > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h > index 7838b37bcfa8..b49c0470b4fe 100644 > --- a/include/linux/sunrpc/svc.h > +++ b/include/linux/sunrpc/svc.h > @@ -251,6 +251,7 @@ struct svc_rqst { > * net namespace > */ > void ** rq_lease_breaker; /* The v4 client breaking a lease */ > + unsigned int rq_status_counter; /* RPC processing counter */ > }; > > /* bits for rq_flags */ > diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c > index af692bff44ab..83bee19df104 100644 > --- a/net/sunrpc/svc.c > +++ b/net/sunrpc/svc.c > @@ -1656,7 +1656,7 @@ const char *svc_proc_name(const struct svc_rqst *rqstp) > return rqstp->rq_procinfo->pc_name; > return "unknown"; > } > - > +EXPORT_SYMBOL_GPL(svc_proc_name); > > /** > * svc_encode_result_payload - mark a range of bytes as a result payload > -- > 2.41.0 > -- Chuck Lever