[PATCH 3/3] SUNRPC: Count ops completing with tk_status < 0

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We often see various error conditions with NFS4.x that show up with
a very high operation count all completing with tk_status < 0 in a
short period of time.  Add a count to rpc_iostats to record on a
per-op basis the ops that complete in this manner, which will
enable lower overhead diagnostics.

Signed-off-by: Dave Wysochanski <dwysocha@xxxxxxxxxx>
---
 include/linux/sunrpc/metrics.h | 7 ++++++-
 net/sunrpc/stats.c             | 8 ++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
index 1b3751327575..0ee3f7052846 100644
--- a/include/linux/sunrpc/metrics.h
+++ b/include/linux/sunrpc/metrics.h
@@ -30,7 +30,7 @@
 #include <linux/ktime.h>
 #include <linux/spinlock.h>
 
-#define RPC_IOSTATS_VERS	"1.0"
+#define RPC_IOSTATS_VERS	"1.1"
 
 struct rpc_iostats {
 	spinlock_t		om_lock;
@@ -66,6 +66,11 @@ struct rpc_iostats {
 	ktime_t			om_queue,	/* queued for xmit */
 				om_rtt,		/* RPC RTT */
 				om_execute;	/* RPC execution */
+	/*
+	 * The count of operations that complete with tk_status < 0.
+	 * These statuses usually indicate error conditions.
+	 */
+	unsigned long           om_error_status;
 } ____cacheline_aligned;
 
 struct rpc_task;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 8b2d3c58ffae..737414247ca7 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -176,6 +176,8 @@ void rpc_count_iostats_metrics(const struct rpc_task *task,
 
 	execute = ktime_sub(now, task->tk_start);
 	op_metrics->om_execute = ktime_add(op_metrics->om_execute, execute);
+	if (task->tk_status < 0)
+		op_metrics->om_error_status++;
 
 	spin_unlock(&op_metrics->om_lock);
 
@@ -218,13 +220,14 @@ static void _add_rpc_iostats(struct rpc_iostats *a, struct rpc_iostats *b)
 	a->om_queue = ktime_add(a->om_queue, b->om_queue);
 	a->om_rtt = ktime_add(a->om_rtt, b->om_rtt);
 	a->om_execute = ktime_add(a->om_execute, b->om_execute);
+	a->om_error_status += b->om_error_status;
 }
 
 static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
 			       int op, const struct rpc_procinfo *procs)
 {
 	_print_name(seq, op, procs);
-	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu\n",
+	seq_printf(seq, "%lu %lu %lu %llu %llu %llu %llu %llu %lu\n",
 		   stats->om_ops,
 		   stats->om_ntrans,
 		   stats->om_timeouts,
@@ -232,7 +235,8 @@ static void _print_rpc_iostats(struct seq_file *seq, struct rpc_iostats *stats,
 		   stats->om_bytes_recv,
 		   ktime_to_ms(stats->om_queue),
 		   ktime_to_ms(stats->om_rtt),
-		   ktime_to_ms(stats->om_execute));
+		   ktime_to_ms(stats->om_execute),
+		   stats->om_error_status);
 }
 
 void rpc_clnt_show_stats(struct seq_file *seq, struct rpc_clnt *clnt)
-- 
2.20.1




[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux