[PATCH v8 06/11] fs: add percpu counters for significant multigrain timestamp events

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



New percpu counters for counting various stats around mgtimes, and a new
debugfs file for displaying them when CONFIG_DEBUG_FS is enabled:

- number of attempted ctime updates
- number of successful i_ctime_nsec swaps
- number of fine-grained timestamp fetches
- number of coarse-grained floor swaps

Reviewed-by: Josef Bacik <josef@xxxxxxxxxxxxxx>
Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>
Reviewed-by: Jan Kara <jack@xxxxxxx>
Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
---
 fs/inode.c                         | 76 ++++++++++++++++++++++++++++++++++++--
 include/linux/timekeeping.h        |  1 +
 kernel/time/timekeeping.c          |  3 +-
 kernel/time/timekeeping_debug.c    | 12 ++++++
 kernel/time/timekeeping_internal.h |  3 ++
 5 files changed, 90 insertions(+), 5 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index d7da9d06921f..1f0487104c71 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,6 +21,8 @@
 #include <linux/list_lru.h>
 #include <linux/iversion.h>
 #include <linux/rw_hint.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 #include <trace/events/writeback.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/timestamp.h>
@@ -101,6 +103,70 @@ long get_nr_dirty_inodes(void)
 	return nr_dirty > 0 ? nr_dirty : 0;
 }
 
+#ifdef CONFIG_DEBUG_FS
+static DEFINE_PER_CPU(long, mg_ctime_updates);
+static DEFINE_PER_CPU(long, mg_fine_stamps);
+static DEFINE_PER_CPU(long, mg_ctime_swaps);
+
+static long get_mg_ctime_updates(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_ctime_updates, i);
+	return sum < 0 ? 0 : sum;
+}
+
+static long get_mg_fine_stamps(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_fine_stamps, i);
+	return sum < 0 ? 0 : sum;
+}
+
+static long get_mg_ctime_swaps(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_ctime_swaps, i);
+	return sum < 0 ? 0 : sum;
+}
+
+#define mgtime_counter_inc(__var)	this_cpu_inc(__var)
+
+static int mgts_show(struct seq_file *s, void *p)
+{
+	long ctime_updates = get_mg_ctime_updates();
+	long ctime_swaps = get_mg_ctime_swaps();
+	long fine_stamps = get_mg_fine_stamps();
+	long floor_swaps = get_mg_floor_swaps();
+
+	seq_printf(s, "%ld %ld %ld %ld\n",
+		   ctime_updates, ctime_swaps, fine_stamps, floor_swaps);
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mgts);
+
+static int __init mg_debugfs_init(void)
+{
+	debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops);
+	return 0;
+}
+late_initcall(mg_debugfs_init);
+
+#else /* ! CONFIG_DEBUG_FS */
+
+#define mgtime_counter_inc()	do { } while (0)
+
+#endif /* CONFIG_DEBUG_FS */
+
 /*
  * Handle nr_inode sysctl
  */
@@ -2655,10 +2721,9 @@ EXPORT_SYMBOL(timestamp_truncate);
  *
  * If it is multigrain, then we first see if the coarse-grained timestamp is
  * distinct from what we have. If so, then we'll just use that. If we have to
- * get a fine-grained timestamp, then do so, and try to swap it into the floor.
- * We accept the new floor value regardless of the outcome of the cmpxchg.
- * After that, we try to swap the new value into i_ctime_nsec. Again, we take
- * the resulting ctime, regardless of the outcome of the swap.
+ * get a fine-grained timestamp, then do so. After that, we try to swap the new
+ * value into i_ctime_nsec. We take the resulting ctime, regardless of the
+ * outcome of the swap.
  */
 struct timespec64 inode_set_ctime_current(struct inode *inode)
 {
@@ -2687,8 +2752,10 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 		if (timespec64_compare(&now, &ctime) <= 0) {
 			ktime_get_real_ts64_mg(&now);
 			now = timestamp_truncate(now, inode);
+			mgtime_counter_inc(mg_fine_stamps);
 		}
 	}
+	mgtime_counter_inc(mg_ctime_updates);
 
 	/* No need to cmpxchg if it's exactly the same */
 	if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) {
@@ -2702,6 +2769,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
 		/* If swap occurred, then we're (mostly) done */
 		inode->i_ctime_sec = now.tv_sec;
 		trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
+		mgtime_counter_inc(mg_ctime_swaps);
 	} else {
 		/*
 		 * Was the change due to someone marking the old ctime QUERIED?
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7aa85246c183..b9c8c597a073 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
 /* Multigrain timestamp interfaces */
 extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
 extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern long get_mg_floor_swaps(void);
 
 void getboottime64(struct timespec64 *ts);
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16937242b904..94b0219955a2 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -2440,7 +2440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_real_ts64_mg);
  * regardless of the outcome of the swap. Note that this is a filesystem
  * specific interface and should be avoided outside of that context.
  */
-void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
+void ktime_get_real_ts64_mg(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
 	ktime_t old = atomic64_read(&mg_floor);
@@ -2464,6 +2464,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie)
 	if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) {
 		ts->tv_nsec = 0;
 		timespec64_add_ns(ts, nsecs);
+		mgtime_counter_inc(mg_floor_swaps);
 	} else {
 		/*
 		 * Something has changed mg_floor since "old" was
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index b73e8850e58d..9a3792072762 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -17,6 +17,9 @@
 
 #define NUM_BINS 32
 
+/* incremented every time mg_floor is updated */
+DEFINE_PER_CPU(long, mg_floor_swaps);
+
 static unsigned int sleep_time_bin[NUM_BINS] = {0};
 
 static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
@@ -53,3 +56,12 @@ void tk_debug_account_sleep_time(const struct timespec64 *t)
 			   (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC);
 }
 
+long get_mg_floor_swaps(void)
+{
+	int i;
+	long sum = 0;
+
+	for_each_possible_cpu(i)
+		sum += per_cpu(mg_floor_swaps, i);
+	return sum < 0 ? 0 : sum;
+}
diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h
index 4ca2787d1642..2b49332b45a5 100644
--- a/kernel/time/timekeeping_internal.h
+++ b/kernel/time/timekeeping_internal.h
@@ -11,8 +11,11 @@
  */
 #ifdef CONFIG_DEBUG_FS
 extern void tk_debug_account_sleep_time(const struct timespec64 *t);
+DECLARE_PER_CPU(long, mg_floor_swaps);
+#define mgtime_counter_inc(__var)	this_cpu_inc(__var)
 #else
 #define tk_debug_account_sleep_time(x)
+#define mgtime_counter_inc()	do { } while (0)
 #endif
 
 #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE

-- 
2.46.0





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux