New percpu counters for counting various stats around mgtimes, and a new debugfs file for displaying them when CONFIG_DEBUG_FS is enabled: - number of attempted ctime updates - number of successful i_ctime_nsec swaps - number of fine-grained timestamp fetches - number of coarse-grained floor swaps Reviewed-by: Josef Bacik <josef@xxxxxxxxxxxxxx> Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> Reviewed-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx> --- fs/inode.c | 76 ++++++++++++++++++++++++++++++++++++-- include/linux/timekeeping.h | 1 + kernel/time/timekeeping.c | 3 +- kernel/time/timekeeping_debug.c | 12 ++++++ kernel/time/timekeeping_internal.h | 3 ++ 5 files changed, 90 insertions(+), 5 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index d7da9d06921f..1f0487104c71 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -21,6 +21,8 @@ #include <linux/list_lru.h> #include <linux/iversion.h> #include <linux/rw_hint.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> #include <trace/events/writeback.h> #define CREATE_TRACE_POINTS #include <trace/events/timestamp.h> @@ -101,6 +103,70 @@ long get_nr_dirty_inodes(void) return nr_dirty > 0 ? nr_dirty : 0; } +#ifdef CONFIG_DEBUG_FS +static DEFINE_PER_CPU(long, mg_ctime_updates); +static DEFINE_PER_CPU(long, mg_fine_stamps); +static DEFINE_PER_CPU(long, mg_ctime_swaps); + +static long get_mg_ctime_updates(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(mg_ctime_updates, i); + return sum < 0 ? 0 : sum; +} + +static long get_mg_fine_stamps(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(mg_fine_stamps, i); + return sum < 0 ? 0 : sum; +} + +static long get_mg_ctime_swaps(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(mg_ctime_swaps, i); + return sum < 0 ? 0 : sum; +} + +#define mgtime_counter_inc(__var) this_cpu_inc(__var) + +static int mgts_show(struct seq_file *s, void *p) +{ + long ctime_updates = get_mg_ctime_updates(); + long ctime_swaps = get_mg_ctime_swaps(); + long fine_stamps = get_mg_fine_stamps(); + long floor_swaps = get_mg_floor_swaps(); + + seq_printf(s, "%ld %ld %ld %ld\n", + ctime_updates, ctime_swaps, fine_stamps, floor_swaps); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(mgts); + +static int __init mg_debugfs_init(void) +{ + debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops); + return 0; +} +late_initcall(mg_debugfs_init); + +#else /* ! CONFIG_DEBUG_FS */ + +#define mgtime_counter_inc() do { } while (0) + +#endif /* CONFIG_DEBUG_FS */ + /* * Handle nr_inode sysctl */ @@ -2655,10 +2721,9 @@ EXPORT_SYMBOL(timestamp_truncate); * * If it is multigrain, then we first see if the coarse-grained timestamp is * distinct from what we have. If so, then we'll just use that. If we have to - * get a fine-grained timestamp, then do so, and try to swap it into the floor. - * We accept the new floor value regardless of the outcome of the cmpxchg. - * After that, we try to swap the new value into i_ctime_nsec. Again, we take - * the resulting ctime, regardless of the outcome of the swap. + * get a fine-grained timestamp, then do so. After that, we try to swap the new + * value into i_ctime_nsec. We take the resulting ctime, regardless of the + * outcome of the swap. */ struct timespec64 inode_set_ctime_current(struct inode *inode) { @@ -2687,8 +2752,10 @@ struct timespec64 inode_set_ctime_current(struct inode *inode) if (timespec64_compare(&now, &ctime) <= 0) { ktime_get_real_ts64_mg(&now); now = timestamp_truncate(now, inode); + mgtime_counter_inc(mg_fine_stamps); } } + mgtime_counter_inc(mg_ctime_updates); /* No need to cmpxchg if it's exactly the same */ if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) { @@ -2702,6 +2769,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode) /* If swap occurred, then we're (mostly) done */ inode->i_ctime_sec = now.tv_sec; trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur); + mgtime_counter_inc(mg_ctime_swaps); } else { /* * Was the change due to someone marking the old ctime QUERIED? diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 7aa85246c183..b9c8c597a073 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -48,6 +48,7 @@ extern void ktime_get_coarse_real_ts64(struct timespec64 *ts); /* Multigrain timestamp interfaces */ extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts); extern void ktime_get_real_ts64_mg(struct timespec64 *ts); +extern long get_mg_floor_swaps(void); void getboottime64(struct timespec64 *ts); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 16937242b904..94b0219955a2 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2440,7 +2440,7 @@ EXPORT_SYMBOL_GPL(ktime_get_coarse_real_ts64_mg); * regardless of the outcome of the swap. Note that this is a filesystem * specific interface and should be avoided outside of that context. */ -void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie) +void ktime_get_real_ts64_mg(struct timespec64 *ts) { struct timekeeper *tk = &tk_core.timekeeper; ktime_t old = atomic64_read(&mg_floor); @@ -2464,6 +2464,7 @@ void ktime_get_real_ts64_mg(struct timespec64 *ts, u64 cookie) if (atomic64_try_cmpxchg(&mg_floor, &old, mono)) { ts->tv_nsec = 0; timespec64_add_ns(ts, nsecs); + mgtime_counter_inc(mg_floor_swaps); } else { /* * Something has changed mg_floor since "old" was diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index b73e8850e58d..9a3792072762 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -17,6 +17,9 @@ #define NUM_BINS 32 +/* incremented every time mg_floor is updated */ +DEFINE_PER_CPU(long, mg_floor_swaps); + static unsigned int sleep_time_bin[NUM_BINS] = {0}; static int tk_debug_sleep_time_show(struct seq_file *s, void *data) @@ -53,3 +56,12 @@ void tk_debug_account_sleep_time(const struct timespec64 *t) (s64)t->tv_sec, t->tv_nsec / NSEC_PER_MSEC); } +long get_mg_floor_swaps(void) +{ + int i; + long sum = 0; + + for_each_possible_cpu(i) + sum += per_cpu(mg_floor_swaps, i); + return sum < 0 ? 0 : sum; +} diff --git a/kernel/time/timekeeping_internal.h b/kernel/time/timekeeping_internal.h index 4ca2787d1642..2b49332b45a5 100644 --- a/kernel/time/timekeeping_internal.h +++ b/kernel/time/timekeeping_internal.h @@ -11,8 +11,11 @@ */ #ifdef CONFIG_DEBUG_FS extern void tk_debug_account_sleep_time(const struct timespec64 *t); +DECLARE_PER_CPU(long, mg_floor_swaps); +#define mgtime_counter_inc(__var) this_cpu_inc(__var) #else #define tk_debug_account_sleep_time(x) +#define mgtime_counter_inc() do { } while (0) #endif #ifdef CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE -- 2.46.0