From: Xiubo Li <xiubli@xxxxxxxxxx> These will be very useful help diagnose problems. URL: https://tracker.ceph.com/issues/44533 Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx> --- Changed in V2: - switch spin lock to cmpxchg Changed in V3: - add the __update_min/max_latency helpers - minor fix fs/ceph/debugfs.c | 26 +++++++++++++++++++++----- fs/ceph/mds_client.c | 9 +++++++++ fs/ceph/metric.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 60f3e307..bcf7215 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -130,27 +130,43 @@ static int metric_show(struct seq_file *s, void *p) struct ceph_mds_client *mdsc = fsc->mdsc; int i, nr_caps = 0; s64 total, sum, avg = 0; + unsigned long min, max; - seq_printf(s, "item total sum_lat(us) avg_lat(us)\n"); - seq_printf(s, "-----------------------------------------------------\n"); + seq_printf(s, "item total sum_lat(us) avg_lat(us) min_lat(us) max_lat(us)\n"); + seq_printf(s, "-------------------------------------------------------------------------------------\n"); total = percpu_counter_sum(&mdsc->metric.total_reads); sum = percpu_counter_sum(&mdsc->metric.read_latency_sum); sum = jiffies_to_usecs(sum); avg = total ? sum / total : 0; - seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "read", total, sum, avg); + min = atomic_long_read(&mdsc->metric.read_latency_min); + min = jiffies_to_usecs(min == ULONG_MAX ? 0 : min); + max = atomic_long_read(&mdsc->metric.read_latency_max); + max = jiffies_to_usecs(max); + seq_printf(s, "%-14s%-12lld%-16lld%-16lld%-16ld%ld\n", "read", + total, sum, avg, min, max); total = percpu_counter_sum(&mdsc->metric.total_writes); sum = percpu_counter_sum(&mdsc->metric.write_latency_sum); sum = jiffies_to_usecs(sum); avg = total ? sum / total : 0; - seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); + min = atomic_long_read(&mdsc->metric.write_latency_min); + min = jiffies_to_usecs(min == ULONG_MAX ? 0 : min); + max = atomic_long_read(&mdsc->metric.write_latency_max); + max = jiffies_to_usecs(max); + seq_printf(s, "%-14s%-12lld%-16lld%-16lld%-16ld%ld\n", "write", + total, sum, avg, min, max); total = percpu_counter_sum(&mdsc->metric.total_metadatas); sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); sum = jiffies_to_usecs(sum); avg = total ? sum / total : 0; - seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); + min = atomic_long_read(&mdsc->metric.metadata_latency_min); + min = jiffies_to_usecs(min == ULONG_MAX ? 0 : min); + max = atomic_long_read(&mdsc->metric.metadata_latency_max); + max = jiffies_to_usecs(max); + seq_printf(s, "%-14s%-12lld%-16lld%-16lld%-16ld%ld\n", "metadata", + total, sum, avg, min, max); seq_printf(s, "\n"); seq_printf(s, "item total miss hit\n"); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 5c03ed3..7844aa6 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4358,6 +4358,9 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric) if (ret) goto err_read_latency_sum; + atomic_long_set(&metric->read_latency_min, ULONG_MAX); + atomic_long_set(&metric->read_latency_max, 0); + ret = percpu_counter_init(&metric->total_writes, 0, GFP_KERNEL); if (ret) goto err_total_writes; @@ -4366,6 +4369,9 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric) if (ret) goto err_write_latency_sum; + atomic_long_set(&metric->write_latency_min, ULONG_MAX); + atomic_long_set(&metric->write_latency_max, 0); + ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL); if (ret) goto err_total_metadatas; @@ -4374,6 +4380,9 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric) if (ret) goto err_metadata_latency_sum; + atomic_long_set(&metric->metadata_latency_min, ULONG_MAX); + atomic_long_set(&metric->metadata_latency_max, 0); + return 0; err_metadata_latency_sum: diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index faba142..c9c76d5 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -2,6 +2,10 @@ #ifndef _FS_CEPH_MDS_METRIC_H #define _FS_CEPH_MDS_METRIC_H +#include <linux/atomic.h> +#include <linux/percpu.h> +#include <linux/spinlock.h> + /* This is the global metrics */ struct ceph_client_metric { atomic64_t total_dentries; @@ -13,12 +17,18 @@ struct ceph_client_metric { struct percpu_counter total_reads; struct percpu_counter read_latency_sum; + atomic_long_t read_latency_min; + atomic_long_t read_latency_max; struct percpu_counter total_writes; struct percpu_counter write_latency_sum; + atomic_long_t write_latency_min; + atomic_long_t write_latency_max; struct percpu_counter total_metadatas; struct percpu_counter metadata_latency_sum; + atomic_long_t metadata_latency_min; + atomic_long_t metadata_latency_max; }; static inline void ceph_update_cap_hit(struct ceph_client_metric *m) @@ -31,16 +41,44 @@ static inline void ceph_update_cap_mis(struct ceph_client_metric *m) percpu_counter_inc(&m->i_caps_mis); } +static inline void __update_min_latency(atomic_long_t *min, unsigned long lat) +{ + unsigned long cur, old; + + cur = atomic_long_read(min); + do { + old = cur; + if (likely(lat >= old)) + break; + } while ((cur = atomic_long_cmpxchg(min, old, lat)) != old); +} + +static inline void __update_max_latency(atomic_long_t *max, unsigned long lat) +{ + unsigned long cur, old; + + cur = atomic_long_read(max); + do { + old = cur; + if (likely(lat <= old)) + break; + } while ((cur = atomic_long_cmpxchg(max, old, lat)) != old); +} + static inline void ceph_update_read_latency(struct ceph_client_metric *m, unsigned long r_start, unsigned long r_end, int rc) { + unsigned long lat = r_end - r_start; + if (rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT) return; percpu_counter_inc(&m->total_reads); - percpu_counter_add(&m->read_latency_sum, r_end - r_start); + percpu_counter_add(&m->read_latency_sum, lat); + __update_min_latency(&m->read_latency_min, lat); + __update_max_latency(&m->read_latency_max, lat); } static inline void ceph_update_write_latency(struct ceph_client_metric *m, @@ -48,11 +86,15 @@ static inline void ceph_update_write_latency(struct ceph_client_metric *m, unsigned long r_end, int rc) { + unsigned long lat = r_end - r_start; + if (rc && rc != -ETIMEDOUT) return; percpu_counter_inc(&m->total_writes); - percpu_counter_add(&m->write_latency_sum, r_end - r_start); + percpu_counter_add(&m->write_latency_sum, lat); + __update_min_latency(&m->write_latency_min, lat); + __update_max_latency(&m->write_latency_max, lat); } static inline void ceph_update_metadata_latency(struct ceph_client_metric *m, @@ -60,10 +102,14 @@ static inline void ceph_update_metadata_latency(struct ceph_client_metric *m, unsigned long r_end, int rc) { + unsigned long lat = r_end - r_start; + if (rc && rc != -ENOENT) return; percpu_counter_inc(&m->total_metadatas); - percpu_counter_add(&m->metadata_latency_sum, r_end - r_start); + percpu_counter_add(&m->metadata_latency_sum, lat); + __update_min_latency(&m->metadata_latency_min, lat); + __update_max_latency(&m->metadata_latency_max, lat); } #endif /* _FS_CEPH_MDS_METRIC_H */ -- 1.8.3.1