The math used to track the average and standard deviation of the read, write and metadata latencies looks incorrect. Fix that up. Also, rename the variables that track the standard deviation from *_sq_sum to *_stdev. Signed-off-by: Venky Shankar <vshankar@xxxxxxxxxx> --- fs/ceph/debugfs.c | 14 +++++----- fs/ceph/metric.c | 70 ++++++++++++++++++++++------------------------- fs/ceph/metric.h | 9 ++++-- 3 files changed, 45 insertions(+), 48 deletions(-) diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 38b78b45811f..3abfa7ae8220 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -152,7 +152,7 @@ static int metric_show(struct seq_file *s, void *p) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_client_metric *m = &mdsc->metric; int nr_caps = 0; - s64 total, sum, avg, min, max, sq; + s64 total, sum, avg, min, max, stdev; u64 sum_sz, avg_sz, min_sz, max_sz; sum = percpu_counter_sum(&m->total_inodes); @@ -175,9 +175,9 @@ static int metric_show(struct seq_file *s, void *p) avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0; min = m->read_latency_min; max = m->read_latency_max; - sq = m->read_latency_sq_sum; + stdev = m->read_latency_stdev; spin_unlock(&m->read_metric_lock); - CEPH_LAT_METRIC_SHOW("read", total, avg, min, max, sq); + CEPH_LAT_METRIC_SHOW("read", total, avg, min, max, stdev); spin_lock(&m->write_metric_lock); total = m->total_writes; @@ -185,9 +185,9 @@ static int metric_show(struct seq_file *s, void *p) avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0; min = m->write_latency_min; max = m->write_latency_max; - sq = m->write_latency_sq_sum; + stdev = m->write_latency_stdev; spin_unlock(&m->write_metric_lock); - CEPH_LAT_METRIC_SHOW("write", total, avg, min, max, sq); + CEPH_LAT_METRIC_SHOW("write", total, avg, min, max, stdev); spin_lock(&m->metadata_metric_lock); total = m->total_metadatas; @@ -195,9 +195,9 @@ static int metric_show(struct seq_file *s, void *p) avg = total > 0 ? 
DIV64_U64_ROUND_CLOSEST(sum, total) : 0; min = m->metadata_latency_min; max = m->metadata_latency_max; - sq = m->metadata_latency_sq_sum; + stdev = m->metadata_latency_stdev; spin_unlock(&m->metadata_metric_lock); - CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq); + CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, stdev); seq_printf(s, "\n"); seq_printf(s, "item total avg_sz(bytes) min_sz(bytes) max_sz(bytes) total_sz(bytes)\n"); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 226dc38e2909..6b774b1a88ce 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -244,7 +244,8 @@ int ceph_metric_init(struct ceph_client_metric *m) goto err_i_caps_mis; spin_lock_init(&m->read_metric_lock); - m->read_latency_sq_sum = 0; + m->read_latency_stdev = 0; + m->avg_read_latency = 0; m->read_latency_min = KTIME_MAX; m->read_latency_max = 0; m->total_reads = 0; @@ -254,7 +255,8 @@ int ceph_metric_init(struct ceph_client_metric *m) m->read_size_sum = 0; spin_lock_init(&m->write_metric_lock); - m->write_latency_sq_sum = 0; + m->write_latency_stdev = 0; + m->avg_write_latency = 0; m->write_latency_min = KTIME_MAX; m->write_latency_max = 0; m->total_writes = 0; @@ -264,7 +266,8 @@ int ceph_metric_init(struct ceph_client_metric *m) m->write_size_sum = 0; spin_lock_init(&m->metadata_metric_lock); - m->metadata_latency_sq_sum = 0; + m->metadata_latency_stdev = 0; + m->avg_metadata_latency = 0; m->metadata_latency_min = KTIME_MAX; m->metadata_latency_max = 0; m->total_metadatas = 0; @@ -322,20 +325,26 @@ void ceph_metric_destroy(struct ceph_client_metric *m) max = new; \ } -static inline void __update_stdev(ktime_t total, ktime_t lsum, - ktime_t *sq_sump, ktime_t lat) +static inline void __update_latency(ktime_t *ctotal, ktime_t *lsum, + ktime_t *lavg, ktime_t *min, ktime_t *max, + ktime_t *lstdev, ktime_t lat) { - ktime_t avg, sq; + ktime_t total, avg, stdev; - if (unlikely(total == 1)) - return; + total = ++(*ctotal); + *lsum += lat; + + METRIC_UPDATE_MIN_MAX(*min, 
*max, lat); - /* the sq is (lat - old_avg) * (lat - new_avg) */ - avg = DIV64_U64_ROUND_CLOSEST((lsum - lat), (total - 1)); - sq = lat - avg; - avg = DIV64_U64_ROUND_CLOSEST(lsum, total); - sq = sq * (lat - avg); - *sq_sump += sq; + if (unlikely(total == 1)) { + *lavg = lat; + *lstdev = 0; + } else { + avg = *lavg + div64_s64(lat - *lavg, total); + stdev = *lstdev + (lat - *lavg)*(lat - avg); + *lstdev = int_sqrt(div64_u64(stdev, total - 1)); + *lavg = avg; + } } void ceph_update_read_metrics(struct ceph_client_metric *m, @@ -343,23 +352,18 @@ void ceph_update_read_metrics(struct ceph_client_metric *m, unsigned int size, int rc) { ktime_t lat = ktime_sub(r_end, r_start); - ktime_t total; if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT)) return; spin_lock(&m->read_metric_lock); - total = ++m->total_reads; m->read_size_sum += size; - m->read_latency_sum += lat; METRIC_UPDATE_MIN_MAX(m->read_size_min, m->read_size_max, size); - METRIC_UPDATE_MIN_MAX(m->read_latency_min, - m->read_latency_max, - lat); - __update_stdev(total, m->read_latency_sum, - &m->read_latency_sq_sum, lat); + __update_latency(&m->total_reads, &m->read_latency_sum, + &m->avg_read_latency, &m->read_latency_min, + &m->read_latency_max, &m->read_latency_stdev, lat); spin_unlock(&m->read_metric_lock); } @@ -368,23 +372,18 @@ void ceph_update_write_metrics(struct ceph_client_metric *m, unsigned int size, int rc) { ktime_t lat = ktime_sub(r_end, r_start); - ktime_t total; if (unlikely(rc && rc != -ETIMEDOUT)) return; spin_lock(&m->write_metric_lock); - total = ++m->total_writes; m->write_size_sum += size; - m->write_latency_sum += lat; METRIC_UPDATE_MIN_MAX(m->write_size_min, m->write_size_max, size); - METRIC_UPDATE_MIN_MAX(m->write_latency_min, - m->write_latency_max, - lat); - __update_stdev(total, m->write_latency_sum, - &m->write_latency_sq_sum, lat); + __update_latency(&m->total_writes, &m->write_latency_sum, + &m->avg_write_latency, &m->write_latency_min, + &m->write_latency_max, 
&m->write_latency_stdev, lat); spin_unlock(&m->write_metric_lock); } @@ -393,18 +392,13 @@ void ceph_update_metadata_metrics(struct ceph_client_metric *m, int rc) { ktime_t lat = ktime_sub(r_end, r_start); - ktime_t total; if (unlikely(rc && rc != -ENOENT)) return; spin_lock(&m->metadata_metric_lock); - total = ++m->total_metadatas; - m->metadata_latency_sum += lat; - METRIC_UPDATE_MIN_MAX(m->metadata_latency_min, - m->metadata_latency_max, - lat); - __update_stdev(total, m->metadata_latency_sum, - &m->metadata_latency_sq_sum, lat); + __update_latency(&m->total_metadatas, &m->metadata_latency_sum, + &m->avg_metadata_latency, &m->metadata_latency_min, + &m->metadata_latency_max, &m->metadata_latency_stdev, lat); spin_unlock(&m->metadata_metric_lock); } diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index 103ed736f9d2..a5da21b8f8ed 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -138,7 +138,8 @@ struct ceph_client_metric { u64 read_size_min; u64 read_size_max; ktime_t read_latency_sum; - ktime_t read_latency_sq_sum; + ktime_t avg_read_latency; + ktime_t read_latency_stdev; ktime_t read_latency_min; ktime_t read_latency_max; @@ -148,14 +149,16 @@ struct ceph_client_metric { u64 write_size_min; u64 write_size_max; ktime_t write_latency_sum; - ktime_t write_latency_sq_sum; + ktime_t avg_write_latency; + ktime_t write_latency_stdev; ktime_t write_latency_min; ktime_t write_latency_max; spinlock_t metadata_metric_lock; u64 total_metadatas; ktime_t metadata_latency_sum; - ktime_t metadata_latency_sq_sum; + ktime_t avg_metadata_latency; + ktime_t metadata_latency_stdev; ktime_t metadata_latency_min; ktime_t metadata_latency_max; -- 2.31.1