From: Coly Li <bosong.ly@xxxxxxxxxxxxxxx> This patch adds I/O latency measurement code in dm.c. The basic idea is, 1) add timestamp in struct dm_io as unsigned long start_time_usec 2) set timestamp when dm_io issued to underlaying block layer. 3) when all sub-I/O completed, calculate time difference against start_time_usec as I/O latency. 4) account the latency to corresponding counters like, - latency_stats_us - latency_stats_ms - latency_stats_s Signed-off-by: Coly Li <bosong.ly@xxxxxxxxxxxxxxx> Reviewed-by: Robin Dong <sanbai@xxxxxxxxxxxxxxx> Reviewed-by: Tao Ma <boyu.mt@xxxxxxxxxx> --- drivers/md/dm.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/dm.h | 13 +++++++++++++ 2 files changed, 82 insertions(+) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e2532c8..200954a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,6 +20,7 @@ #include <linux/hdreg.h> #include <linux/delay.h> #include <linux/wait.h> +#include <linux/clocksource.h> #include <trace/events/block.h> @@ -67,6 +68,7 @@ struct dm_io { atomic_t io_count; struct bio *bio; unsigned long start_time; + unsigned long start_time_usec; spinlock_t endio_lock; struct dm_stats_aux stats_aux; }; @@ -521,14 +523,72 @@ static int md_in_flight(struct mapped_device *md) atomic_read(&md->pending[WRITE]); } +static unsigned long long us2msecs(unsigned long long usec) +{ + usec += 500; + do_div(usec, 1000); + return usec; +} + +static unsigned long long us2secs(unsigned long long usec) +{ + usec += 500; + do_div(usec, 1000); + usec += 500; + do_div(usec, 1000); + return usec; +} + +static void update_latency_stats(struct mapped_device *md, struct dm_io *io) +{ + uint64_t now, latency; + int idx; + ktime_t ts; + + ts = ktime_get(); + now = (uint64_t)ktime_to_us(ts); + + /* + * if now <= io->start_time_usec, it means counter + * in ktime_get() over flows, just ignore this I/O + */ + if (unlikely(now < io->start_time_usec)) + return; + + latency = (uint64_t)(now - io->start_time_usec); + if (latency < 1000) { + /* microseconds */ + idx = latency/DM_LATENCY_STATS_US_GRAINSIZE; + if (idx > (DM_LATENCY_STATS_US_NR - 1)) + idx = DM_LATENCY_STATS_US_NR - 1; + atomic_inc(&(md->latency_stats_us[idx])); + } else if (latency < 1000000) { + /* milliseconds */ + idx = us2msecs(latency)/DM_LATENCY_STATS_MS_GRAINSIZE; + if (idx > (DM_LATENCY_STATS_MS_NR - 1)) + idx = DM_LATENCY_STATS_MS_NR - 1; + atomic_inc(&(md->latency_stats_ms[idx])); + } else { + /* seconds */ + idx = us2secs(latency)/DM_LATENCY_STATS_S_GRAINSIZE; + if (idx > (DM_LATENCY_STATS_S_NR - 1)) + idx = DM_LATENCY_STATS_S_NR - 1; + atomic_inc(&(md->latency_stats_s[idx])); + } +} + static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; struct bio *bio = io->bio; + ktime_t ts; + int cpu; int rw = bio_data_dir(bio); io->start_time = jiffies; + ts = ktime_get(); + io->start_time_usec = (unsigned long)ktime_to_us(ts); cpu = part_stat_lock(); part_round_stats(cpu, &dm_disk(md)->part0); @@ -549,6 +609,7 @@ static void end_io_acct(struct dm_io *io) int pending; int rw = bio_data_dir(bio); + update_latency_stats(md, io); generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time); if (unlikely(dm_stats_used(&md->stats))) @@ -2118,6 +2179,14 @@ static struct mapped_device *alloc_dev(int minor) dm_stats_init(&md->stats); + /* initial latency stats buckets */ + for (r = 0; r < DM_LATENCY_STATS_S_NR; r++) + atomic_set(&(md->latency_stats_s[r]), 0); + for (r = 0; r < DM_LATENCY_STATS_MS_NR; r++) + atomic_set(&(md->latency_stats_ms[r]), 0); + for (r = 0; r < DM_LATENCY_STATS_US_NR; r++) + atomic_set(&(md->latency_stats_us[r]), 0); + /* Populate the mapping, nobody knows we exist yet */ spin_lock(&_minor_lock); old_md = idr_replace(&_minor_idr, md, minor); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 3014667..6c254f4 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -32,6 +32,14 @@ */ #define DM_STATUS_NOFLUSH_FLAG (1 << 0) +/* 100s is long enough to measure disk I/O latency */ +#define DM_LATENCY_STATS_S_NR 100 +#define DM_LATENCY_STATS_S_GRAINSIZE (1000/DM_LATENCY_STATS_S_NR) +#define DM_LATENCY_STATS_MS_NR 100 +#define DM_LATENCY_STATS_MS_GRAINSIZE (1000/DM_LATENCY_STATS_MS_NR) +#define DM_LATENCY_STATS_US_NR 100 +#define DM_LATENCY_STATS_US_GRAINSIZE (1000/DM_LATENCY_STATS_US_NR) + /* * Type of table and mapped_device's mempool */ @@ -135,6 +143,11 @@ struct mapped_device { struct kthread_worker kworker; struct task_struct *kworker_task; + + /* latency statistic buckets */ + atomic_t latency_stats_s[DM_LATENCY_STATS_S_NR]; + atomic_t latency_stats_ms[DM_LATENCY_STATS_MS_NR]; + atomic_t latency_stats_us[DM_LATENCY_STATS_US_NR]; }; /* -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel