From: Guoqing Jiang <guoqing.jiang@xxxxxxxxxxxxxxx> Usually, we get the status of a block device by reading its stat file, but that file only tells us the total time spent on I/O. We would like more detailed statistics, such as the number of I/Os in each latency range, which helps people diagnose whether there is an issue with the hardware. Also, a new config option is introduced to control whether the additional statistics are collected; the same option is used for the I/O sector statistics in the next patch. This change is based on our internal patch from Florian-Ewald Mueller (florian-ewald.mueller@xxxxxxxxxxxxxxx). Signed-off-by: Guoqing Jiang <guoqing.jiang@xxxxxxxxxxxxxxx> --- Documentation/ABI/testing/sysfs-block | 8 ++++++ block/Kconfig | 8 ++++++ block/blk-core.c | 29 +++++++++++++++++++ block/genhd.c | 41 +++++++++++++++++++++++++++ include/linux/part_stat.h | 7 +++++ 5 files changed, 93 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index 2322eb748b38..1bae82238a79 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -23,6 +23,14 @@ Description: 17 - time spent flushing (ms) For more details refer Documentation/admin-guide/iostats.rst +What: /sys/block/<disk>/io_latency +Date: August 2020 +Contact: Guoqing Jiang <guoqing.jiang@xxxxxxxxxxxxxxx> +Description: + The /sys/block/<disk>/io_latency file displays the I/O + latency of disk <disk>. It makes it convenient to see + the I/O latency statistics for each type (read, write, + discard and flush) of I/O that has happened to the disk. What: /sys/block/<disk>/<part>/stat Date: February 2008 diff --git a/block/Kconfig b/block/Kconfig index bbad5e8bbffe..360f63111e2d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -176,6 +176,14 @@ config BLK_DEBUG_FS Unless you are building a kernel for a tiny system, you should say Y here. 
+config BLK_ADDITIONAL_DISKSTAT
+	bool "Block layer additional diskstat"
+	default n
+	help
+	  Enabling this option adds io latency statistics for each block device.
+
+	  If unsure, say N.
+
 config BLK_DEBUG_FS_ZONED
 	bool
 	default BLK_DEBUG_FS && BLK_DEV_ZONED
diff --git a/block/blk-core.c b/block/blk-core.c
index d9d632639bd1..fa339e0cc38a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1411,6 +1411,33 @@ static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
 	}
 }
 
+/*
+ * Account one completed I/O of @duration_ns nanoseconds in the latency
+ * histogram of @part. The latency is scaled to units of HZ_TO_MSEC_NUM ms
+ * and sorted into the buckets printed by io_latency_show(): bucket 0 is
+ * [0, 1), bucket n is [2^(n-1), 2^n), the last bucket takes the rest.
+ * Callers must derive @duration_ns from two timestamps of the same clock;
+ * req->start_time_ns must not be subtracted from a jiffies based value.
+ */
+static void blk_additional_latency(struct hd_struct *part, const int sgrp,
+				   u64 duration_ns)
+{
+#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
+	unsigned int idx = 0;
+	u64 duration;
+
+	/* 64-bit math so that latencies above ~4s do not wrap on 32-bit. */
+	duration = div_u64(duration_ns, NSEC_PER_MSEC * HZ_TO_MSEC_NUM);
+	if (likely(duration > 0)) {
+		/* +1: a latency of 1 belongs to [1, 2), not to [0, 1). */
+		idx = ilog2(duration) + 1;
+		if (idx > ADD_STAT_NUM - 1)
+			idx = ADD_STAT_NUM - 1;
+	}
+	part_stat_inc(part, latency_table[idx][sgrp]);
+#endif
+}
+
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (req->part && blk_do_io_stat(req)) {
@@ -1440,6 +1467,7 @@ void blk_account_io_done(struct request *req, u64 now)
 		part = req->part;
 
 		update_io_ticks(part, jiffies, true);
+		blk_additional_latency(part, sgrp, now - req->start_time_ns);
 		part_stat_inc(part, ios[sgrp]);
 		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
 		part_stat_unlock();
@@ -1488,6 +1516,7 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
 
 	part_stat_lock();
 	update_io_ticks(part, now, true);
+	blk_additional_latency(part, sgrp, jiffies_to_nsecs(duration));
 	part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
 	part_stat_local_dec(part, in_flight[op_is_write(op)]);
 	part_stat_unlock();
diff --git a/block/genhd.c
b/block/genhd.c index 99c64641c314..21a3dae6a024 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1418,6 +1418,44 @@ static struct device_attribute dev_attr_fail_timeout = __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store); #endif +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT /* io_latency_show() prints one line per latency bucket: the label of the bucket followed by the latency_table counter of every stat group. */ +static ssize_t io_latency_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + size_t count = 0; + int i, sgrp; + + for (i = 0; i < ADD_STAT_NUM; i++) { + unsigned int from, to; + + if (i == ADD_STAT_NUM - 1) { /* the last bucket is open-ended */ + count += scnprintf(buf + count, PAGE_SIZE - count, " >= %5d ms: ", + (2 << (i - 2)) * HZ_TO_MSEC_NUM); + } else { + if (i < 2) { /* buckets 0 and 1 are labelled [0, 1) and [1, 2) */ + from = i; + to = i + 1; + } else { /* bucket i is labelled [2^(i-1), 2^i) */ + from = 2 << (i - 2); + to = 2 << (i - 1); + } + count += scnprintf(buf + count, PAGE_SIZE - count, "[%5d - %-5d) ms: ", + from * HZ_TO_MSEC_NUM, to * HZ_TO_MSEC_NUM); + } + + for (sgrp = 0; sgrp < NR_STAT_GROUPS; sgrp++) + count += scnprintf(buf + count, PAGE_SIZE - count, "%lu ", + part_stat_read(p, latency_table[i][sgrp])); + count += scnprintf(buf + count, PAGE_SIZE - count, "\n"); + } + + return count; +} + +static struct device_attribute dev_attr_io_latency = + __ATTR(io_latency, 0444, io_latency_show, NULL); +#endif + static struct attribute *disk_attrs[] = { &dev_attr_range.attr, &dev_attr_ext_range.attr, @@ -1436,6 +1474,9 @@ static struct attribute *disk_attrs[] = { #endif #ifdef CONFIG_FAIL_IO_TIMEOUT &dev_attr_fail_timeout.attr, +#endif +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT + &dev_attr_io_latency.attr, #endif NULL }; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index 24125778ef3e..fe3def8c69d7 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -9,6 +9,13 @@ struct disk_stats { unsigned long sectors[NR_STAT_GROUPS]; unsigned long ios[NR_STAT_GROUPS]; unsigned long merges[NR_STAT_GROUPS]; +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT +/* + * latency_table[i][sgrp] counts the I/Os of stat group sgrp accounted to + * latency bucket i. io_latency_show() labels the ADD_STAT_NUM buckets + * [0, 1), [1, 2), [2, 4), ..., [512, 1024) and >= 1024, each bound being + * multiplied by HZ_TO_MSEC_NUM and printed in ms. 
+ */ +#define ADD_STAT_NUM 12 + unsigned long latency_table[ADD_STAT_NUM][NR_STAT_GROUPS]; +#endif unsigned long io_ticks; local_t in_flight[2]; }; -- 2.17.1