Re: [PATCH RFC 4/5] block: add a statistic table for io latency

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jul 08, 2020 at 09:58:18AM +0200, Guoqing Jiang wrote:
> Usually, we get the status of block device by cat stat file,
> but we can only know the total time with that file. And we
> would like to know more accurate statistic, such as each
> latency range, which helps people to diagnose if there is
> issue about the hardware.
> 
> Also a new config option is introduced to control if people
> want to know the additional statistics or not, and we also
> use the option for io sector in next patch.
> 
> Signed-off-by: Florian-Ewald Mueller <florian-ewald.mueller@xxxxxxxxxxxxxxx>
> Signed-off-by: Guoqing Jiang <guoqing.jiang@xxxxxxxxxxxxxxx>
> ---
>  block/Kconfig             |  8 ++++++++
>  block/blk-core.c          | 35 +++++++++++++++++++++++++++++++++++
>  block/genhd.c             | 26 ++++++++++++++++++++++++++
>  include/linux/part_stat.h |  7 +++++++
>  4 files changed, 76 insertions(+)
> 
> diff --git a/block/Kconfig b/block/Kconfig
> index 9357d7302398..dba71feaa85b 100644
> --- a/block/Kconfig
> +++ b/block/Kconfig
> @@ -175,6 +175,14 @@ config BLK_DEBUG_FS
>  	Unless you are building a kernel for a tiny system, you should
>  	say Y here.
>  
> +config BLK_ADDITIONAL_DISKSTAT
> +	bool "Block layer additional diskstat"
> +	default n
> +	help
> +	Enabling this option adds io latency statistics for each block device.
> +
> +	If unsure, say N.
> +
>  config BLK_DEBUG_FS_ZONED
>         bool
>         default BLK_DEBUG_FS && BLK_DEV_ZONED
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 0e806a8c62fb..7a129c8f1b23 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1411,6 +1411,39 @@ static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
>  	}
>  }
>  
> +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
> +/*
> + * Either account additional stat for request if req is not NULL or account for bio.
> + */
> +static void blk_additional_latency(struct hd_struct *part, const int sgrp,
> +				   struct request *req, unsigned long start_ns)
> +{
> +	unsigned int idx;
> +	unsigned long duration, now = ktime_get_ns();
> +
> +	if (req)
> +		duration = (now - req->start_time_ns) / NSEC_PER_MSEC;
> +	else
> +		duration = (now - start_ns) / NSEC_PER_MSEC;
> +
> +	duration /= HZ_TO_MSEC_NUM;
> +	if (likely(duration > 0)) {
> +		idx = ilog2(duration);
> +		if (idx > ADD_STAT_NUM - 1)
> +			idx = ADD_STAT_NUM - 1;
> +	} else
> +		idx = 0;
> +	part_stat_inc(part, latency_table[idx][sgrp]);
> +
> +}
> +#else
> +static void blk_additional_latency(struct hd_struct *part, const int sgrp,
> +				   struct request *req, unsigned long start_jiffies)
> +
> +{
> +}
> +#endif
> +
>  static void blk_account_io_completion(struct request *req, unsigned int bytes)
>  {
>  	if (req->part && blk_do_io_stat(req)) {
> @@ -1440,6 +1473,7 @@ void blk_account_io_done(struct request *req, u64 now)
>  		part = req->part;
>  
>  		update_io_ticks(part, jiffies, true);
> +		blk_additional_latency(part, sgrp, req, 0);
>  		part_stat_inc(part, ios[sgrp]);
>  		part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
>  		part_stat_unlock();
> @@ -1489,6 +1523,7 @@ void disk_end_io_acct(struct gendisk *disk, unsigned int op,
>  
>  	part_stat_lock();
>  	update_io_ticks(part, now, true);
> +	blk_additional_latency(part, sgrp, NULL, start_time);
>  	part_stat_add(part, nsecs[sgrp], duration);
>  	part_stat_local_dec(part, in_flight[op_is_write(op)]);
>  	part_stat_unlock();
> diff --git a/block/genhd.c b/block/genhd.c
> index 60ae4e1b4d38..a33937a74fb1 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -1420,6 +1420,29 @@ static struct device_attribute dev_attr_fail_timeout =
>  	__ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
>  #endif
>  
> +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
> +static ssize_t io_latency_show(struct device *dev, struct device_attribute *attr, char *buf)
> +{
> +	struct hd_struct *p = dev_to_part(dev);
> +	size_t count = 0;
> +	int i, sgrp;
> +
> +	for (i = 0; i < ADD_STAT_NUM; i++) {
> +		count += scnprintf(buf + count, PAGE_SIZE - count, "%5d ms: ",
> +				   (1 << i) * HZ_TO_MSEC_NUM);
> +		for (sgrp = 0; sgrp < NR_STAT_GROUPS; sgrp++)
> +			count += scnprintf(buf + count, PAGE_SIZE - count, "%lu ",
> +					   part_stat_read(p, latency_table[i][sgrp]));
> +		count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
> +	}
> +
> +	return count;
> +}
> +
> +static struct device_attribute dev_attr_io_latency =
> +	__ATTR(io_latency, 0444, io_latency_show, NULL);
> +#endif
> +
>  static struct attribute *disk_attrs[] = {
>  	&dev_attr_range.attr,
>  	&dev_attr_ext_range.attr,
> @@ -1438,6 +1461,9 @@ static struct attribute *disk_attrs[] = {
>  #endif
>  #ifdef CONFIG_FAIL_IO_TIMEOUT
>  	&dev_attr_fail_timeout.attr,
> +#endif
> +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
> +	&dev_attr_io_latency.attr,
>  #endif
>  	NULL
>  };
> diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
> index 24125778ef3e..fe3def8c69d7 100644
> --- a/include/linux/part_stat.h
> +++ b/include/linux/part_stat.h
> @@ -9,6 +9,13 @@ struct disk_stats {
>  	unsigned long sectors[NR_STAT_GROUPS];
>  	unsigned long ios[NR_STAT_GROUPS];
>  	unsigned long merges[NR_STAT_GROUPS];
> +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
> +/*
> + * We measure latency (ms) for 1, 2, ..., 1024 and >=1024.
> + */
> +#define ADD_STAT_NUM	12
> +	unsigned long latency_table[ADD_STAT_NUM][NR_STAT_GROUPS];
> +#endif
>  	unsigned long io_ticks;
>  	local_t in_flight[2];
>  };
> -- 
> 2.17.1
> 

Hi Guoqing,

I believe it isn't hard to write a ebpf based script(bcc or bpftrace) to
collect this kind of performance data, so looks not necessary to do it
in kernel.

Thanks,
Ming




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux