Re: [PATCH v2 1/2] Support for "dev -d|-D" options by parsing bitmap in blk-mq layer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Lianbo,

thank you for the update.  Nice work, it became more readable :)

On 2022/05/23 19:06, Lianbo Jiang wrote:
> Currently, crash doesn't support to display disk I/O statistics
> for blk-mq devices. For more details, please refer to the following
> commit: <98b417fc6346> ("Handle blk_mq_ctx member changes for kernels
> 5.16-rc1 and later").
> 
> Lets parse the bitmap in blk-mq layer to achieve it.
> 
> Signed-off-by: Lianbo Jiang <lijiang@xxxxxxxxxx>
> ---
>   defs.h    |  12 +++
>   dev.c     | 292 ++++++++++++++++++++++++++++++++++++++++++++++--------
>   symbols.c |  24 +++++
>   3 files changed, 289 insertions(+), 39 deletions(-)
> 
> diff --git a/defs.h b/defs.h
> index ecbced24d2e3..c1626bc79d59 100644
> --- a/defs.h
> +++ b/defs.h
> @@ -2170,6 +2170,17 @@ struct offset_table {                    /* stash of commonly-used offsets */
>   	long sbq_wait_state_wait;
>   	long sbitmap_alloc_hint;
>   	long sbitmap_round_robin;
> +	long request_q;
> +	long request_cmd_flags;
> +	long request_queue_queue_hw_ctx;
> +	long request_queue_nr_hw_queues;
> +	long blk_mq_hw_ctx_tags;
> +	long blk_mq_hw_ctx_sched_tags;
> +	long blk_mq_tags_bitmap_tags;
> +	long blk_mq_tags_breserved_tags;
> +	long blk_mq_tags_nr_reserved_tags;
> +	long blk_mq_tags_rqs;
> +	long blk_mq_tags_static_rqs;
>   };
>   
>   struct size_table {         /* stash of commonly-used sizes */
> @@ -2339,6 +2350,7 @@ struct size_table {         /* stash of commonly-used sizes */
>   	long sbitmap;
>   	long sbitmap_queue;
>   	long sbq_wait_state;
> +	long blk_mq_tags;
>   };
>   
>   struct array_table {
> diff --git a/dev.c b/dev.c
> index a493e51ac95c..7e0b3d27888d 100644
> --- a/dev.c
> +++ b/dev.c
> @@ -4238,19 +4238,216 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io)
>   	io->write = (dispatch[1] - comp[1]);
>   }
>   
> +typedef uint (busy_tag_iter_fn)(ulong rq, void *data);
> +
> +struct bt_iter_data {
> +	ulong q;
> +	struct diskio *dio;
> +};
> +
> +struct bt_tags_iter_data {
> +	ulong tags;
> +	uint flags;
> +	uint nr_reserved_tags;
> +	busy_tag_iter_fn *fn;
> +	void *data;
> +};
> +
> +static uint op_is_write(uint op)
> +{
> +#define REQ_OP_BITS     8
> +#define REQ_OP_MASK     ((1 << REQ_OP_BITS) - 1)
> +
> +	return (op & REQ_OP_MASK) & 1;
> +}
> +
> +static uint calculate_disk_io(ulong rq, void *data)
> +{
> +	uint cmd_flags = 0;
> +	ulong addr = 0, queue = 0;
> +	struct bt_iter_data *iter = data;
> +
> +	if (!IS_KVADDR(rq))
> +		return 1;
> +
> +	addr = rq + OFFSET(request_q);
> +	if (!readmem(addr, KVADDR, &queue, sizeof(ulong), "request.q", RETURN_ON_ERROR))
> +		return 0;
> +
> +	addr = rq + OFFSET(request_cmd_flags);
> +	if (!readmem(addr, KVADDR, &cmd_flags, sizeof(uint), "request.cmd_flags", RETURN_ON_ERROR))
> +		return 0;
> +
> +	if (queue == iter->q) {
> +		if (op_is_write(cmd_flags))
> +			iter->dio->write++;
> +		else
> +			iter->dio->read++;
> +	}
> +
> +	return 1;
> +}
> +
> +static bool bt_tags_iter(uint bitnr, void *data)
> +{
> +	uint ret = 0;
> +	ulong addr = 0, rqs_addr = 0, rq = 0;
> +	struct bt_tags_iter_data *tag_iter = data;
> +	ulong tag = tag_iter->tags;
> +
> +	if (!tag_iter->flags)
> +		bitnr += tag_iter->nr_reserved_tags;
> +
> +	/* rqs */
> +	addr = tag + OFFSET(blk_mq_tags_rqs);
> +	if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.rqs", RETURN_ON_ERROR))
> +		return ret;
> +	addr = rqs_addr + bitnr * sizeof(ulong); /* rqs[bitnr] */
> +	if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.rqs[]", RETURN_ON_ERROR))
> +		return ret;
> +	ret = tag_iter->fn(rq, tag_iter->data);
> +	if (!ret)
> +		return ret;
> +

> +	/* static_rqs */
> +	ret = 0;
> +	addr = tag + OFFSET(blk_mq_tags_static_rqs);
> +	if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.static_rqs", RETURN_ON_ERROR))
> +		return ret;
> +	addr = rqs_addr + bitnr * sizeof(ulong); /* static_rqs[bitnr] */
> +	if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.static_rqs[]", RETURN_ON_ERROR))
> +		return ret;
> +	ret = tag_iter->fn(rq, tag_iter->data);
> +
> +	return ret;

I'm not familiar with blk-mq, I'd like some information.

I think that the "dev -d" displays the same inflight stats in /proc/diskstats
for devices without mq:

   diskstats_show
     part_in_flight
       count up per_cpu disk_stats->in_flight

so, if we do the same thing for mq devices, we should imitate this path:

   diskstats_show
     blk_mq_in_flight
       blk_mq_queue_tag_busy_iter
         queue_for_each_hw_ctx
           bt_for_each
             sbitmap_for_each_set
               bt_iter
                 blk_mq_check_inflight

On the path, I cannot see the static_rqs being counted.
Why does it need to be counted?

> +}
> +
> +static void bt_tags_for_each(ulong q, ulong tags, ulong sbq, uint flags, uint nr_resvd_tags, struct diskio *dio)
> +{
> +	struct sbitmap_context sc = {0};
> +	struct diskio tmp = {0};
> +	struct bt_iter_data iter_data = {
> +		.q = q,
> +		.dio = &tmp,
> +	};
> +	struct bt_tags_iter_data tags_iter_data = {
> +		.tags = tags,
> +		.flags = flags,
> +		.nr_reserved_tags = nr_resvd_tags,
> +		.fn = calculate_disk_io,
> +		.data = &iter_data,
> +	};
> +
> +	sbitmap_context_load(sbq + OFFSET(sbitmap_queue_sb), &sc);
> +	sbitmap_for_each_set(&sc, bt_tags_iter, &tags_iter_data);
> +
> +	dio->read = tmp.read;
> +	dio->write = tmp.write;
> +}
> +
> +static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
> +{
> +	uint i;
> +	struct diskio tmp = {0};
> +
> +	if (!hctx || !dio)
> +		error(FATAL, "%s: Invalid parameter.\n", __func__);
> +
> +	for (i = 0; i < cnt; i++) {
> +		ulong addr = 0, tags = 0, sched_tags = 0;
> +		uint nr_reserved_tags = 0;
> +
> +		if (!IS_KVADDR(hctx[i]))
> +			continue;
> +
> +		/* Tags owned by the block driver */
> +		addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags);
> +		if (!readmem(addr, KVADDR, &tags, sizeof(ulong),
> +				"blk_mq_hw_ctx.tags", RETURN_ON_ERROR))
> +			break;
> +
> +		addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags);
> +		if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
> +				"blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
> +			break;
> +
> +		if (nr_reserved_tags) {
> +			addr = tags + OFFSET(blk_mq_tags_breserved_tags);
> +			bt_tags_for_each(q, tags, addr, 1, nr_reserved_tags, &tmp);
> +		}
> +		addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
> +		bt_tags_for_each(q, tags, addr, 0, nr_reserved_tags, &tmp);
> +

> +		/* Tags owned by I/O scheduler */
> +		addr = hctx[i] + OFFSET(blk_mq_hw_ctx_sched_tags);
> +		if (!readmem(addr, KVADDR, &sched_tags, sizeof(ulong),
> +				"blk_mq_hw_ctx.sched_tags", RETURN_ON_ERROR))
> +			break;
> +
> +		addr = sched_tags + OFFSET(blk_mq_tags_nr_reserved_tags);
> +		if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint),
> +				"blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR))
> +			break;
> +
> +		if (nr_reserved_tags) {
> +			addr = sched_tags + OFFSET(blk_mq_tags_breserved_tags);
> +			bt_tags_for_each(q, sched_tags, addr, 1, nr_reserved_tags, &tmp);
> +		}
> +		addr = sched_tags + OFFSET(blk_mq_tags_bitmap_tags);
> +		bt_tags_for_each(q, sched_tags, addr, 0, nr_reserved_tags, &tmp);

Same as above, why does the sched_tags need to be counted?

> +	}
> +
> +	dio->read = tmp.read;
> +	dio->write = tmp.write;
> +}
> +
> +static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio)
> +{
> +	uint cnt = 0;
> +	ulong addr = 0, hctx_addr = 0;
> +	ulong *hctx_array = NULL;
> +	struct diskio tmp = {0};
> +
> +	addr = q + OFFSET(request_queue_nr_hw_queues);
> +	readmem(addr, KVADDR, &cnt, sizeof(uint),
> +		"request_queue.nr_hw_queues", FAULT_ON_ERROR);
> +
> +	addr = q + OFFSET(request_queue_queue_hw_ctx);
> +	readmem(addr, KVADDR, &hctx_addr, sizeof(void *),
> +		"request_queue.queue_hw_ctx", FAULT_ON_ERROR);
> +
> +	hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt);
> +	if (!hctx_array)
> +		error(FATAL, "fail to get memory for the hctx_array\n");
> +
> +	if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt,
> +			"request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) {
> +		FREEBUF(hctx_array);
> +		return;
> +	}
> +
> +	queue_for_each_hw_ctx(q, hctx_array, cnt, &tmp);
> +	dio->read = tmp.read;
> +	dio->write = tmp.write;
> +
> +	FREEBUF(hctx_array);
> +}
> +
>   static void
>   get_mq_diskio(unsigned long q, unsigned long *mq_count)
>   {
>   	int cpu;
>   	unsigned long queue_ctx;
>   	unsigned long mctx_addr;
> -	struct diskio tmp;
> +	struct diskio tmp = {0};
>   
>   	if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
> -	    INVALID_MEMBER(blk_mq_ctx_rq_completed))
> +	    INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
> +		get_mq_diskio_from_hw_queues(q, &tmp);
> +		mq_count[0] = tmp.read;
> +		mq_count[1] = tmp.write;
>   		return;
> -
> -	memset(&tmp, 0x00, sizeof(struct diskio));
> +	}
>   
>   	readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx,
>   		sizeof(ulong), "request_queue.queue_ctx",
> @@ -4479,41 +4676,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk, ulong flags)
>   		&& (io.read + io.write == 0))
>   		return;
>   
> -	if (use_mq_interface(queue_addr) &&
> -	    (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
> -	     INVALID_MEMBER(blk_mq_ctx_rq_completed)))
> -		fprintf(fp, "%s%s%s  %s%s%s%s  %s%s%s",
> -			mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
> -			space(MINSPACE),
> -			mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
> -			space(MINSPACE),
> -			mkstring(buf2, 10, LJUST, disk_name),
> -			space(MINSPACE),
> -			mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
> -				 LJUST|LONG_HEX, (char *)queue_addr),
> -			space(MINSPACE),
> -			mkstring(buf4, 17, RJUST, "(not supported)"),
> -			space(MINSPACE));
> -
> -	else
> -		fprintf(fp, "%s%s%s  %s%s%s%s  %s%5d%s%s%s%s%s",
> -			mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
> -			space(MINSPACE),
> -			mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
> -			space(MINSPACE),
> -			mkstring(buf2, 10, LJUST, disk_name),
> -			space(MINSPACE),
> -			mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
> -				 LJUST|LONG_HEX, (char *)queue_addr),
> -			space(MINSPACE),
> -			io.read + io.write,
> -			space(MINSPACE),
> -			mkstring(buf4, 5, RJUST|INT_DEC,
> -				(char *)(unsigned long)io.read),
> -			space(MINSPACE),
> -			mkstring(buf5, 5, RJUST|INT_DEC,
> -				(char *)(unsigned long)io.write),
> -			space(MINSPACE));
> +	fprintf(fp, "%s%s%s  %s%s%s%s  %s%5d%s%s%s%s%s",
> +		mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major),
> +		space(MINSPACE),
> +		mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk),
> +		space(MINSPACE),
> +		mkstring(buf2, 10, LJUST, disk_name),
> +		space(MINSPACE),
> +		mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN,
> +			 LJUST|LONG_HEX, (char *)queue_addr),
> +		space(MINSPACE),
> +		io.read + io.write,
> +		space(MINSPACE),
> +		mkstring(buf4, 5, RJUST|INT_DEC,
> +			(char *)(unsigned long)io.read),
> +		space(MINSPACE),
> +		mkstring(buf5, 5, RJUST|INT_DEC,
> +			(char *)(unsigned long)io.write),
> +		space(MINSPACE));
>   
>   	if (VALID_MEMBER(request_queue_in_flight)) {
>   		if (!use_mq_interface(queue_addr)) {
> @@ -4603,15 +4783,49 @@ void diskio_init(void)
>   		MEMBER_OFFSET_INIT(request_queue_rq, "request_queue", "rq");
>   	else
>   		MEMBER_OFFSET_INIT(request_queue_rq, "request_queue", "root_rl");

> +	if (MEMBER_EXISTS("request", "q"))
> +		MEMBER_OFFSET_INIT(request_q, "request", "q");
> +	if (MEMBER_EXISTS("request", "cmd_flags"))
> +		MEMBER_OFFSET_INIT(request_cmd_flags, "request", "cmd_flags");

No need to have these "if MEMBER_EXISTS()", because it doesn't save
a bunch of MEMBER_OFFSET_INIT().

>   	if (MEMBER_EXISTS("request_queue", "mq_ops")) {
>   		MEMBER_OFFSET_INIT(request_queue_mq_ops, "request_queue",
>   			"mq_ops");
>   		ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx,
>   			"request_queue", "queue_ctx");
> +		MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx,
> +			"request_queue", "queue_hw_ctx");
> +		MEMBER_OFFSET_INIT(request_queue_nr_hw_queues,
> +			"request_queue", "nr_hw_queues");
>   		MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx",
>   			"rq_dispatched");
>   		MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx",
>   			"rq_completed");
> +		MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx",
> +			"tags");
> +		MEMBER_OFFSET_INIT(blk_mq_hw_ctx_sched_tags, "blk_mq_hw_ctx",
> +			"sched_tags");
> +		MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags",
> +			"bitmap_tags");
> +		MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags",
> +			"breserved_tags");
> +		MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags",
> +			"nr_reserved_tags");
> +		MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags",
> +			"rqs");
> +		MEMBER_OFFSET_INIT(blk_mq_tags_static_rqs, "blk_mq_tags",
> +			"static_rqs");
> +		STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags");
> +		STRUCT_SIZE_INIT(sbitmap, "sbitmap");
> +		STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word");
> +		MEMBER_OFFSET_INIT(sbitmap_word_depth, "sbitmap_word", "depth");
> +		MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word");
> +		MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared");
> +		MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth");
> +		MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift");
> +		MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr");
> +		MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map");
> +		MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb");
> +
>   	}
>   	MEMBER_OFFSET_INIT(subsys_private_klist_devices, "subsys_private",
>   		"klist_devices");
> diff --git a/symbols.c b/symbols.c
> index bfe506388fa3..c432469671a4 100644
> --- a/symbols.c
> +++ b/symbols.c
> @@ -10385,6 +10385,10 @@ dump_offset_table(char *spec, ulong makestruct)
>   		OFFSET(kset_list));
>   	fprintf(fp, "            request_list_count: %ld\n",
>   		OFFSET(request_list_count));
> +	fprintf(fp, "            request_q: %ld\n",
> +		OFFSET(request_q));
> +	fprintf(fp, "            request_cmd_flags: %ld\n",
> +		OFFSET(request_cmd_flags));
>   	fprintf(fp, "       request_queue_in_flight: %ld\n",
>   		OFFSET(request_queue_in_flight));
>   	fprintf(fp, "              request_queue_rq: %ld\n",
> @@ -10393,10 +10397,29 @@ dump_offset_table(char *spec, ulong makestruct)
>   		OFFSET(request_queue_mq_ops));
>   	fprintf(fp, "       request_queue_queue_ctx: %ld\n",
>   		OFFSET(request_queue_queue_ctx));
> +	fprintf(fp, "       request_queue_queue_hw_ctx: %ld\n",
> +		OFFSET(request_queue_queue_hw_ctx));
> +	fprintf(fp, "       request_queue_nr_hw_queues: %ld\n",
> +		OFFSET(request_queue_nr_hw_queues));
>   	fprintf(fp, "      blk_mq_ctx_rq_dispatched: %ld\n",
>   		OFFSET(blk_mq_ctx_rq_dispatched));
>   	fprintf(fp, "       blk_mq_ctx_rq_completed: %ld\n",
>   		OFFSET(blk_mq_ctx_rq_completed));
> +	fprintf(fp, "       blk_mq_hw_ctx_tags: %ld\n",
> +		OFFSET(blk_mq_hw_ctx_tags));
> +	fprintf(fp, "       blk_mq_hw_ctx_sched_tags: %ld\n",
> +		OFFSET(blk_mq_hw_ctx_sched_tags));
> +	fprintf(fp, "       blk_mq_tags_bitmap_tags: %ld\n",
> +		OFFSET(blk_mq_tags_bitmap_tags));
> +	fprintf(fp, "       blk_mq_tags_breserved_tags: %ld\n",
> +		OFFSET(blk_mq_tags_breserved_tags));
> +	fprintf(fp, "       blk_mq_tags_nr_reserved_tags: %ld\n",
> +		OFFSET(blk_mq_tags_nr_reserved_tags));
> +	fprintf(fp, "       blk_mq_tags_rqs: %ld\n",
> +		OFFSET(blk_mq_tags_rqs));
> +	fprintf(fp, "       blk_mq_tags_static_rqs: %ld\n",
> +		OFFSET(blk_mq_tags_static_rqs));

Please arrange the position of colons:

                  kobject_entry: 8
                      kset_list: 0
             request_list_count: -1
                      request_q: 0
              request_cmd_flags: 24
                               ^

The current patch:

                 kobject_entry: 8
                      kset_list: 0
             request_list_count: -1
             request_q: 0
             request_cmd_flags: 24

> +
>   	fprintf(fp, "  subsys_private_klist_devices: %ld\n",
>   		OFFSET(subsys_private_klist_devices));
>   	fprintf(fp, "                subsystem_kset: %ld\n",
> @@ -11003,6 +11026,7 @@ dump_offset_table(char *spec, ulong makestruct)
>   	fprintf(fp, "                       sbitmap: %ld\n", SIZE(sbitmap));
>   	fprintf(fp, "                 sbitmap_queue: %ld\n", SIZE(sbitmap_queue));
>   	fprintf(fp, "                sbq_wait_state: %ld\n", SIZE(sbq_wait_state));
> +	fprintf(fp, "                blk_mq_tags: %ld\n", SIZE(blk_mq_tags));

Same as above.

Thanks,
Kazu
--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki




[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux