few additional comments. On 2022/05/26 16:34, HAGIO KAZUHITO(萩尾 一仁) wrote: > Hi Lianbo, > > thank you for the update. Nice work, it became more readable :) > > On 2022/05/23 19:06, Lianbo Jiang wrote: >> Currently, crash doesn't support to display disk I/O statistics >> for blk-mq devices. For more details, please refer to the following >> commit: <98b417fc6346> ("Handle blk_mq_ctx member changes for kernels >> 5.16-rc1 and later"). >> >> Lets parse the bitmap in blk-mq layer to achieve it. >> >> Signed-off-by: Lianbo Jiang <lijiang@xxxxxxxxxx> >> --- >> defs.h | 12 +++ >> dev.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++-------- >> symbols.c | 24 +++++ >> 3 files changed, 289 insertions(+), 39 deletions(-) >> >> diff --git a/defs.h b/defs.h >> index ecbced24d2e3..c1626bc79d59 100644 >> --- a/defs.h >> +++ b/defs.h >> @@ -2170,6 +2170,17 @@ struct offset_table { /* stash of commonly-used offsets */ >> long sbq_wait_state_wait; >> long sbitmap_alloc_hint; >> long sbitmap_round_robin; >> + long request_q; >> + long request_cmd_flags; >> + long request_queue_queue_hw_ctx; >> + long request_queue_nr_hw_queues; >> + long blk_mq_hw_ctx_tags; >> + long blk_mq_hw_ctx_sched_tags; >> + long blk_mq_tags_bitmap_tags; >> + long blk_mq_tags_breserved_tags; >> + long blk_mq_tags_nr_reserved_tags; >> + long blk_mq_tags_rqs; >> + long blk_mq_tags_static_rqs; >> }; >> >> struct size_table { /* stash of commonly-used sizes */ >> @@ -2339,6 +2350,7 @@ struct size_table { /* stash of commonly-used sizes */ >> long sbitmap; >> long sbitmap_queue; >> long sbq_wait_state; >> + long blk_mq_tags; >> }; >> >> struct array_table { >> diff --git a/dev.c b/dev.c >> index a493e51ac95c..7e0b3d27888d 100644 >> --- a/dev.c >> +++ b/dev.c >> @@ -4238,19 +4238,216 @@ get_one_mctx_diskio(unsigned long mctx, struct diskio *io) >> io->write = (dispatch[1] - comp[1]); >> } >> >> +typedef uint (busy_tag_iter_fn)(ulong rq, void *data); >> + >> +struct bt_iter_data { >> + ulong q; >> + struct diskio *dio; >> +}; >> + >> +struct bt_tags_iter_data { >> + ulong tags; >> + uint flags; >> + uint nr_reserved_tags; >> + busy_tag_iter_fn *fn; >> + void *data; >> +}; >> + >> +static uint op_is_write(uint op) >> +{ >> +#define REQ_OP_BITS 8 >> +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) >> + >> + return (op & REQ_OP_MASK) & 1; >> +} >> + >> +static uint calculate_disk_io(ulong rq, void *data) >> +{ >> + uint cmd_flags = 0; >> + ulong addr = 0, queue = 0; >> + struct bt_iter_data *iter = data; >> + >> + if (!IS_KVADDR(rq)) >> + return 1; For confirmation, this 1 is intended/correct? And please use TRUE or FALSE for readability. >> + >> + addr = rq + OFFSET(request_q); >> + if (!readmem(addr, KVADDR, &queue, sizeof(ulong), "request.q", RETURN_ON_ERROR)) >> + return 0; >> + >> + addr = rq + OFFSET(request_cmd_flags); >> + if (!readmem(addr, KVADDR, &cmd_flags, sizeof(uint), "request.cmd_flags", RETURN_ON_ERROR)) >> + return 0; >> + >> + if (queue == iter->q) { >> + if (op_is_write(cmd_flags)) >> + iter->dio->write++; >> + else >> + iter->dio->read++; >> + } >> + >> + return 1; >> +} >> + >> +static bool bt_tags_iter(uint bitnr, void *data) >> +{ >> + uint ret = 0; Same as above. Thanks, kazu >> + ulong addr = 0, rqs_addr = 0, rq = 0; >> + struct bt_tags_iter_data *tag_iter = data; >> + ulong tag = tag_iter->tags; >> + >> + if (!tag_iter->flags) >> + bitnr += tag_iter->nr_reserved_tags; >> + >> + /* rqs */ >> + addr = tag + OFFSET(blk_mq_tags_rqs); >> + if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.rqs", RETURN_ON_ERROR)) >> + return ret; >> + addr = rqs_addr + bitnr * sizeof(ulong); /* rqs[bitnr] */ >> + if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.rqs[]", RETURN_ON_ERROR)) >> + return ret; >> + ret = tag_iter->fn(rq, tag_iter->data); >> + if (!ret) >> + return ret; >> + > >> + /* static_rqs */ >> + ret = 0; >> + addr = tag + OFFSET(blk_mq_tags_static_rqs); >> + if (!readmem(addr, KVADDR, &rqs_addr, sizeof(void *), "blk_mq_tags.static_rqs", RETURN_ON_ERROR)) >> + return ret; >> + addr = rqs_addr + bitnr * sizeof(ulong); /* static_rqs[bitnr] */ >> + if (!readmem(addr, KVADDR, &rq, sizeof(ulong), "blk_mq_tags.static_rqs[]", RETURN_ON_ERROR)) >> + return ret; >> + ret = tag_iter->fn(rq, tag_iter->data); >> + >> + return ret; > > I'm not familiar with blk-mq, I'd like some information. > > I think that the "dev -d" displays the same inflight stats in /proc/diskstats > for devices without mq: > > diskstats_show > part_in_flight > count up per_cpu disk_stats->in_flight > > so, if we do the same thing for mq devices, we should imitate this path: > > diskstats_show > blk_mq_in_flight > blk_mq_queue_tag_busy_iter > queue_for_each_hw_ctx > bt_for_each > sbitmap_for_each_set > bt_iter > blk_mq_check_inflight > > On the path, I cannot see the static_rqs being counted. > Why does it need to be counted? > >> +} >> + >> +static void bt_tags_for_each(ulong q, ulong tags, ulong sbq, uint flags, uint nr_resvd_tags, struct diskio *dio) >> +{ >> + struct sbitmap_context sc = {0}; >> + struct diskio tmp = {0}; >> + struct bt_iter_data iter_data = { >> + .q = q, >> + .dio = &tmp, >> + }; >> + struct bt_tags_iter_data tags_iter_data = { >> + .tags = tags, >> + .flags = flags, >> + .nr_reserved_tags = nr_resvd_tags, >> + .fn = calculate_disk_io, >> + .data = &iter_data, >> + }; >> + >> + sbitmap_context_load(sbq + OFFSET(sbitmap_queue_sb), &sc); >> + sbitmap_for_each_set(&sc, bt_tags_iter, &tags_iter_data); >> + >> + dio->read = tmp.read; >> + dio->write = tmp.write; >> +} >> + >> +static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio) >> +{ >> + uint i; >> + struct diskio tmp = {0}; >> + >> + if (!hctx || !dio) >> + error(FATAL, "%s: Invalid parameter.\n", __func__); >> + >> + for (i = 0; i < cnt; i++) { >> + ulong addr = 0, tags = 0, sched_tags = 0; >> + uint nr_reserved_tags = 0; >> + >> + if (!IS_KVADDR(hctx[i])) >> + continue; >> + >> + /* Tags owned by the block driver */ >> + addr = hctx[i] + OFFSET(blk_mq_hw_ctx_tags); >> + if (!readmem(addr, KVADDR, &tags, sizeof(ulong), >> + "blk_mq_hw_ctx.tags", RETURN_ON_ERROR)) >> + break; >> + >> + addr = tags + OFFSET(blk_mq_tags_nr_reserved_tags); >> + if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint), >> + "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR)) >> + break; >> + >> + if (nr_reserved_tags) { >> + addr = tags + OFFSET(blk_mq_tags_breserved_tags); >> + bt_tags_for_each(q, tags, addr, 1, nr_reserved_tags, &tmp); >> + } >> + addr = tags + OFFSET(blk_mq_tags_bitmap_tags); >> + bt_tags_for_each(q, tags, addr, 0, nr_reserved_tags, &tmp); >> + > >> + /* Tags owned by I/O scheduler */ >> + addr = hctx[i] + OFFSET(blk_mq_hw_ctx_sched_tags); >> + if (!readmem(addr, KVADDR, &sched_tags, sizeof(ulong), >> + "blk_mq_hw_ctx.sched_tags", RETURN_ON_ERROR)) >> + break; >> + >> + addr = sched_tags + OFFSET(blk_mq_tags_nr_reserved_tags); >> + if (!readmem(addr, KVADDR, &nr_reserved_tags, sizeof(uint), >> + "blk_mq_tags_nr_reserved_tags", RETURN_ON_ERROR)) >> + break; >> + >> + if (nr_reserved_tags) { >> + addr = sched_tags + OFFSET(blk_mq_tags_breserved_tags); >> + bt_tags_for_each(q, sched_tags, addr, 1, nr_reserved_tags, &tmp); >> + } >> + addr = sched_tags + OFFSET(blk_mq_tags_bitmap_tags); >> + bt_tags_for_each(q, sched_tags, addr, 0, nr_reserved_tags, &tmp); > > Same as above, why does the sched_tags need to be counted? > >> + } >> + >> + dio->read = tmp.read; >> + dio->write = tmp.write; >> +} >> + >> +static void get_mq_diskio_from_hw_queues(ulong q, struct diskio *dio) >> +{ >> + uint cnt = 0; >> + ulong addr = 0, hctx_addr = 0; >> + ulong *hctx_array = NULL; >> + struct diskio tmp = {0}; >> + >> + addr = q + OFFSET(request_queue_nr_hw_queues); >> + readmem(addr, KVADDR, &cnt, sizeof(uint), >> + "request_queue.nr_hw_queues", FAULT_ON_ERROR); >> + >> + addr = q + OFFSET(request_queue_queue_hw_ctx); >> + readmem(addr, KVADDR, &hctx_addr, sizeof(void *), >> + "request_queue.queue_hw_ctx", FAULT_ON_ERROR); >> + >> + hctx_array = (ulong *)GETBUF(sizeof(void *) * cnt); >> + if (!hctx_array) >> + error(FATAL, "fail to get memory for the hctx_array\n"); >> + >> + if (!readmem(hctx_addr, KVADDR, hctx_array, sizeof(void *) * cnt, >> + "request_queue.queue_hw_ctx[]", RETURN_ON_ERROR)) { >> + FREEBUF(hctx_array); >> + return; >> + } >> + >> + queue_for_each_hw_ctx(q, hctx_array, cnt, &tmp); >> + dio->read = tmp.read; >> + dio->write = tmp.write; >> + >> + FREEBUF(hctx_array); >> +} >> + >> static void >> get_mq_diskio(unsigned long q, unsigned long *mq_count) >> { >> int cpu; >> unsigned long queue_ctx; >> unsigned long mctx_addr; >> - struct diskio tmp; >> + struct diskio tmp = {0}; >> >> if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) || >> - INVALID_MEMBER(blk_mq_ctx_rq_completed)) >> + INVALID_MEMBER(blk_mq_ctx_rq_completed)) { >> + get_mq_diskio_from_hw_queues(q, &tmp); >> + mq_count[0] = tmp.read; >> + mq_count[1] = tmp.write; >> return; >> - >> - memset(&tmp, 0x00, sizeof(struct diskio)); >> + } >> >> readmem(q + OFFSET(request_queue_queue_ctx), KVADDR, &queue_ctx, >> sizeof(ulong), "request_queue.queue_ctx", >> @@ -4479,41 +4676,24 @@ display_one_diskio(struct iter *i, unsigned long gendisk, ulong flags) >> && (io.read + io.write == 0)) >> return; >> >> - if (use_mq_interface(queue_addr) && >> - (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) || >> - INVALID_MEMBER(blk_mq_ctx_rq_completed))) >> - fprintf(fp, "%s%s%s %s%s%s%s %s%s%s", >> - mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major), >> - space(MINSPACE), >> - mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk), >> - space(MINSPACE), >> - mkstring(buf2, 10, LJUST, disk_name), >> - space(MINSPACE), >> - mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN, >> - LJUST|LONG_HEX, (char *)queue_addr), >> - space(MINSPACE), >> - mkstring(buf4, 17, RJUST, "(not supported)"), >> - space(MINSPACE)); >> - >> - else >> - fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s", >> - mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major), >> - space(MINSPACE), >> - mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk), >> - space(MINSPACE), >> - mkstring(buf2, 10, LJUST, disk_name), >> - space(MINSPACE), >> - mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN, >> - LJUST|LONG_HEX, (char *)queue_addr), >> - space(MINSPACE), >> - io.read + io.write, >> - space(MINSPACE), >> - mkstring(buf4, 5, RJUST|INT_DEC, >> - (char *)(unsigned long)io.read), >> - space(MINSPACE), >> - mkstring(buf5, 5, RJUST|INT_DEC, >> - (char *)(unsigned long)io.write), >> - space(MINSPACE)); >> + fprintf(fp, "%s%s%s %s%s%s%s %s%5d%s%s%s%s%s", >> + mkstring(buf0, 5, RJUST|INT_DEC, (char *)(unsigned long)major), >> + space(MINSPACE), >> + mkstring(buf1, VADDR_PRLEN, LJUST|LONG_HEX, (char *)gendisk), >> + space(MINSPACE), >> + mkstring(buf2, 10, LJUST, disk_name), >> + space(MINSPACE), >> + mkstring(buf3, VADDR_PRLEN <= 11 ? 11 : VADDR_PRLEN, >> + LJUST|LONG_HEX, (char *)queue_addr), >> + space(MINSPACE), >> + io.read + io.write, >> + space(MINSPACE), >> + mkstring(buf4, 5, RJUST|INT_DEC, >> + (char *)(unsigned long)io.read), >> + space(MINSPACE), >> + mkstring(buf5, 5, RJUST|INT_DEC, >> + (char *)(unsigned long)io.write), >> + space(MINSPACE)); >> >> if (VALID_MEMBER(request_queue_in_flight)) { >> if (!use_mq_interface(queue_addr)) { >> @@ -4603,15 +4783,49 @@ void diskio_init(void) >> MEMBER_OFFSET_INIT(request_queue_rq, "request_queue", "rq"); >> else >> MEMBER_OFFSET_INIT(request_queue_rq, "request_queue", "root_rl"); > >> + if (MEMBER_EXISTS("request", "q")) >> + MEMBER_OFFSET_INIT(request_q, "request", "q"); >> + if (MEMBER_EXISTS("request", "cmd_flags")) >> + MEMBER_OFFSET_INIT(request_cmd_flags, "request", "cmd_flags"); > > No need to have these "if MEMBER_EXISTS()", because it doesn't save > a bunch of MEMBER_OFFSET_INIT(). > >> if (MEMBER_EXISTS("request_queue", "mq_ops")) { >> MEMBER_OFFSET_INIT(request_queue_mq_ops, "request_queue", >> "mq_ops"); >> ANON_MEMBER_OFFSET_INIT(request_queue_queue_ctx, >> "request_queue", "queue_ctx"); >> + MEMBER_OFFSET_INIT(request_queue_queue_hw_ctx, >> + "request_queue", "queue_hw_ctx"); >> + MEMBER_OFFSET_INIT(request_queue_nr_hw_queues, >> + "request_queue", "nr_hw_queues"); >> MEMBER_OFFSET_INIT(blk_mq_ctx_rq_dispatched, "blk_mq_ctx", >> "rq_dispatched"); >> MEMBER_OFFSET_INIT(blk_mq_ctx_rq_completed, "blk_mq_ctx", >> "rq_completed"); >> + MEMBER_OFFSET_INIT(blk_mq_hw_ctx_tags, "blk_mq_hw_ctx", >> + "tags"); >> + MEMBER_OFFSET_INIT(blk_mq_hw_ctx_sched_tags, "blk_mq_hw_ctx", >> + "sched_tags"); >> + MEMBER_OFFSET_INIT(blk_mq_tags_bitmap_tags, "blk_mq_tags", >> + "bitmap_tags"); >> + MEMBER_OFFSET_INIT(blk_mq_tags_breserved_tags, "blk_mq_tags", >> + "breserved_tags"); >> + MEMBER_OFFSET_INIT(blk_mq_tags_nr_reserved_tags, "blk_mq_tags", >> + "nr_reserved_tags"); >> + MEMBER_OFFSET_INIT(blk_mq_tags_rqs, "blk_mq_tags", >> + "rqs"); >> + MEMBER_OFFSET_INIT(blk_mq_tags_static_rqs, "blk_mq_tags", >> + "static_rqs"); >> + STRUCT_SIZE_INIT(blk_mq_tags, "blk_mq_tags"); >> + STRUCT_SIZE_INIT(sbitmap, "sbitmap"); >> + STRUCT_SIZE_INIT(sbitmap_word, "sbitmap_word"); >> + MEMBER_OFFSET_INIT(sbitmap_word_depth, "sbitmap_word", "depth"); >> + MEMBER_OFFSET_INIT(sbitmap_word_word, "sbitmap_word", "word"); >> + MEMBER_OFFSET_INIT(sbitmap_word_cleared, "sbitmap_word", "cleared"); >> + MEMBER_OFFSET_INIT(sbitmap_depth, "sbitmap", "depth"); >> + MEMBER_OFFSET_INIT(sbitmap_shift, "sbitmap", "shift"); >> + MEMBER_OFFSET_INIT(sbitmap_map_nr, "sbitmap", "map_nr"); >> + MEMBER_OFFSET_INIT(sbitmap_map, "sbitmap", "map"); >> + MEMBER_OFFSET_INIT(sbitmap_queue_sb, "sbitmap_queue", "sb"); >> + >> } >> MEMBER_OFFSET_INIT(subsys_private_klist_devices, "subsys_private", >> "klist_devices"); >> diff --git a/symbols.c b/symbols.c >> index bfe506388fa3..c432469671a4 100644 >> --- a/symbols.c >> +++ b/symbols.c >> @@ -10385,6 +10385,10 @@ dump_offset_table(char *spec, ulong makestruct) >> OFFSET(kset_list)); >> fprintf(fp, " request_list_count: %ld\n", >> OFFSET(request_list_count)); >> + fprintf(fp, " request_q: %ld\n", >> + OFFSET(request_q)); >> + fprintf(fp, " request_cmd_flags: %ld\n", >> + OFFSET(request_cmd_flags)); >> fprintf(fp, " request_queue_in_flight: %ld\n", >> OFFSET(request_queue_in_flight)); >> fprintf(fp, " request_queue_rq: %ld\n", >> @@ -10393,10 +10397,29 @@ dump_offset_table(char *spec, ulong makestruct) >> OFFSET(request_queue_mq_ops)); >> fprintf(fp, " request_queue_queue_ctx: %ld\n", >> OFFSET(request_queue_queue_ctx)); >> + fprintf(fp, " request_queue_queue_hw_ctx: %ld\n", >> + OFFSET(request_queue_queue_hw_ctx)); >> + fprintf(fp, " request_queue_nr_hw_queues: %ld\n", >> + OFFSET(request_queue_nr_hw_queues)); >> fprintf(fp, " blk_mq_ctx_rq_dispatched: %ld\n", >> OFFSET(blk_mq_ctx_rq_dispatched)); >> fprintf(fp, " blk_mq_ctx_rq_completed: %ld\n", >> OFFSET(blk_mq_ctx_rq_completed)); >> + fprintf(fp, " blk_mq_hw_ctx_tags: %ld\n", >> + OFFSET(blk_mq_hw_ctx_tags)); >> + fprintf(fp, " blk_mq_hw_ctx_sched_tags: %ld\n", >> + OFFSET(blk_mq_hw_ctx_sched_tags)); >> + fprintf(fp, " blk_mq_tags_bitmap_tags: %ld\n", >> + OFFSET(blk_mq_tags_bitmap_tags)); >> + fprintf(fp, " blk_mq_tags_breserved_tags: %ld\n", >> + OFFSET(blk_mq_tags_breserved_tags)); >> + fprintf(fp, " blk_mq_tags_nr_reserved_tags: %ld\n", >> + OFFSET(blk_mq_tags_nr_reserved_tags)); >> + fprintf(fp, " blk_mq_tags_rqs: %ld\n", >> + OFFSET(blk_mq_tags_rqs)); >> + fprintf(fp, " blk_mq_tags_static_rqs: %ld\n", >> + OFFSET(blk_mq_tags_static_rqs)); > > Please arrange the position of colons: > > kobject_entry: 8 > kset_list: 0 > request_list_count: -1 > request_q: 0 > request_cmd_flags: 24 > ^ > > The current patch: > > kobject_entry: 8 > kset_list: 0 > request_list_count: -1 > request_q: 0 > request_cmd_flags: 24 > >> + >> fprintf(fp, " subsys_private_klist_devices: %ld\n", >> OFFSET(subsys_private_klist_devices)); >> fprintf(fp, " subsystem_kset: %ld\n", >> @@ -11003,6 +11026,7 @@ dump_offset_table(char *spec, ulong makestruct) >> fprintf(fp, " sbitmap: %ld\n", SIZE(sbitmap)); >> fprintf(fp, " sbitmap_queue: %ld\n", SIZE(sbitmap_queue)); >> fprintf(fp, " sbq_wait_state: %ld\n", SIZE(sbq_wait_state)); >> + fprintf(fp, " blk_mq_tags: %ld\n", SIZE(blk_mq_tags)); > > Same as above. > > Thanks, > Kazu > -- > Crash-utility mailing list > Crash-utility@xxxxxxxxxx > https://listman.redhat.com/mailman/listinfo/crash-utility > Contribution Guidelines: https://github.com/crash-utility/crash/wiki -- Crash-utility mailing list Crash-utility@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/crash-utility Contribution Guidelines: https://github.com/crash-utility/crash/wiki