Re: [PATCH] Make "dev -d|-D" options use sbitmap on RHEL8

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Lianbo,

On 2022/06/08 22:27, lijiang wrote:
> Hi, Kazu
> Thank you for the fix.
> 
> On Tue, Jun 7, 2022 at 4:42 PM HAGIO KAZUHITO(萩尾 一仁) <k-hagio-ab@xxxxxxx <mailto:k-hagio-ab@xxxxxxx>> wrote:
> 
>     There have been some reports that the "dev -d|-D" options displayed
>     incorrect I/O stats due to racy blk_mq_ctx.rq_completed value.
>     To fix it, make the options use sbitmap to count I/O stats on RHEL8
>     and adjust to its blk_mq_tags structure.
> 
>     Signed-off-by: Kazuhito Hagio <k-hagio-ab@xxxxxxx <mailto:k-hagio-ab@xxxxxxx>>
>     ---
>     The patch tested ok with upstream 4.0 to 5.18 kernels and several
>     RHEL7 and RHEL8 ones.
> 
>        dev.c | 21 +++++++++++++++++++--
>        1 file changed, 19 insertions(+), 2 deletions(-)
> 
>     diff --git a/dev.c b/dev.c
>     index 0172c83ffaea..733e3a8a40cd 100644
>     --- a/dev.c
>     +++ b/dev.c
>     @@ -4339,6 +4339,10 @@ static void bt_for_each(ulong q, ulong tags, ulong sbq, uint reserved, uint nr_r
>        static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio *dio)
>        {
>              uint i;
>     +       int bitmap_tags_is_ptr = 0;
>     +
>     +       if (MEMBER_TYPE("blk_mq_tags", "bitmap_tags") == TYPE_CODE_PTR)
>     +               bitmap_tags_is_ptr = 1;
> 
> 
> The above change is related to the kernel(v5.10-rc1) commit:
> 222a5ae03cdd ("blk-mq: Use pointers for blk_mq_tags bitmap tags")
> 
> Could you please add it to the patch log?

Sure, I'll add them.  Thank you for the information.
I did not search for the upstream patches.

> 
> And later, kernel(v5.16-rc1) stops using pointers again:
> ae0f1a732f4a ("blk-mq: Stop using pointers for blk_mq_tags bitmap tags")
> 
>              for (i = 0; i < cnt; i++) {
>                      ulong addr = 0, tags = 0;
>     @@ -4357,9 +4361,17 @@ static void queue_for_each_hw_ctx(ulong q, ulong *hctx, uint cnt, struct diskio
> 
>                      if (nr_reserved_tags) {
>                              addr = tags + OFFSET(blk_mq_tags_breserved_tags);
>     +                       if (bitmap_tags_is_ptr &&
>     +                           !readmem(addr, KVADDR, &addr, sizeof(ulong),
>     +                                       "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
>     +                               break;
>                              bt_for_each(q, tags, addr, 1, nr_reserved_tags, dio);
>                      }
>                      addr = tags + OFFSET(blk_mq_tags_bitmap_tags);
>     +               if (bitmap_tags_is_ptr &&
>     +                   !readmem(addr, KVADDR, &addr, sizeof(ulong),
>     +                               "blk_mq_tags.bitmap_tags", RETURN_ON_ERROR))
>     +                       break;
>                      bt_for_each(q, tags, addr, 0, nr_reserved_tags, dio);
>              }
>        }
>     @@ -4423,8 +4435,13 @@ get_mq_diskio(unsigned long q, unsigned long *mq_count)
>              unsigned long mctx_addr;
>              struct diskio tmp = {0};
> 
>     -       if (INVALID_MEMBER(blk_mq_ctx_rq_dispatched) ||
>     -           INVALID_MEMBER(blk_mq_ctx_rq_completed)) {
>     +       /*
>     +        * Currently it does not support earlier sbitmap and blk-mq
>     +        * implementation e.g. RHEL7, so filter them out.
>     +        */
>     +       if (VALID_STRUCT(sbitmap) &&
>     +           VALID_MEMBER(sbitmap_word_cleared) &&
>     +           VALID_MEMBER(request_state)) {
> 
> 
> The struct sbitmap was introduced in the kernel v4.9-rc1:
> 88459642cba4 ("blk-mq: abstract tag allocation out into sbitmap library")
> 
> The member of cleared in the struct sbitmap_word was added in the kernel v5.0-rc1:
> ea86ea2cdced ("sbitmap: ammortize cost of clearing bits")
> 
> The member of state in struct request was added in the kernel v4.18-rc1:
> 12f5b9314545 ("blk-mq: Remove generation seqeunce")
> 
> It indicates that the current "dev -d" command  won't work on the vmcores generated with the kernel v5.0-rc1 and
> earlier(still use the rq_dispatched and rq_completed to count the IOs on the old vmcores). These three symbols
> exist in the different kernel versions and put them together, it seems to be confused. And it would increase the
> maintenance difficulty in the future.> 
> To be on the safe side, I would suggest doing that in the distribution, because the developers often backport some
> upstream patches into the distribution, it could be more reasonable. Any ideas?

sorry, but I cannot see what is confusing and causes maintenance difficulty.
Do you mean that we should use only one macro checking if the latest
supported version v5.0, i.e. VALID_MEMBER(sbitmap_word_cleared) here?

But as you say, distributions can backport only a part of upstream patches,
so having several checks is safer, I think.

Yes, VALID_STRUCT(sbitmap) is unnecessary and can be removed.  I added it
for readability, it says that we cannot use the function without sbitmap.
Is it better to remove this?


btw, I'm preparing a patch to support sbitmap without ea86ea2cdced,
to extend the supported version of 'sbitmapq' command. (attached)
We can fix it first if you think it's better.  And then, probably
we will keep only VALID_MEMBER(request_state).

Thanks,
Kazu
diff --git a/sbitmap.c b/sbitmap.c
index e8ebd62fe01c..fbded9089134 100644
--- a/sbitmap.c
+++ b/sbitmap.c
@@ -89,7 +89,6 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
 {
 	const ulong sbitmap_word_size = SIZE(sbitmap_word);
 	const ulong w_word_off = OFFSET(sbitmap_word_word);
-	const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
 
 	unsigned int weight = 0;
 	ulong addr = sc->map_addr;
@@ -111,7 +110,10 @@ static unsigned int __sbitmap_weight(const struct sbitmap_context *sc, bool set)
 			word = ULONG(sbitmap_word_buf + w_word_off);
 			weight += bitmap_weight(word, depth);
 		} else {
-			cleared = ULONG(sbitmap_word_buf + w_cleared_off);
+			if (VALID_MEMBER(sbitmap_word_cleared))
+				cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+			else
+				cleared = 0;
 			weight += bitmap_weight(cleared, depth);
 		}
 
@@ -130,7 +132,10 @@ static unsigned int sbitmap_weight(const struct sbitmap_context *sc)
 
 static unsigned int sbitmap_cleared(const struct sbitmap_context *sc)
 {
-	return __sbitmap_weight(sc, false);
+	if (VALID_MEMBER(sbitmap_word_cleared)) /* 5.0 and later */
+		return __sbitmap_weight(sc, false);
+	else
+		return 0;
 }
 
 static void sbitmap_emit_byte(unsigned int offset, uint8_t byte)
@@ -149,7 +154,6 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
 {
 	const ulong sbitmap_word_size = SIZE(sbitmap_word);
 	const ulong w_word_off = OFFSET(sbitmap_word_word);
-	const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
 
 	uint8_t byte = 0;
 	unsigned int byte_bits = 0;
@@ -169,7 +173,10 @@ static void sbitmap_bitmap_show(const struct sbitmap_context *sc)
 		}
 
 		word = ULONG(sbitmap_word_buf + w_word_off);
-		cleared = ULONG(sbitmap_word_buf + w_cleared_off);
+		if (VALID_MEMBER(sbitmap_word_cleared))
+			cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+		else
+			cleared = 0;
 		word_bits = __map_depth(sc, i);
 
 		word &= ~cleared;
@@ -219,7 +226,6 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
 {
 	const ulong sbitmap_word_size = SIZE(sbitmap_word);
 	const ulong w_word_off = OFFSET(sbitmap_word_word);
-	const ulong w_cleared_off = OFFSET(sbitmap_word_cleared);
 
 	unsigned int index;
 	unsigned int nr;
@@ -245,7 +251,10 @@ static void __sbitmap_for_each_set(const struct sbitmap_context *sc,
 		}
 
 		w_word = ULONG(sbitmap_word_buf + w_word_off);
-		w_cleared = ULONG(sbitmap_word_buf + w_cleared_off);
+		if (VALID_MEMBER(sbitmap_word_cleared))
+			w_cleared = ULONG(sbitmap_word_buf + OFFSET(sbitmap_word_cleared));
+		else
+			w_cleared = 0;
 
 		depth = min(__map_depth(sc, index) - nr, sc->depth - scanned);
 
@@ -297,7 +306,8 @@ static void sbitmap_queue_show(const struct sbitmap_queue_context *sqc,
 
 	fprintf(fp, "depth = %u\n", sc->depth);
 	fprintf(fp, "busy = %u\n", sbitmap_weight(sc) - sbitmap_cleared(sc));
-	fprintf(fp, "cleared = %u\n", sbitmap_cleared(sc));
+	if (VALID_MEMBER(sbitmap_word_cleared)) /* 5.0 and later */
+		fprintf(fp, "cleared = %u\n", sbitmap_cleared(sc));
 	fprintf(fp, "bits_per_word = %u\n", 1U << sc->shift);
 	fprintf(fp, "map_nr = %u\n", sc->map_nr);
 
-- 
2.27.0

--
Crash-utility mailing list
Crash-utility@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/crash-utility
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

[Index of Archives]     [Fedora Development]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]     [Fedora Tools]

 

Powered by Linux