Re: [bug report] WARNING at block/blk-merge.c:607 __blk_rq_map_sg+0xf0/0x110 and BUG at drivers/scsi/scsi_lib.c:1160! on linux-block/for-next

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 7/8/24 23:21, Yi Zhang wrote:
> Hello
> blktests block/032 triggered the below issue on commit [1] during CKI
> tests, please help check it, thanks.
>
> [1]
> https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git  @ for-next
> Commit Hash: 7f8851d381f71952787db6a1a71bef4d286f3df0
> https://datawarehouse.cki-project.org/kcidb/checkouts/redhat:1364149314

I've seen the same thing :-

The block/for-next branch results in an Oops [1].
The following might be the commit causing the issue :-

Author: Christoph Hellwig <hch@xxxxxx>
Date:   Sat Jul 6 09:52:18 2024 +0200

     block: pass a phys_addr_t to get_max_segment_size

* First warning is coming from :-

  610 int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
  611                 struct scatterlist *sglist, struct scatterlist **last_sg)
  612 {
  613         int nsegs = 0;
  614
  615         if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
  616                 nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
  617         else if (rq->bio)
  618                 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
  619
  620         if (*last_sg)
  621                 sg_mark_end(*last_sg);
  622
  623         /*
  624          * Something must have been wrong if the figured number of
  625          * segment is bigger than number of req's physical segments
  626          */
----> 627     WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
  628
  629         return nsegs;
  630 }


* The second failure is a BUG_ON() in the SCSI driver :-

scsi_alloc_sgtables()
1160         BUG_ON(count > cmd->sdb.table.nents);


Looking at blk_bvec_map_sg(), the calculation at the end of the loop :-

     nbytes -= len;

depends on len, which is returned by get_max_segment_size(), the
function changed by the commit mentioned above. Hence the loop is not
terminating correctly with the right nbytes value, leading to a wrong
segment count.

Please have a look; the log [1] and the debug patch [2] are below for reference.

-ck

[1]
<6>[    9.958970] blk_bvec_map_sg 492---->
<6>[    9.958975]  817 blk_bvec_map_sg 499 total 0 nbytes 8192 nsegs 0 
len 8192
<6>[    9.958977] 817 blk_bvec_map_sg 514
<6>[    9.958977] 817 blk_bvec_map_sg 517
<6>[    9.958978] 817 blk_bvec_map_sg 519
<6>[    9.958979] 817 blk_bvec_map_sg 521 total 0 nbytes 8192 nsegs 0 
len 8192
<6>[    9.958980] 817 blk_bvec_map_sg 528 total 8192 nbytes 0 nsegs 1 
len 8192
<6>[    9.958981] 817 blk_bvec_map_sg 533 total 8192 nbytes 0 nsegs 1
<6>[    9.958982] blk_bvec_map_sg 536<-----
<6>[    9.958982] blk_bvec_map_sg 492---->
<6>[    9.958983]  817 blk_bvec_map_sg 499 total 0 nbytes 8192 nsegs 0 
len 8192
<6>[    9.958983] 817 blk_bvec_map_sg 514
<6>[    9.958984] 817 blk_bvec_map_sg 517
<6>[    9.958984] 817 blk_bvec_map_sg 519
<6>[    9.958985] 817 blk_bvec_map_sg 521 total 0 nbytes 8192 nsegs 0 
len 8192
<6>[    9.958986] 817 blk_bvec_map_sg 528 total 8192 nbytes 0 nsegs 1 
len 8192
<6>[    9.958987] 817 blk_bvec_map_sg 533 total 8192 nbytes 0 nsegs 1
<6>[    9.958988] blk_bvec_map_sg 536<-----
<6>[    9.958988] blk_bvec_map_sg 492---->
<6>[    9.958988]  817 blk_bvec_map_sg 499 total 0 nbytes 8192 nsegs 0 
len 8192
<6>[    9.958989] 817 blk_bvec_map_sg 514
<6>[    9.958990] 817 blk_bvec_map_sg 517
<6>[    9.958990] 817 blk_bvec_map_sg 519
<6>[    9.958991] 817 blk_bvec_map_sg 521 total 0 nbytes 8192 nsegs 0 
len 8192
<6>[    9.958992] 817 blk_bvec_map_sg 528 total 8192 nbytes 0 nsegs 1 
len 8192
<6>[    9.958993] 817 blk_bvec_map_sg 533 total 8192 nbytes 0 nsegs 1
<6>[    9.958993] blk_bvec_map_sg 536<-----

<6>[    9.959343] blk_bvec_map_sg 492---->
<6>[    9.959350]  875 blk_bvec_map_sg 499 total 0 nbytes 24576 nsegs 0 
len 8192
<6>[    9.959353] 875 blk_bvec_map_sg 514
<6>[    9.959355] 875 blk_bvec_map_sg 517
<6>[    9.959356] 875 blk_bvec_map_sg 519
<6>[    9.959358] 875 blk_bvec_map_sg 521 total 0 nbytes 24576 nsegs 0 
len 8192
<6>[    9.959360] 875 blk_bvec_map_sg 528 total 8192 nbytes 16384 nsegs 
1 len 8192
<6>[    9.959362]  875 blk_bvec_map_sg 499 total 8192 nbytes 16384 nsegs 
1 len 8192
<6>[    9.959364] 875 blk_bvec_map_sg 514
<6>[    9.959365] 875 blk_bvec_map_sg 517
<6>[    9.959366] 875 blk_bvec_map_sg 519
<6>[    9.959366] 875 blk_bvec_map_sg 521 total 8192 nbytes 16384 nsegs 
1 len 8192
<6>[    9.959368] 875 blk_bvec_map_sg 528 total 16384 nbytes 8192 nsegs 
2 len 8192
<6>[    9.959370]  875 blk_bvec_map_sg 499 total 16384 nbytes 8192 nsegs 
2 len 8192
<6>[    9.959372] 875 blk_bvec_map_sg 514
<6>[    9.959373] 875 blk_bvec_map_sg 517
<6>[    9.959373] 875 blk_bvec_map_sg 519
<6>[    9.959374] 875 blk_bvec_map_sg 521 total 16384 nbytes 8192 nsegs 
2 len 8192
<6>[    9.959376] 875 blk_bvec_map_sg 528 total 24576 nbytes 0 nsegs 3 
len 8192
<6>[    9.959377] 875 blk_bvec_map_sg 533 total 24576 nbytes 0 nsegs 3
<6>[    9.959378] blk_bvec_map_sg 536<-----

<4>[    9.959395] ------------[ cut here ]------------
<4>[    9.959397] WARNING: CPU: 8 PID: 875 at block/blk-merge.c:627 
__blk_rq_map_sg+0x5f2/0x620
<4>[    9.959402] Modules linked in: failover(+) sha256_ssse3(+) 
sha1_ssse3 serio_raw dimlib virtio_blk drm(+) ata_generic pata_acpi 
qemu_fw_cfg ipmi_devintf ipmi_msghandler fuse
<4>[    9.959412] CPU: 8 PID: 875 Comm: kworker/8:1H Tainted: 
G                 N 6.10.0-rc6lblk+ #34
<4>[    9.959415] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 
BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
<4>[    9.959417] Workqueue: kblockd blk_mq_run_work_fn
<4>[    9.959421] RIP: 0010:__blk_rq_map_sg+0x5f2/0x620

<4>[    9.959425] RSP: 0018:ffffc9000147fb50 EFLAGS: 00010202
<4>[    9.959427] RAX: 0000000000000002 RBX: 0000000000000003 RCX: 
0000000000000001
<4>[    9.959428] RDX: ffff88810f879260 RSI: ffffffff8278ffa7 RDI: 
ffff88810f879000
<4>[    9.959430] RBP: ffffea00043e7c80 R08: ffff88983ff397a8 R09: 
00000000ffffbfff
<4>[    9.959431] R10: ffff8897df0a0000 R11: ffff88983fed97c0 R12: 
0000000000000000
<4>[    9.959432] R13: 0000000000006000 R14: ffff88810ea1d100 R15: 
ffffc9000147fc08
<4>[    9.959436] FS:  0000000000000000(0000) GS:ffff8897df400000(0000) 
knlGS:0000000000000000
<4>[    9.959438] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[    9.959439] CR2: 00007f7920032000 CR3: 000000017bf7c000 CR4: 
0000000000350ef0
<4>[    9.959442] DR0: ffffffff837f7300 DR1: ffffffff837f7301 DR2: 
ffffffff837f7302
<4>[    9.959444] DR3: ffffffff837f7303 DR6: 00000000ffff0ff0 DR7: 
0000000000000600
<4>[    9.959445] Call Trace:
<4>[    9.959457]  <TASK>
<4>[    9.959459]  ? __warn+0x7f/0x120
<4>[    9.959462]  ? __blk_rq_map_sg+0x5f2/0x620
<4>[    9.959464]  ? report_bug+0x1c3/0x1d0
<4>[    9.959468]  ? handle_bug+0x42/0x70
<4>[    9.959471]  ? exc_invalid_op+0x14/0x70
<4>[    9.959473]  ? asm_exc_invalid_op+0x16/0x20
<4>[    9.959477]  ? __blk_rq_map_sg+0x5f2/0x620
<4>[    9.959480]  scsi_alloc_sgtables+0xb8/0x3f0
<4>[    9.959484]  sd_init_command+0x17c/0xb40
<4>[    9.959487]  ? sbitmap_find_bit+0x93/0x170
<4>[    9.959491]  scsi_queue_rq+0x819/0xc40
<4>[    9.959493]  blk_mq_dispatch_rq_list+0x28c/0x740
<4>[    9.959498]  ? sysvec_kvm_asyncpf_interrupt+0xc1/0xd0
<4>[    9.959502] __blk_mq_sched_dispatch_requests+0xb7/0x5f0
<4>[    9.959505]  ? __perf_event_task_sched_out+0x35/0x460
<4>[    9.959509]  ? finish_task_switch.isra.0+0x9e/0x2e0
<4>[    9.959513]  blk_mq_sched_dispatch_requests+0x2d/0x60
<4>[    9.959516]  blk_mq_run_work_fn+0x60/0x70
<4>[    9.959519]  process_one_work+0x158/0x360
<4>[    9.959523]  worker_thread+0x2e7/0x400
<4>[    9.959526]  ? __pfx_worker_thread+0x10/0x10
<4>[    9.959529]  kthread+0xdb/0x110
<4>[    9.959531]  ? __pfx_kthread+0x10/0x10
<4>[    9.959533]  ret_from_fork+0x2d/0x50
<4>[    9.959537]  ? __pfx_kthread+0x10/0x10
<4>[    9.959539]  ret_from_fork_asm+0x1a/0x30
<4>[    9.959544]  </TASK>
<4>[    9.959545] ---[ end trace 0000000000000000 ]---


<4>[    9.959558] ------------[ cut here ]------------
<2>[    9.959559] kernel BUG at drivers/scsi/scsi_lib.c:1160!
<6>[    9.959726] ACPI: bus type drm_connector registered

[8]kdb>


[2] Debug patch :-

 From e8f4ec280cae5d0741cb7080cc3a0fbbfd20e0d7 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <kch@xxxxxxxxxx>
Date: Mon, 8 Jul 2024 20:39:59 -0700
Subject: [PATCH] block: blk_bvec_map_sg() debug

Signed-off-by: Chaitanya Kulkarni <kch@xxxxxxxxxx>
---
  block/blk-merge.c | 20 ++++++++++++++++++++
  1 file changed, 20 insertions(+)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index e41ea3318099..d0932aff4fee 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -489,12 +489,17 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
      unsigned nbytes = bvec->bv_len;
      unsigned nsegs = 0, total = 0;

+    pr_info("%s %d---->\n", __func__, __LINE__);
      while (nbytes > 0) {
          unsigned offset = bvec->bv_offset + total;
          unsigned len = get_max_segment_size(&q->limits, bvec_phys(bvec),
              nbytes);
          struct page *page = bvec->bv_page;

+        pr_info(" %d %s %d total %u nbytes %u nsegs %u len %u \n",
+                current->pid, __func__, __LINE__,
+                total, nbytes, nsegs, len);
+
          /*
           * Unfortunately a fair number of drivers barf on scatterlists
           * that have an offset larger than PAGE_SIZE, despite other
@@ -506,14 +511,29 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
          page += (offset >> PAGE_SHIFT);
          offset &= ~PAGE_MASK;

+        pr_info("%d %s %d\n", current->pid, __func__, __LINE__);
          *sg = blk_next_sg(sg, sglist);
+
+        pr_info("%d %s %d\n", current->pid, __func__, __LINE__);
          sg_set_page(*sg, page, len, offset);
+        pr_info("%d %s %d\n", current->pid, __func__, __LINE__);
+
+        pr_info("%d %s %d total %u nbytes %u nsegs %u len %u \n",
+                current->pid, __func__, __LINE__,
+                total, nbytes, nsegs, len);

          total += len;
          nbytes -= len;
          nsegs++;
+        pr_info("%d %s %d total %u nbytes %u nsegs %u len %u \n",
+                current->pid, __func__, __LINE__,
+                total, nbytes, nsegs, len);
      }

+    pr_info("%d %s %d total %u nbytes %u nsegs %u len %u \n",
+            current->pid, __func__, __LINE__,
+            total, nbytes, nsegs, len);
+    pr_info("%s %d<-----\n", __func__, __LINE__);
      return nsegs;
  }

-- 
2.40.0





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux