On 7/8/24 23:21, Yi Zhang wrote: > Hello > blktests block/032 triggered the below issue on commit [1] during CKI > tests, please help check it, thanks. > > [1] > https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git @ for-next > Commit Hash: 7f8851d381f71952787db6a1a71bef4d286f3df0 > https://datawarehouse.cki-project.org/kcidb/checkouts/redhat:1364149314 I've seen the same thing: the block/for-next branch results in the Oops shown in the log [1] below. The following commit might be causing the issue: Author: Christoph Hellwig <hch@xxxxxx> Date: Sat Jul 6 09:52:18 2024 +0200 block: pass a phys_addr_t to get_max_segment_size * The first warning comes from: 610 int __blk_rq_map_sg(struct request_queue *q, struct request *rq, 611 struct scatterlist *sglist, struct scatterlist **last_sg) 612 { 613 int nsegs = 0; 614 615 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) 616 nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg); 617 else if (rq->bio) 618 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg); 619 620 if (*last_sg) 621 sg_mark_end(*last_sg); 622 623 /* 624 * Something must have been wrong if the figured number of 625 * segment is bigger than number of req's physical segments 626 */ ----> 627 WARN_ON(nsegs > blk_rq_nr_phys_segments(rq)); 628 629 return nsegs; 630 } * The second report is a BUG_ON in the SCSI driver, in scsi_alloc_sgtables(): 1160 BUG_ON(count > cmd->sdb.table.nents); Looking at blk_bvec_map_sg(), the calculation at the end of the loop, nbytes -= len;, is tied to get_max_segment_size(), which comes from the commit mentioned above; hence the loop is not terminating with the right nbytes value, leading to a wrong segment count. Please have a look; the log [1] and the debug patch [2] are included below for reference.
-ck [1] <6>[ 9.958970] blk_bvec_map_sg 492----> <6>[ 9.958975] 817 blk_bvec_map_sg 499 total 0 nbytes 8192 nsegs 0 len 8192 <6>[ 9.958977] 817 blk_bvec_map_sg 514 <6>[ 9.958977] 817 blk_bvec_map_sg 517 <6>[ 9.958978] 817 blk_bvec_map_sg 519 <6>[ 9.958979] 817 blk_bvec_map_sg 521 total 0 nbytes 8192 nsegs 0 len 8192 <6>[ 9.958980] 817 blk_bvec_map_sg 528 total 8192 nbytes 0 nsegs 1 len 8192 <6>[ 9.958981] 817 blk_bvec_map_sg 533 total 8192 nbytes 0 nsegs 1 <6>[ 9.958982] blk_bvec_map_sg 536<----- <6>[ 9.958982] blk_bvec_map_sg 492----> <6>[ 9.958983] 817 blk_bvec_map_sg 499 total 0 nbytes 8192 nsegs 0 len 8192 <6>[ 9.958983] 817 blk_bvec_map_sg 514 <6>[ 9.958984] 817 blk_bvec_map_sg 517 <6>[ 9.958984] 817 blk_bvec_map_sg 519 <6>[ 9.958985] 817 blk_bvec_map_sg 521 total 0 nbytes 8192 nsegs 0 len 8192 <6>[ 9.958986] 817 blk_bvec_map_sg 528 total 8192 nbytes 0 nsegs 1 len 8192 <6>[ 9.958987] 817 blk_bvec_map_sg 533 total 8192 nbytes 0 nsegs 1 <6>[ 9.958988] blk_bvec_map_sg 536<----- <6>[ 9.958988] blk_bvec_map_sg 492----> <6>[ 9.958988] 817 blk_bvec_map_sg 499 total 0 nbytes 8192 nsegs 0 len 8192 <6>[ 9.958989] 817 blk_bvec_map_sg 514 <6>[ 9.958990] 817 blk_bvec_map_sg 517 <6>[ 9.958990] 817 blk_bvec_map_sg 519 <6>[ 9.958991] 817 blk_bvec_map_sg 521 total 0 nbytes 8192 nsegs 0 len 8192 <6>[ 9.958992] 817 blk_bvec_map_sg 528 total 8192 nbytes 0 nsegs 1 len 8192 <6>[ 9.958993] 817 blk_bvec_map_sg 533 total 8192 nbytes 0 nsegs 1 <6>[ 9.958993] blk_bvec_map_sg 536<----- <6>[ 9.959343] blk_bvec_map_sg 492----> <6>[ 9.959350] 875 blk_bvec_map_sg 499 total 0 nbytes 24576 nsegs 0 len 8192 <6>[ 9.959353] 875 blk_bvec_map_sg 514 <6>[ 9.959355] 875 blk_bvec_map_sg 517 <6>[ 9.959356] 875 blk_bvec_map_sg 519 <6>[ 9.959358] 875 blk_bvec_map_sg 521 total 0 nbytes 24576 nsegs 0 len 8192 <6>[ 9.959360] 875 blk_bvec_map_sg 528 total 8192 nbytes 16384 nsegs 1 len 8192 <6>[ 9.959362] 875 blk_bvec_map_sg 499 total 8192 nbytes 16384 nsegs 1 len 8192 <6>[ 9.959364] 875 blk_bvec_map_sg 514 
<6>[ 9.959365] 875 blk_bvec_map_sg 517 <6>[ 9.959366] 875 blk_bvec_map_sg 519 <6>[ 9.959366] 875 blk_bvec_map_sg 521 total 8192 nbytes 16384 nsegs 1 len 8192 <6>[ 9.959368] 875 blk_bvec_map_sg 528 total 16384 nbytes 8192 nsegs 2 len 8192 <6>[ 9.959370] 875 blk_bvec_map_sg 499 total 16384 nbytes 8192 nsegs 2 len 8192 <6>[ 9.959372] 875 blk_bvec_map_sg 514 <6>[ 9.959373] 875 blk_bvec_map_sg 517 <6>[ 9.959373] 875 blk_bvec_map_sg 519 <6>[ 9.959374] 875 blk_bvec_map_sg 521 total 16384 nbytes 8192 nsegs 2 len 8192 <6>[ 9.959376] 875 blk_bvec_map_sg 528 total 24576 nbytes 0 nsegs 3 len 8192 <6>[ 9.959377] 875 blk_bvec_map_sg 533 total 24576 nbytes 0 nsegs 3 <6>[ 9.959378] blk_bvec_map_sg 536<----- <4>[ 9.959395] ------------[ cut here ]------------ <4>[ 9.959397] WARNING: CPU: 8 PID: 875 at block/blk-merge.c:627 __blk_rq_map_sg+0x5f2/0x620 <4>[ 9.959402] Modules linked in: failover(+) sha256_ssse3(+) sha1_ssse3 serio_raw dimlib virtio_blk drm(+) ata_generic pata_acpi qemu_fw_cfg ipmi_devintf ipmi_msghandler fuse <4>[ 9.959412] CPU: 8 PID: 875 Comm: kworker/8:1H Tainted: G N 6.10.0-rc6lblk+ #34 <4>[ 9.959415] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 <4>[ 9.959417] Workqueue: kblockd blk_mq_run_work_fn <4>[ 9.959421] RIP: 0010:__blk_rq_map_sg+0x5f2/0x620 <4>[ 9.959425] RSP: 0018:ffffc9000147fb50 EFLAGS: 00010202 <4>[ 9.959427] RAX: 0000000000000002 RBX: 0000000000000003 RCX: 0000000000000001 <4>[ 9.959428] RDX: ffff88810f879260 RSI: ffffffff8278ffa7 RDI: ffff88810f879000 <4>[ 9.959430] RBP: ffffea00043e7c80 R08: ffff88983ff397a8 R09: 00000000ffffbfff <4>[ 9.959431] R10: ffff8897df0a0000 R11: ffff88983fed97c0 R12: 0000000000000000 <4>[ 9.959432] R13: 0000000000006000 R14: ffff88810ea1d100 R15: ffffc9000147fc08 <4>[ 9.959436] FS: 0000000000000000(0000) GS:ffff8897df400000(0000) knlGS:0000000000000000 <4>[ 9.959438] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 9.959439] CR2: 00007f7920032000 
CR3: 000000017bf7c000 CR4: 0000000000350ef0 <4>[ 9.959442] DR0: ffffffff837f7300 DR1: ffffffff837f7301 DR2: ffffffff837f7302 <4>[ 9.959444] DR3: ffffffff837f7303 DR6: 00000000ffff0ff0 DR7: 0000000000000600 <4>[ 9.959445] Call Trace: <4>[ 9.959457] <TASK> <4>[ 9.959459] ? __warn+0x7f/0x120 <4>[ 9.959462] ? __blk_rq_map_sg+0x5f2/0x620 <4>[ 9.959464] ? report_bug+0x1c3/0x1d0 <4>[ 9.959468] ? handle_bug+0x42/0x70 <4>[ 9.959471] ? exc_invalid_op+0x14/0x70 <4>[ 9.959473] ? asm_exc_invalid_op+0x16/0x20 <4>[ 9.959477] ? __blk_rq_map_sg+0x5f2/0x620 <4>[ 9.959480] scsi_alloc_sgtables+0xb8/0x3f0 <4>[ 9.959484] sd_init_command+0x17c/0xb40 <4>[ 9.959487] ? sbitmap_find_bit+0x93/0x170 <4>[ 9.959491] scsi_queue_rq+0x819/0xc40 <4>[ 9.959493] blk_mq_dispatch_rq_list+0x28c/0x740 <4>[ 9.959498] ? sysvec_kvm_asyncpf_interrupt+0xc1/0xd0 <4>[ 9.959502] __blk_mq_sched_dispatch_requests+0xb7/0x5f0 <4>[ 9.959505] ? __perf_event_task_sched_out+0x35/0x460 <4>[ 9.959509] ? finish_task_switch.isra.0+0x9e/0x2e0 <4>[ 9.959513] blk_mq_sched_dispatch_requests+0x2d/0x60 <4>[ 9.959516] blk_mq_run_work_fn+0x60/0x70 <4>[ 9.959519] process_one_work+0x158/0x360 <4>[ 9.959523] worker_thread+0x2e7/0x400 <4>[ 9.959526] ? __pfx_worker_thread+0x10/0x10 <4>[ 9.959529] kthread+0xdb/0x110 <4>[ 9.959531] ? __pfx_kthread+0x10/0x10 <4>[ 9.959533] ret_from_fork+0x2d/0x50 <4>[ 9.959537] ? __pfx_kthread+0x10/0x10 <4>[ 9.959539] ret_from_fork_asm+0x1a/0x30 <4>[ 9.959544] </TASK> <4>[ 9.959545] ---[ end trace 0000000000000000 ]--- <4>[ 9.959558] ------------[ cut here ]------------ <2>[ 9.959559] kernel BUG at drivers/scsi/scsi_lib.c:1160! 
<6>[ 9.959726] ACPI: bus type drm_connector registered [8]kdb> [2] Debug patch :- From e8f4ec280cae5d0741cb7080cc3a0fbbfd20e0d7 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni <kch@xxxxxxxxxx> Date: Mon, 8 Jul 2024 20:39:59 -0700 Subject: [PATCH] block: blk_bvec_ma_sg() debug Signed-off-by: Chaitanya Kulkarni <kch@xxxxxxxxxx> --- block/blk-merge.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/block/blk-merge.c b/block/blk-merge.c index e41ea3318099..d0932aff4fee 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -489,12 +489,17 @@ static unsigned blk_bvec_map_sg(struct request_queue *q, unsigned nbytes = bvec->bv_len; unsigned nsegs = 0, total = 0; + pr_info("%s %d---->\n", __func__, __LINE__); while (nbytes > 0) { unsigned offset = bvec->bv_offset + total; unsigned len = get_max_segment_size(&q->limits, bvec_phys(bvec), nbytes); struct page *page = bvec->bv_page; + pr_info(" %d %s %d total %u nbytes %u nsegs %u len %u \n", + current->pid, __func__, __LINE__, + total, nbytes, nsegs, len); + /* * Unfortunately a fair number of drivers barf on scatterlists * that have an offset larger than PAGE_SIZE, despite other @@ -506,14 +511,29 @@ static unsigned blk_bvec_map_sg(struct request_queue *q, page += (offset >> PAGE_SHIFT); offset &= ~PAGE_MASK; + pr_info("%d %s %d\n", current->pid, __func__, __LINE__); *sg = blk_next_sg(sg, sglist); + + pr_info("%d %s %d\n", current->pid, __func__, __LINE__); sg_set_page(*sg, page, len, offset); + pr_info("%d %s %d\n", current->pid, __func__, __LINE__); + + pr_info("%d %s %d total %u nbytes %u nsegs %u len %u \n", + current->pid, __func__, __LINE__, + total, nbytes, nsegs, len); total += len; nbytes -= len; nsegs++; + pr_info("%d %s %d total %u nbytes %u nsegs %u len %u \n", + current->pid, __func__, __LINE__, + total, nbytes, nsegs, len); } + pr_info("%d %s %d total %u nbytes %u nsegs %u len %u \n", + current->pid, __func__, __LINE__, + total, nbytes, nsegs, len); + pr_info("%s %d<-----\n", 
__func__, __LINE__); return nsegs; } -- 2.40.0