From: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx> Hello, I encounter below problems when do blktests nvme/040 on a qemu vm, there are two kinds of bug traces, but the cause should be the same. CPU0 CPU1 nvme_cancel_tagset() blk_mq_tagset_busy_iter() bt_tags_iter() rq = blk_mq_find_and_get_req() // ref = 1 + 1 = 2 nvme_cancel_request() // rq->state == MQ_RQ_IN_FLIGHT blk_mq_complete_request(rq) blk_mq_free_request(rq) req_ref_put_and_test() // ref = 2 - 1 = 1 blk_mq_put_rq_ref() // ref = 0 __blk_mq_free_request() blk_mq_complete_request_remote(rq) rq->mq_hctx->nr_ctx // rq->mq_hctx == 0 // nr_ctx offset is 0x136 I change the rq->state setting in blk_mq_complete_request_remote() to use cmpxchg() to make sure one request complete only once. Then run blktests nvme/040 for a long time, no problem happened again. But I'm not familiar with these nvme driver code, maybe this is a bug in the nvme driver code that nvme_cancel_tagset() should forbid inflight request from concurrent completing ? Bug1: ===== BUG: kernel NULL pointer dereference, address: 0000000000000136 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 13 PID: 95 Comm: ksoftirqd/13 Tainted: G E 6.4.0-rc7-next-20230623+ #134 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc37 04/01/2014 RIP: 0010:blk_mq_complete_request_remote+0x1c/0x140 Code: 88 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 0f 1f 44 00 00 55 53 48 8b 47 10 48 89 fb c7 87 94 00 00 00 02 00 00 00 <66> 83 b8 36 01 00 00 01 0f 84 bc 00 00 00 f6 43 1a 40 74 05 31 c0 RSP: 0018:ffffad1a403abd88 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff999d891b2d00 RCX: 0000000000000000 RDX: 000000000000006b RSI: 000000000000000c RDI: ffff999d891b2d00 RBP: ffff999d89a4ce78 R08: 000000000000000a R09: 000000000000005f R10: ffffffffbc0060c0 R11: 0000000000000000 R12: ffff999d89a4ce70 R13: ffff999d891b2e78 R14: 0000000000001000 R15: 0000000000001000 FS: 0000000000000000(0000) GS:ffff99a0afd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000136 CR3: 0000000108528002 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? __die+0x23/0x70 ? page_fault_oops+0x159/0x440 ? check_preempt_wakeup+0x13a/0x2c0 ? exc_page_fault+0x77/0x170 ? asm_exc_page_fault+0x26/0x30 ? blk_mq_complete_request_remote+0x1c/0x140 ? psi_group_change+0x168/0x400 nvme_loop_queue_response+0xe6/0x1a0 __nvmet_req_complete+0x55/0x200 nvmet_req_complete+0x16/0x50 nvmet_bio_done+0x2b/0x50 blk_update_request+0x15c/0x4c0 ? __schedule+0x3d4/0x1440 ? sbitmap_queue_clear+0x3b/0x60 blk_mq_end_request+0x1e/0xd0 blk_complete_reqs+0x3a/0x50 __do_softirq+0xcf/0x2b6 ? sort_range+0x20/0x20 run_ksoftirqd+0x28/0x40 smpboot_thread_fn+0xcb/0x1b0 kthread+0xe5/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 </TASK> Bug2: ===== WARNING: CPU: 1 PID: 158 at block/blk.h:505 blk_mq_put_rq_ref+0x51/0x60 Modules linked in: loop(E) edac_core(E) intel_rapl_msr(E) intel_rapl_common(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) sha512_ssse3(E) sha512_generic(E) sr_mod(E) psmouse(E) cdrom(E) aesni_intel(E) ata_generic(E) nd_pmem(E) crypto_simd(E) cryptd(E) nd_btt(E) dax_pmem(E) evdev(E) serio_raw(E) pcspkr(E) floppy(E) ata_piix(E) nfit(E) libata(E) libnvdimm(E) virtio_blk(E) i2c_piix4(E) button(E) CPU: 1 PID: 158 Comm: kworker/u32:3 Tainted: G E 6.4.0+ #137 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc37 04/01/2014 Workqueue: nvme-reset-wq nvme_loop_reset_ctrl_work RIP: 0010:blk_mq_put_rq_ref+0x51/0x60 Code: c3 8b 83 98 00 00 00 83 c0 7f 83 f8 7f 76 1b f0 ff 8b 98 00 00 00 75 e7 48 89 df 5b e9 48 d4 ff ff 48 89 df 5b e9 2f d5 ff ff <0f> 0b eb e1 66 66 2e 0f 1f 84 00 00 00 00 00 66 0f 1f 00 0f 1f 44 RSP: 0018:ffffbd0a00593d88 EFLAGS: 00010246 RAX: 000000000000007f RBX: ffff9990ca066800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff9990ca066800 RBP: ffff9990ca066800 R08: 0000000000004f53 R09: 00000000003d0900 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000010 R14: ffffbd0a00593df0 R15: 0000000000000005 FS: 0000000000000000(0000) GS:ffff9993efa40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bb72eaed08 CR3: 000000010a2ce001 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? blk_mq_put_rq_ref+0x51/0x60 ? __warn+0x81/0x130 ? blk_mq_put_rq_ref+0x51/0x60 ? report_bug+0x16d/0x1a0 ? handle_bug+0x41/0x70 ? exc_invalid_op+0x17/0x60 ? asm_exc_invalid_op+0x1a/0x20 ? blk_mq_put_rq_ref+0x51/0x60 ? blk_mq_put_rq_ref+0x12/0x60 bt_tags_iter+0x79/0xb0 blk_mq_tagset_busy_iter+0x1b5/0x330 ? nvme_try_sched_reset+0x40/0x40 ? nvme_try_sched_reset+0x40/0x40 nvme_cancel_tagset+0x25/0x40 nvme_loop_shutdown_ctrl+0x2a/0x90 nvme_loop_reset_ctrl_work+0x2e/0x120 process_one_work+0x1dc/0x3d0 worker_thread+0x1af/0x380 ? rescuer_thread+0x3b0/0x3b0 kthread+0xe5/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 </TASK> Signed-off-by: Chengming Zhou <zhouchengming@xxxxxxxxxxxxx> --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 5504719b970d..a83aed2bc964 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1169,7 +1169,8 @@ static void blk_mq_raise_softirq(struct request *rq) bool blk_mq_complete_request_remote(struct request *rq) { - WRITE_ONCE(rq->state, MQ_RQ_COMPLETE); + if (cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) != MQ_RQ_IN_FLIGHT) + return true; /* * For request which hctx has only one ctx mapping, -- 2.41.0