[bug report] blktests block/005: KASAN uaf at bio merge

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I observed blktests block/005 failure by KASAN use-after-free. The test case
repeats IO scheduler switch during fio workload. According to the kernel
message, it looks like bio submit path is running while IO scheduler is invalid,
and kyber_bio_merge accessed the invalid IO scheduler data.

The failure was observed with v5.17-rc1, and still observed with v5.17-rc7.
It was observed with QEMU NVME emulation device with ZNS zoned device
emulation. So far, it is not observed with other devices. To recreate the
failure, need to repeat the test case. In the worst case, 500 times repetition
is required.

I bisected and found the commit 9d497e2941c3 ("block: don't protect
submit_bio_checks by q_usage_counter") triggers it. The commit moves calls of
3 functions: submit_bio_checks, blk_mq_attempt_bio_merge and rq_qos_throttle.
I suspected that the move of blk_mq_attempt_bio_merge is the cause, and tried
to revert only that part. But it caused linux system boot hang related to
rq_qos_throttle. Then I created another patch which reverts moves of both
blk_mq_attempt_bio_merge and rq_qos_throttle calls [2]. With this patch, the
KASAN uaf disappeared. Based on this observation, I think the failure cause is
the move of blk_mq_attempt_bio_merge out of guard by bio_queue_enter.

I'm not sure if the fix by the patch [2] is good enough. With that fix, the
blk_mq_attempt_bio_merge call in blk_mq_get_new_requests is guarded with
bio_queue_enter, but the blk_mq_attempt_bio_merge call in
blk_mq_get_cached_request may not be well guarded. Comments for fix approach
will be appreciated.


[1]

[  335.931534] run blktests block/005 at 2022-03-07 10:15:29
[  336.285062] general protection fault, probably for non-canonical address 0xdffffc0000000011: 0000 [#1] PREEMPT SMP KASAN PTI
[  336.291190] KASAN: null-ptr-deref in range [0x0000000000000088-0x000000000000008f]
[  336.297513] CPU: 0 PID: 1864 Comm: fio Not tainted 5.16.0-rc3+ #15
[  336.302034] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.15.0-29-g6a62e0cb0dfe-prebuilt.qemu.org 04/01/2014
[  336.309636] RIP: 0010:kyber_bio_merge+0x121/0x320
[  336.315382] Code: 80 3c 16 00 0f 85 f8 01 00 00 49 8b ac 24 40 01 00 00 48 ba 00 00 00 00 00 fc ff df 48 8d bd 88 00 00 00 48 89 fe 48 c1 ee 03 <80> 3c 16 00 0f 85 80 01 00 00 49 8d bc 24 8c 01 00 00 4c 8b ad 88
[  336.330777] RSP: 0018:ffff8881105e7820 EFLAGS: 00010206
[  336.334971] RAX: 0000000000000000 RBX: ffffe8ffffc10040 RCX: 0000000000000000
[  336.340428] RDX: dffffc0000000000 RSI: 0000000000000011 RDI: 0000000000000088
[  336.345951] RBP: 0000000000000000 R08: 0000000000000000 R09: ffff888116b5a29f
[  336.351789] R10: ffffed1022d6b453 R11: 0000000000000001 R12: ffff88811321a800
[  336.357550] R13: 0000000000000000 R14: ffff8881105e7bf0 R15: 0000000000000001
[  336.363334] FS:  00007f1adf654b40(0000) GS:ffff8881e5600000(0000) knlGS:0000000000000000
[  336.369008] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  336.373065] CR2: 000000000049be98 CR3: 0000000118c86002 CR4: 0000000000170ef0
[  336.378103] Call Trace:
[  336.380390]  <TASK>
[  336.382386]  blk_mq_submit_bio+0xd0c/0x1b40
[  336.386036]  ? rcu_read_lock_sched_held+0x3f/0x70
[  336.389727]  ? blk_mq_try_issue_list_directly+0x3c0/0x3c0
[  336.394052]  ? get_user_pages+0xa0/0xa0
[  336.396975]  ? should_fail_request+0x70/0x70
[  336.400050]  __submit_bio+0x245/0x2e0
[  336.402967]  ? submit_bio_checks+0x1700/0x1700
[  336.406347]  ? find_held_lock+0x2c/0x110
[  336.409339]  submit_bio_noacct+0x5a1/0x810
[  336.412178]  ? __submit_bio+0x2e0/0x2e0
[  336.415044]  ? __blkdev_direct_IO_simple+0x3f8/0x6f0
[  336.418354]  submit_bio+0x149/0x340
[  336.420715]  ? submit_bio_noacct+0x810/0x810
[  336.423491]  __blkdev_direct_IO_simple+0x3cd/0x6f0
[  336.426723]  ? blkdev_llseek+0xc0/0xc0
[  336.429513]  ? lock_release+0x3a9/0x6d0
[  336.432221]  ? lock_downgrade+0x6b0/0x6b0
[  336.435057]  ? next_uptodate_page+0x59e/0x7d0
[  336.438730]  ? blkdev_get_block+0xd0/0xd0
[  336.442617]  ? fsnotify_first_mark+0x140/0x140
[  336.446481]  blkdev_read_iter+0x3e2/0x580
[  336.450322]  ? __do_fault+0x460/0x460
[  336.453902]  new_sync_read+0x35f/0x5d0
[  336.457137]  ? __fsnotify_parent+0x275/0xb20
[  336.460911]  ? __ia32_sys_llseek+0x2f0/0x2f0
[  336.464331]  ? __fsnotify_update_child_dentry_flags+0x2e0/0x2e0
[  336.467827]  ? inode_security+0x4f/0xf0
[  336.470315]  vfs_read+0x26c/0x4c0
[  336.472552]  __x64_sys_pread64+0x17c/0x1d0
[  336.474961]  ? vfs_read+0x4c0/0x4c0
[  336.477133]  ? syscall_enter_from_user_mode+0x21/0x70
[  336.479819]  do_syscall_64+0x3b/0x90
[  336.481788]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  336.484985] RIP: 0033:0x7f1ae8d7b81f
[  336.487043] Code: 08 89 3c 24 48 89 4c 24 18 e8 bd a8 f8 ff 4c 8b 54 24 18 48 8b 54 24 10 41 89 c0 48 8b 74 24 08 8b 3c 24 b8 11 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 31 44 89 c7 48 89 04 24 e8 fd a8 f8 ff 48 8b
[  336.496657] RSP: 002b:00007ffed8151d60 EFLAGS: 00000293 ORIG_RAX: 0000000000000011
[  336.500726] RAX: ffffffffffffffda RBX: 00000000017ce540 RCX: 00007f1ae8d7b81f
[  336.504702] RDX: 0000000000001000 RSI: 0000000001894000 RDI: 0000000000000008
[  336.508525] RBP: 00000000017ce540 R08: 0000000000000000 R09: 0000000000000001
[  336.512202] R10: 00000000104e6000 R11: 0000000000000293 R12: 00007f1ac9ef3e18
[  336.515665] R13: 0000000000000000 R14: 0000000000001000 R15: 0000000000000001
[  336.518168]  </TASK>

[2]

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d69ca91fbc8b..59c66b9e8a44 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2718,7 +2718,8 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
 
 static struct request *blk_mq_get_new_requests(struct request_queue *q,
 					       struct blk_plug *plug,
-					       struct bio *bio)
+					       struct bio *bio,
+					       unsigned int nsegs)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
@@ -2730,6 +2731,11 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	if (unlikely(bio_queue_enter(bio)))
 		return NULL;
 
+	if (blk_mq_attempt_bio_merge(q, bio, nsegs))
+		goto queue_exit;
+
+	rq_qos_throttle(q, bio);
+
 	if (plug) {
 		data.nr_tags = plug->nr_ios;
 		plug->nr_ios = 1;
@@ -2742,12 +2748,13 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_cleanup(q, bio);
 	if (bio->bi_opf & REQ_NOWAIT)
 		bio_wouldblock_error(bio);
+queue_exit:
 	blk_queue_exit(q);
 	return NULL;
 }
 
 static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
-		struct blk_plug *plug, struct bio *bio)
+		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
 {
 	struct request *rq;
 
@@ -2757,14 +2764,20 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 	if (!rq || rq->q != q)
 		return NULL;
 
-	if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
+	if (blk_mq_attempt_bio_merge(q, *bio, nsegs)) {
+		*bio = NULL;
+		return NULL;
+	}
+
+	if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type)
 		return NULL;
-	if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
 		return NULL;
 
-	rq->cmd_flags = bio->bi_opf;
+	rq->cmd_flags = (*bio)->bi_opf;
 	plug->cached_rq = rq_list_next(rq);
 	INIT_LIST_HEAD(&rq->queuelist);
+	rq_qos_throttle(q, *bio);
 	return rq;
 }
 
@@ -2800,14 +2813,11 @@ void blk_mq_submit_bio(struct bio *bio)
 	if (!bio_integrity_prep(bio))
 		return;
 
-	if (blk_mq_attempt_bio_merge(q, bio, nr_segs))
-		return;
-
-	rq_qos_throttle(q, bio);
-
-	rq = blk_mq_get_cached_request(q, plug, bio);
+	rq = blk_mq_get_cached_request(q, plug, &bio, nr_segs);
 	if (!rq) {
-		rq = blk_mq_get_new_requests(q, plug, bio);
+		if (!bio)
+			return;
+		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
 		if (unlikely(!rq))
 			return;
 	}

-- 
Best Regards,
Shin'ichiro Kawasaki



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux