DM brokenness with NOWAIT

Hi,

Threw some db traffic into my testing mix, and that ended in tears
very quickly:

CPU: 7 PID: 49609 Comm: ringbuf-read.t Tainted: G        W          6.6.0-rc1-g39956d2dcd81 #129
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
Call Trace:
 <TASK>
 dump_stack_lvl+0x11d/0x1b0
 __might_resched+0x3c3/0x5e0
 ? preempt_count_sub+0x150/0x150
 mempool_alloc+0x1e2/0x390
 ? sanity_check_pinned_pages+0x23/0x1010
 ? mempool_resize+0x7d0/0x7d0
 bio_alloc_bioset+0x417/0x8c0
 ? bvec_alloc+0x200/0x200
 ? __gup_device_huge+0x900/0x900
 bio_alloc_clone+0x53/0x100
 dm_submit_bio+0x27f/0x1a20
 ? lock_release+0x4b7/0x670
 ? pin_user_pages_fast+0xb6/0xf0
 ? blk_try_enter_queue+0x1a0/0x4d0
 ? dm_dax_direct_access+0x260/0x260
 ? rcu_is_watching+0x12/0xb0
 ? blk_try_enter_queue+0x1cc/0x4d0
 __submit_bio+0x239/0x310
 ? __bio_queue_enter+0x700/0x700
 ? kvm_clock_get_cycles+0x40/0x60
 ? ktime_get+0x285/0x470
 submit_bio_noacct_nocheck+0x4d9/0xb80
 ? should_fail_request+0x80/0x80
 ? preempt_count_sub+0x150/0x150
 ? folio_flags+0x6c/0x1e0
 submit_bio_noacct+0x53e/0x1b30
 blkdev_direct_IO.part.0+0x833/0x1810
 ? rcu_is_watching+0x12/0xb0
 ? lock_release+0x4b7/0x670
 ? blkdev_read_iter+0x40d/0x530
 ? reacquire_held_locks+0x4e0/0x4e0
 ? __blkdev_direct_IO_simple+0x780/0x780
 ? rcu_is_watching+0x12/0xb0
 ? __mark_inode_dirty+0x297/0xd50
 ? preempt_count_add+0x72/0x140
 blkdev_read_iter+0x2a4/0x530
 ? blkdev_write_iter+0xc40/0xc40
 io_read+0x369/0x1490
 ? rcu_is_watching+0x12/0xb0
 ? io_writev_prep_async+0x260/0x260
 ? __fget_files+0x279/0x410
 ? rcu_is_watching+0x12/0xb0
 io_issue_sqe+0x18a/0xd90
 io_submit_sqes+0x970/0x1ed0
 __do_sys_io_uring_enter+0x14d4/0x2650
 ? io_submit_sqes+0x1ed0/0x1ed0
 ? rcu_is_watching+0x12/0xb0
 ? __do_sys_io_uring_register+0x3f6/0x2190
 ? io_req_caches_free+0x500/0x500
 ? ksys_mmap_pgoff+0x85/0x5b0
 ? rcu_is_watching+0x12/0xb0
 ? trace_irq_enable.constprop.0+0xd0/0x100
 do_syscall_64+0x39/0xb0
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

which seems to demonstrate a misunderstanding of what REQ_NOWAIT is
about. In particular, it seems to assume that NOWAIT submission can be
done from atomic context? DM does an rcu_read_lock() and then happily
proceeds to attempt IO submission from inside that RCU read-side
critical section, where sleeping isn't allowed.
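
For context, the NOWAIT side grabs the live table like below (roughly
quoted from drivers/md/dm.c from memory; the helper names show up in
the patch further down, the exact shape may differ per kernel version):

static struct dm_table *dm_get_live_table_fast(struct mapped_device *md)
	__acquires(RCU)
{
	/* plain RCU, so no sleeping until the matching unlock */
	rcu_read_lock();
	return rcu_dereference(md->map);
}

and dm_submit_bio() then goes on to clone and submit the bio, which
(per the trace above) ends up in bio_alloc_clone() ->
bio_alloc_bioset() -> mempool_alloc(), which may sleep. Hence the
__might_resched() splat.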

A test case for this is pretty trivial: just do RWF_NOWAIT IO on any dm
device:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>

int main(int argc, char *argv[])
{
	struct iovec iov;
	void *buf;
	int fd;

	fd = open("/dev/dm-0", O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* 4k-aligned buffer, as O_DIRECT wants */
	if (posix_memalign(&buf, 4096, 4096))
		return 1;

	iov.iov_base = buf;
	iov.iov_len = 4096;
	/* RWF_NOWAIT read hits DM's REQ_NOWAIT fast path */
	preadv2(fd, &iov, 1, 0, RWF_NOWAIT);

	return 0;
}

and watch the splat go by. I didn't check which kernel introduced this
brokenness; a quick check shows it's in 6.5 too, at least. It really
looks like someone added a fast NOWAIT path, but then didn't actually
test it at all...

The quick patch below makes it go away, as expected, since we then
always resort to SRCU, whose read-side critical sections are allowed to
sleep.

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index f0f118ab20fa..64a1f306c96c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -715,24 +715,6 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
 	rcu_read_unlock();
 }
 
-static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
-					int *srcu_idx, blk_opf_t bio_opf)
-{
-	if (bio_opf & REQ_NOWAIT)
-		return dm_get_live_table_fast(md);
-	else
-		return dm_get_live_table(md, srcu_idx);
-}
-
-static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
-					 blk_opf_t bio_opf)
-{
-	if (bio_opf & REQ_NOWAIT)
-		dm_put_live_table_fast(md);
-	else
-		dm_put_live_table(md, srcu_idx);
-}
-
 static char *_dm_claim_ptr = "I belong to device-mapper";
 
 /*
@@ -1833,9 +1815,8 @@ static void dm_submit_bio(struct bio *bio)
 	struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
 	int srcu_idx;
 	struct dm_table *map;
-	blk_opf_t bio_opf = bio->bi_opf;
 
-	map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
+	map = dm_get_live_table(md, &srcu_idx);
 
 	/* If suspended, or map not yet available, queue this IO for later */
 	if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
@@ -1851,7 +1832,7 @@ static void dm_submit_bio(struct bio *bio)
 
 	dm_split_and_process_bio(md, map, bio);
 out:
-	dm_put_live_table_bio(md, srcu_idx, bio_opf);
+	dm_put_live_table(md, srcu_idx);
 }
 
 static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,

-- 
Jens Axboe
