Hi Neil, I am repeatedly getting deadlock with MD raid5 & running fio check. array created just this way # mdadm -C -l 5 -n 4 -c 64 --assume-clean /dev/md0 /dev/sd[bcde] and running this test (on quadcore CPU) # fio --name=global --rw=randwrite --size=1G --bsrange=1k-128k --filename=/dev/md0 --name=job1 --name=job2 --name=job3 --name=job4 --end_fsync=1 deadlocks in final fsync() I can reproduce it on Fedora 3.2.7 kernel (and also 3.3.0-rc5), below is part of the sysrq trace (full sysrq in attached gz archive) I was able to simulate it even when resync is running, it stopped resync process as well. Please let me know if you need more information. Thanks, Milan log from3.2.7-1.fc16.x86_64 [ 1182.945100] md0_raid5 S ffff880130f9e040 0 1021 2 0x00000000 [ 1182.952259] ffff88012b8f3da0 0000000000000046 ffff880100000000 7fffffffffffffff [ 1182.959778] ffff880130f9dc80 ffff88012b8f3fd8 ffff88012b8f3fd8 ffff88012b8f3fd8 [ 1182.967272] ffffffff81a0d020 ffff880130f9dc80 ffff88012b8f3e60 00000001a01a25cf [ 1182.974853] Call Trace: [ 1182.977271] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1182.982342] [<ffffffff815e0505>] schedule_timeout+0x2a5/0x320 [ 1182.988266] [<ffffffff8109035e>] ? prepare_to_wait+0x5e/0x90 [ 1182.994104] [<ffffffff814662cc>] md_thread+0xec/0x140 [ 1182.999327] [<ffffffff81090440>] ? remove_wait_queue+0x50/0x50 [ 1183.005337] [<ffffffff814661e0>] ? md_rdev_init+0x130/0x130 [ 1183.010998] [<ffffffff8108fb9c>] kthread+0x8c/0xa0 [ 1183.015893] [<ffffffff815ebef4>] kernel_thread_helper+0x4/0x10 [ 1183.021928] [<ffffffff8108fb10>] ? kthread_worker_fn+0x190/0x190 [ 1183.028113] [<ffffffff815ebef0>] ? gs_change+0x13/0x13 [ 1183.033420] flush-9:0 D ffff88012f314920 0 1031 2 0x00000000 [ 1183.040598] ffff88011e8e35e0 0000000000000046 0003520000000000 ffff88011e8e3590 [ 1183.048133] ffff88012f314560 ffff88011e8e3fd8 ffff88011e8e3fd8 ffff88011e8e3fd8 [ 1183.055765] ffff880130f41720 ffff88012f314560 ffff88011e8e35a0 000000011e8e35a0 [ 1183.063304] Call Trace: [ 1183.065810] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.070861] [<ffffffffa019e16b>] get_active_stripe+0x20b/0x710 [raid456] [ 1183.077662] [<ffffffff81065420>] ? try_to_wake_up+0x290/0x290 [ 1183.083613] [<ffffffffa01a42ba>] make_request+0x19a/0x460 [raid456] [ 1183.090064] [<ffffffff81090440>] ? remove_wait_queue+0x50/0x50 [ 1183.096076] [<ffffffff81465cc6>] md_make_request+0xc6/0x200 [ 1183.101824] [<ffffffff8111ca55>] ? mempool_alloc_slab+0x15/0x20 [ 1183.107924] [<ffffffff8129c257>] generic_make_request+0xc7/0x100 [ 1183.114110] [<ffffffff8129c317>] submit_bio+0x87/0x110 [ 1183.119421] [<ffffffff811acd9a>] ? bio_alloc_bioset+0x5a/0xf0 [ 1183.125344] [<ffffffff811a73cb>] submit_bh+0xeb/0x120 [ 1183.130479] [<ffffffff811a9590>] __block_write_full_page+0x210/0x3a0 [ 1183.137035] [<ffffffff811a8cd0>] ? end_buffer_async_read+0x130/0x130 [ 1183.143574] [<ffffffff811ae8d0>] ? thaw_bdev+0x90/0x90 [ 1183.148884] [<ffffffff811ae8d0>] ? thaw_bdev+0x90/0x90 [ 1183.154193] [<ffffffff811aa02d>] block_write_full_page_endio+0xcd/0x100 [ 1183.160993] [<ffffffff8111a7a4>] ? find_get_pages_tag+0x44/0x140 [ 1183.167180] [<ffffffff811aa075>] block_write_full_page+0x15/0x20 [ 1183.173365] [<ffffffff811ae528>] blkdev_writepage+0x18/0x20 [ 1183.179111] [<ffffffff81123667>] __writepage+0x17/0x40 [ 1183.184420] [<ffffffff81123acd>] write_cache_pages+0x20d/0x460 [ 1183.190431] [<ffffffff81123650>] ? set_page_dirty_lock+0x60/0x60 [ 1183.196532] [<ffffffff81123d6a>] generic_writepages+0x4a/0x70 [ 1183.202476] [<ffffffff81125401>] do_writepages+0x21/0x40 [ 1183.207960] [<ffffffff811a00b1>] writeback_single_inode+0x141/0x3e0 [ 1183.214410] [<ffffffff815df6b4>] ? __schedule+0x3d4/0x8c0 [ 1183.219985] [<ffffffff811a04f0>] writeback_sb_inodes+0x1a0/0x240 [ 1183.226172] [<ffffffff811a062e>] __writeback_inodes_wb+0x9e/0xd0 [ 1183.232360] [<ffffffff811a10ab>] wb_writeback+0x25b/0x310 [ 1183.237846] [<ffffffff81191fa2>] ? get_nr_inodes+0x52/0x70 [ 1183.243443] [<ffffffff81192f02>] ? get_nr_dirty_inodes+0x52/0x80 [ 1183.249650] [<ffffffff811a11ff>] wb_check_old_data_flush+0x9f/0xb0 [ 1183.255925] [<ffffffff811a20d9>] wb_do_writeback+0x149/0x1d0 [ 1183.261783] [<ffffffff815df6b4>] ? __schedule+0x3d4/0x8c0 [ 1183.267355] [<ffffffff8107c520>] ? lock_timer_base+0x70/0x70 [ 1183.273192] [<ffffffff811a21e3>] bdi_writeback_thread+0x83/0x2a0 [ 1183.279380] [<ffffffff811a2160>] ? wb_do_writeback+0x1d0/0x1d0 [ 1183.285393] [<ffffffff8108fb9c>] kthread+0x8c/0xa0 [ 1183.290264] [<ffffffff815ebef4>] kernel_thread_helper+0x4/0x10 [ 1183.296211] [<ffffffff8108fb10>] ? kthread_worker_fn+0x190/0x190 [ 1183.302330] [<ffffffff815ebef0>] ? gs_change+0x13/0x13 [ 1183.307661] kworker/0:1 S ffff880130f5e040 0 1042 2 0x00000000 [ 1183.314834] ffff8800af62de60 0000000000000046 ffff8800af62de00 ffffffff812b1468 [ 1183.322417] ffff880130f5dc80 ffff8800af62dfd8 ffff8800af62dfd8 ffff8800af62dfd8 [ 1183.329984] ffff88012bb2ae40 ffff880130f5dc80 ffff88012c6e2478 0000000130f5dc80 [ 1183.337570] Call Trace: [ 1183.340074] [<ffffffff812b1468>] ? cfq_kick_queue+0x38/0x60 [ 1183.345824] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.350872] [<ffffffff8108b2b9>] worker_thread+0x239/0x350 [ 1183.356532] [<ffffffff8108b080>] ? manage_workers+0x230/0x230 [ 1183.362456] [<ffffffff8108fb9c>] kthread+0x8c/0xa0 [ 1183.367416] [<ffffffff815ebef4>] kernel_thread_helper+0x4/0x10 [ 1183.373428] [<ffffffff8108fb10>] ? kthread_worker_fn+0x190/0x190 [ 1183.379615] [<ffffffff815ebef0>] ? gs_change+0x13/0x13 [ 1183.384924] kworker/0:2 S ffff88012bee9ae0 0 1070 2 0x00000000 [ 1183.392098] ffff8801131bde60 0000000000000046 ffff8801131bde00 ffffffff812b1468 [ 1183.399665] ffff88012bee9720 ffff8801131bdfd8 ffff8801131bdfd8 ffff8801131bdfd8 [ 1183.407230] ffff88012bb2ae40 ffff88012bee9720 ffff88012c6e2478 000000012bee9720 [ 1183.414765] Call Trace: [ 1183.417268] [<ffffffff812b1468>] ? cfq_kick_queue+0x38/0x60 [ 1183.423017] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.428065] [<ffffffff8108b2b9>] worker_thread+0x239/0x350 [ 1183.433725] [<ffffffff8108b080>] ? manage_workers+0x230/0x230 [ 1183.439646] [<ffffffff8108fb9c>] kthread+0x8c/0xa0 [ 1183.444607] [<ffffffff815ebef4>] kernel_thread_helper+0x4/0x10 [ 1183.450620] [<ffffffff8108fb10>] ? kthread_worker_fn+0x190/0x190 [ 1183.456807] [<ffffffff815ebef0>] ? gs_change+0x13/0x13 [ 1183.462116] fio S ffff88012c579ae0 0 1091 994 0x00000000 [ 1183.469203] ffff88012bce3e58 0000000000000082 ffff880100000000 ffffffff81093b11 [ 1183.476851] ffff88012c579720 ffff88012bce3fd8 ffff88012bce3fd8 ffff88012bce3fd8 [ 1183.484339] ffff880130f99720 ffff88012c579720 ffff88012bce3e38 0000000100000282 [ 1183.491903] Call Trace: [ 1183.494408] [<ffffffff81093b11>] ? lock_hrtimer_base+0x31/0x60 [ 1183.500419] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.505465] [<ffffffff815e0dac>] do_nanosleep+0x9c/0xd0 [ 1183.510774] [<ffffffff81094cc8>] hrtimer_nanosleep+0xb8/0x180 [ 1183.516721] [<ffffffff81093900>] ? update_rmtp+0x70/0x70 [ 1183.522204] [<ffffffff81094614>] ? hrtimer_start_range_ns+0x14/0x20 [ 1183.528656] [<ffffffff81094df1>] sys_nanosleep+0x61/0x80 [ 1183.534142] [<ffffffff815e9d82>] system_call_fastpath+0x16/0x1b [ 1183.540239] fio S ffff88012c57b200 0 1092 994 0x00000000 [ 1183.547413] ffff88012bc73e58 0000000000000082 ffff880100000000 ffffffff8135f33e [ 1183.554858] ffff88012c57ae40 ffff88012bc73fd8 ffff88012bc73fd8 ffff88012bc73fd8 [ 1183.562505] ffff880130f41720 ffff88012c57ae40 ffff88012bd81810 0000000100000282 [ 1183.570090] Call Trace: [ 1183.572594] [<ffffffff8135f33e>] ? tty_ldisc_deref+0xe/0x10 [ 1183.578342] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.583301] [<ffffffff815e0dac>] do_nanosleep+0x9c/0xd0 [ 1183.588633] [<ffffffff81094cc8>] hrtimer_nanosleep+0xb8/0x180 [ 1183.594578] [<ffffffff81093900>] ? update_rmtp+0x70/0x70 [ 1183.600062] [<ffffffff81094614>] ? hrtimer_start_range_ns+0x14/0x20 [ 1183.606510] [<ffffffff81094df1>] sys_nanosleep+0x61/0x80 [ 1183.611908] [<ffffffff815e9d82>] system_call_fastpath+0x16/0x1b [ 1183.618027] fio D ffff880130f46040 0 1096 1091 0x00000000 [ 1183.625149] ffff88011469d928 0000000000000082 0003520000000000 ffff88011469d8d8 [ 1183.632733] ffff880130f45c80 ffff88011469dfd8 ffff88011469dfd8 ffff88011469dfd8 [ 1183.640316] ffff880130f99720 ffff880130f45c80 ffff88011469d8e8 000000011469d8e8 [ 1183.647896] Call Trace: [ 1183.650402] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.655453] [<ffffffffa019e16b>] get_active_stripe+0x20b/0x710 [raid456] [ 1183.662342] [<ffffffff81065420>] ? try_to_wake_up+0x290/0x290 [ 1183.668268] [<ffffffffa01a42ba>] make_request+0x19a/0x460 [raid456] [ 1183.675248] [<ffffffff81090440>] ? remove_wait_queue+0x50/0x50 [ 1183.681259] [<ffffffff81465cc6>] md_make_request+0xc6/0x200 [ 1183.687007] [<ffffffff8111ca55>] ? mempool_alloc_slab+0x15/0x20 [ 1183.693110] [<ffffffff8129c257>] generic_make_request+0xc7/0x100 [ 1183.699300] [<ffffffff8129c317>] submit_bio+0x87/0x110 [ 1183.704610] [<ffffffff811acd9a>] ? bio_alloc_bioset+0x5a/0xf0 [ 1183.710531] [<ffffffff811a73cb>] submit_bh+0xeb/0x120 [ 1183.715754] [<ffffffff811a9590>] __block_write_full_page+0x210/0x3a0 [ 1183.722290] [<ffffffff811a8cd0>] ? end_buffer_async_read+0x130/0x130 [ 1183.728828] [<ffffffff811ae8d0>] ? thaw_bdev+0x90/0x90 [ 1183.734139] [<ffffffff811ae8d0>] ? thaw_bdev+0x90/0x90 [ 1183.739361] [<ffffffff811aa02d>] block_write_full_page_endio+0xcd/0x100 [ 1183.746183] [<ffffffff811aa075>] block_write_full_page+0x15/0x20 [ 1183.752369] [<ffffffff811ae528>] blkdev_writepage+0x18/0x20 [ 1183.758116] [<ffffffff81123667>] __writepage+0x17/0x40 [ 1183.763425] [<ffffffff81123acd>] write_cache_pages+0x20d/0x460 [ 1183.769353] [<ffffffff8113ee47>] ? handle_pte_fault+0xf7/0xac0 [ 1183.775385] [<ffffffff81123650>] ? set_page_dirty_lock+0x60/0x60 [ 1183.781574] [<ffffffff81095467>] ? __srcu_read_unlock+0x27/0x30 [ 1183.787673] [<ffffffff81123d6a>] generic_writepages+0x4a/0x70 [ 1183.793597] [<ffffffff81125401>] do_writepages+0x21/0x40 [ 1183.799084] [<ffffffff8111b27b>] __filemap_fdatawrite_range+0x5b/0x60 [ 1183.805709] [<ffffffff8111b2d2>] filemap_write_and_wait_range+0x52/0x80 [ 1183.812510] [<ffffffff811ae814>] blkdev_fsync+0x24/0x50 [ 1183.817906] [<ffffffff811a59b6>] do_fsync+0x56/0x80 [ 1183.822953] [<ffffffff811a5ce0>] sys_fsync+0x10/0x20 [ 1183.828001] [<ffffffff815e9d82>] system_call_fastpath+0x16/0x1b [ 1183.834121] kworker/0:0 S ffff88012bb2b200 0 1097 2 0x00000000 [ 1183.841296] ffff88009c38de60 0000000000000046 ffff880100000000 ffff880137c17200 [ 1183.848864] ffff88012bb2ae40 ffff88009c38dfd8 ffff88009c38dfd8 ffff88009c38dfd8 [ 1183.856396] ffffffff81a0d020 ffff88012bb2ae40 ffff880137c10d68 0000000197fbfe80 [ 1183.863966] Call Trace: [ 1183.866470] [<ffffffff815dfebf>] schedule+0x3f/0x60 [ 1183.871516] [<ffffffff8108b2b9>] worker_thread+0x239/0x350 [ 1183.877176] [<ffffffff8108b080>] ? manage_workers+0x230/0x230 [ 1183.883099] [<ffffffff8108fb9c>] kthread+0x8c/0xa0 [ 1183.887969] [<ffffffff815ebef4>] kernel_thread_helper+0x4/0x10 [ 1183.894002] [<ffffffff8108fb10>] ? kthread_worker_fn+0x190/0x190 [ 1183.900188] [<ffffffff815ebef0>] ? gs_change+0x13/0x13 ...
Attachment:
messages3.gz
Description: application/gzip