Hello, I am not sure whether there is something to fix or not, since it can be triggered only by root. But still if I run the following program several times in a row, the whole machine becomes unusable for several minutes (ssh, ps and pretty much everything hangs): // autogenerated by syzkaller (http://github.com/google/syzkaller) #include <pthread.h> #include <stdint.h> #include <string.h> #include <sys/syscall.h> #include <unistd.h> int main() { syscall(SYS_mmap, 0x20000000ul, 0x25000ul, 0x3ul, 0x32ul, -1, 0x0ul); int fd = syscall(SYS_open, "/dev/sg0", 0x20202ul); *(uint32_t*)0x2001f639 = (uint32_t)0x50; *(uint32_t*)0x2001f63d = (uint32_t)0x0; *(uint64_t*)0x2001f641 = (uint64_t)0xffff; *(uint32_t*)0x2001f649 = (uint32_t)0x8688; *(uint32_t*)0x2001f64d = (uint32_t)0x8; *(uint32_t*)0x2001f651 = (uint32_t)0x2; *(uint32_t*)0x2001f655 = (uint32_t)0x8; *(uint16_t*)0x2001f659 = (uint16_t)0xfffffffffffff801; *(uint16_t*)0x2001f65b = (uint16_t)0x1f; *(uint32_t*)0x2001f65d = (uint32_t)0x8; *(uint32_t*)0x2001f661 = (uint32_t)0x7fbe57c5; *(uint32_t*)0x2001f665 = (uint32_t)0x0; *(uint32_t*)0x2001f669 = (uint32_t)0x0; *(uint32_t*)0x2001f66d = (uint32_t)0x0; *(uint32_t*)0x2001f671 = (uint32_t)0x0; *(uint32_t*)0x2001f675 = (uint32_t)0x0; *(uint32_t*)0x2001f679 = (uint32_t)0x0; *(uint32_t*)0x2001f67d = (uint32_t)0x0; *(uint32_t*)0x2001f681 = (uint32_t)0x0; *(uint32_t*)0x2001f685 = (uint32_t)0x0; syscall(SYS_write, fd, 0x2001f639ul, 0x50ul, 0, 0, 0); return 0; } Sysrq d shows the following during the hang: [ 131.732528] Showing all locks held in the system: [ 131.732995] 3 locks held by kworker/u9:2/151: [ 131.733321] #0: ("writeback"){.+.+.+}, at: [<ffffffff813afd89>] process_one_work+0x699/0x15c0 [ 131.734064] #1: ((&(&wb->dwork)->work)){+.+.+.}, at: [<ffffffff813afdb9>] process_one_work+0x6c9/0x15c0 [ 131.734834] #2: (&type->s_umount_key#29){.+.+..}, at: [<ffffffff817d7c00>] trylock_super+0x20/0x100 [ 131.735638] 4 locks held by rs:main Q:Reg/6703: [ 131.736051] #0: 
(&f->f_pos_lock){+.+.+.}, at: [<ffffffff8182ee46>] __fdget_pos+0xd6/0x100 [ 131.736707] #1: (sb_writers#3){.+.+.+}, at: [<ffffffff817d6c3c>] __sb_start_write+0xec/0x130 [ 131.737760] #2: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [<ffffffff81a5b40a>] ext4_file_write_iter+0x15a/0xc80 [ 131.738543] #3: (jbd2_handle){+.+...}, at: [<ffffffff81bbb6c9>] start_this_handle+0x5f9/0x1420 [ 131.754552] 3 locks held by bash/6809: [ 131.754804] #0: (sb_writers#6){.+.+.+}, at: [<ffffffff817d6c3c>] __sb_start_write+0xec/0x130 [ 131.755504] #1: (rcu_read_lock){......}, at: [<ffffffff82fe63f0>] __handle_sysrq+0x0/0x4d0 [ 131.756528] #2: (tasklist_lock){.+.+..}, at: [<ffffffff8145c350>] debug_show_all_locks+0x70/0x280 [ 131.757329] 3 locks held by sshd/6823: [ 131.757603] #0: (sb_writers#3){.+.+.+}, at: [<ffffffff817d6c3c>] __sb_start_write+0xec/0x130 [ 131.758269] #1: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [<ffffffff81a5b40a>] ext4_file_write_iter+0x15a/0xc80 [ 131.759149] #2: (jbd2_handle){+.+...}, at: [<ffffffff81bbb6c9>] start_this_handle+0x5f9/0x1420 The problem seems to be with task 151 which holds some critical mutex and then blocks in bt_get for minutes: root@dvyukov-z840:~# cat /proc/151/stack [<ffffffff82bc5f92>] bt_get+0x2d2/0x700 [<ffffffff82bc6cab>] blk_mq_get_tag+0x11b/0x360 [<ffffffff82bb5534>] __blk_mq_alloc_request+0x24/0xa20 [<ffffffff82bbd426>] blk_mq_map_request+0x786/0xc10 [<ffffffff82bc116e>] blk_sq_make_request+0x18e/0xdb0 [<ffffffff82b92fa0>] generic_make_request+0x310/0x800 [<ffffffff82b935ee>] submit_bio+0x15e/0x4c0 [<ffffffff81885100>] submit_bh_wbc+0x400/0x560 [<ffffffff8188a9a6>] __block_write_full_page.constprop.52+0x386/0x8c0 [<ffffffff8188b0d9>] block_write_full_page+0x1f9/0x2a0 [<ffffffff8188d524>] blkdev_writepage+0x24/0x30 [<ffffffff8167d091>] __writepage+0x61/0xc0 [<ffffffff8167ff9a>] write_cache_pages+0x4ca/0xf20 [<ffffffff81680abc>] generic_writepages+0xcc/0x120 [<ffffffff8188d48d>] blkdev_writepages+0x7d/0xd0 [<ffffffff81688f47>] 
do_writepages+0x97/0x100 [<ffffffff818655e8>] __writeback_single_inode+0xe8/0x1190 [<ffffffff81867a7d>] writeback_sb_inodes+0x44d/0x11f0 [<ffffffff81868924>] __writeback_inodes_wb+0x104/0x1e0 [<ffffffff8186932e>] wb_writeback+0x7ce/0xc90 [<ffffffff8186b2ee>] wb_workfn+0xa0e/0x1000 [<ffffffff813afe7e>] process_one_work+0x78e/0x15c0 [<ffffffff813b0d8b>] worker_thread+0xdb/0xfc0 [<ffffffff813c195f>] kthread+0x23f/0x2d0 [<ffffffff866d1b2f>] ret_from_fork+0x3f/0x70 [<ffffffffffffffff>] 0xffffffffffffffff root@dvyukov-z840:~# cat /proc/6823/stack [<ffffffff81bc0f30>] do_get_write_access+0x6f0/0x10e0 [<ffffffff81bc196d>] jbd2_journal_get_write_access+0x4d/0xa0 [<ffffffff81b35821>] __ext4_journal_get_write_access+0x51/0x90 [<ffffffff81a7f3d3>] ext4_reserve_inode_write+0xe3/0x180 [<ffffffff81a7f5a3>] ext4_mark_inode_dirty+0x133/0xb20 [<ffffffff81a8cb11>] ext4_dirty_inode+0x71/0xa0 [<ffffffff81864915>] __mark_inode_dirty+0x575/0x1160 [<ffffffff8182442a>] generic_update_time+0x1aa/0x270 [<ffffffff8182482c>] file_update_time+0x25c/0x3e0 [<ffffffff81660e11>] __generic_file_write_iter+0x1d1/0x5a0 [<ffffffff81a5b597>] ext4_file_write_iter+0x2e7/0xc80 [<ffffffff817ccd22>] __vfs_write+0x302/0x4b0 [<ffffffff817ce607>] vfs_write+0x167/0x4a0 [<ffffffff817d18f1>] SyS_write+0x111/0x220 [<ffffffff866d1776>] entry_SYSCALL_64_fastpath+0x16/0x7a [<ffffffffffffffff>] 0xffffffffffffffff root@dvyukov-z840:~# cat /proc/6703/stack [<ffffffff81bc0f30>] do_get_write_access+0x6f0/0x10e0 [<ffffffff81bc196d>] jbd2_journal_get_write_access+0x4d/0xa0 [<ffffffff81b35821>] __ext4_journal_get_write_access+0x51/0x90 [<ffffffff81a7f3d3>] ext4_reserve_inode_write+0xe3/0x180 [<ffffffff81a7f5a3>] ext4_mark_inode_dirty+0x133/0xb20 [<ffffffff81a8cb11>] ext4_dirty_inode+0x71/0xa0 [<ffffffff81864915>] __mark_inode_dirty+0x575/0x1160 [<ffffffff8182442a>] generic_update_time+0x1aa/0x270 [<ffffffff8182482c>] file_update_time+0x25c/0x3e0 [<ffffffff81660e11>] __generic_file_write_iter+0x1d1/0x5a0 [<ffffffff81a5b597>] 
ext4_file_write_iter+0x2e7/0xc80 [<ffffffff817ccd22>] __vfs_write+0x302/0x4b0 [<ffffffff817ce607>] vfs_write+0x167/0x4a0 [<ffffffff817d18f1>] SyS_write+0x111/0x220 [<ffffffff866d1776>] entry_SYSCALL_64_fastpath+0x16/0x7a [<ffffffffffffffff>] 0xffffffffffffffff Is this hang in bt_get legitimate? If yes, is it possible to block in bt_get without holding the other mutexes? This happens in qemu and I don't have any actual scsi devices afaict. I am on commit 8e0f93cda48ed054e1216bab5c60017e1a5fc1e8. -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html