Hi, this patch is meant to show that, if the body of the hook exit_icq is executed from inside that hook, and not as deferred work, then a circular deadlock occurs. It happens if, on a CPU - the body of icq_exit takes the scheduler lock, - it does so from inside the exit_icq hook, which is invoked with the queue lock held while, on another CPU - bfq_bio_merge, after taking the scheduler lock, invokes bfq_bic_lookup, which, in its turn, takes the queue lock. bfq_bic_lookup needs to take such a lock, because it invokes ioc_lookup_icq. For more details, here is a lockdep report, right before the deadlock did occur. [ 44.059877] ====================================================== [ 44.124922] [ INFO: possible circular locking dependency detected ] [ 44.125795] 4.10.0-rc5-bfq-mq+ #38 Not tainted [ 44.126414] ------------------------------------------------------- [ 44.127291] sync/2043 is trying to acquire lock: [ 44.128918] (&(&bfqd->lock)->rlock){-.-...}, at: [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140 [ 44.134052] [ 44.134052] but task is already holding lock: [ 44.134868] (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0 [ 44.136292] [ 44.136292] which lock already depends on the new lock. [ 44.136292] [ 44.137411] [ 44.137411] the existing dependency chain (in reverse order) is: [ 44.139936] [ 44.139936] -> #1 (&(&q->__queue_lock)->rlock){-.....}: [ 44.141512] [ 44.141517] [<ffffffff900ee6bb>] lock_acquire+0x11b/0x220 [ 44.142569] [ 44.142578] [<ffffffff90943e66>] _raw_spin_lock_irqsave+0x56/0x90 [ 44.146365] [ 44.146371] [<ffffffff904822f5>] bfq_bic_lookup.isra.112+0x25/0x60 [ 44.147523] [ 44.147525] [<ffffffff9048245d>] bfq_request_merge+0x3d/0xe0 [ 44.149738] [ 44.149742] [<ffffffff90439aef>] elv_merge+0xcf/0xe0 [ 44.150726] [ 44.150728] [<ffffffff90453c46>] blk_mq_sched_try_merge+0x36/0x150 [ 44.151878] [ 44.151881] [<ffffffff9047fb6a>] bfq_bio_merge+0x5a/0xa0 [ 44.153913] [ 44.153916] [<ffffffff90454500>] __blk_mq_sched_bio_merge+0x60/0x70 [ 44.155089] [ 44.155091] [<ffffffff9044e6c7>] blk_sq_make_request+0x277/0xa90 [ 44.157455] [ 44.157458] [<ffffffff90440846>] generic_make_request+0xf6/0x2b0 [ 44.158597] [ 44.158599] [<ffffffff90440a73>] submit_bio+0x73/0x150 [ 44.160537] [ 44.160541] [<ffffffff90366e0b>] ext4_mpage_readpages+0x48b/0x950 [ 44.162961] [ 44.162971] [<ffffffff90313236>] ext4_readpages+0x36/0x40 [ 44.164037] [ 44.164040] [<ffffffff901eca4e>] __do_page_cache_readahead+0x2ae/0x3a0 [ 44.165224] [ 44.165227] [<ffffffff901ecc4e>] ondemand_readahead+0x10e/0x4b0 [ 44.166334] [ 44.166336] [<ffffffff901ed1a1>] page_cache_sync_readahead+0x31/0x50 [ 44.167502] [ 44.167503] [<ffffffff901dcaed>] generic_file_read_iter+0x68d/0x8d0 [ 44.168661] [ 44.168663] [<ffffffff9030e6c7>] ext4_file_read_iter+0x37/0xc0 [ 44.169760] [ 44.169764] [<ffffffff9026e7c3>] __vfs_read+0xe3/0x150 [ 44.171987] [ 44.171990] [<ffffffff9026ee58>] vfs_read+0xa8/0x170 [ 44.178999] [ 44.179005] [<ffffffff902761e8>] prepare_binprm+0x118/0x200 [ 44.180080] [ 44.180083] [<ffffffff90277bcb>] do_execveat_common.isra.39+0x56b/0xa00 [ 44.184409] [ 44.184414] [<ffffffff902782fa>] SyS_execve+0x3a/0x50 [ 44.185398] [ 44.185401] [<ffffffff90003e49>] do_syscall_64+0x69/0x160 [ 44.187349] [ 44.187353] [<ffffffff9094408d>] return_from_SYSCALL_64+0x0/0x7a [ 44.188475] [ 44.188475] -> #0 (&(&bfqd->lock)->rlock){-.-...}: [ 44.199960] [ 44.199966] [<ffffffff900edd24>] __lock_acquire+0x15e4/0x1890 [ 44.201056] [ 44.201058] [<ffffffff900ee6bb>] lock_acquire+0x11b/0x220 [ 44.202099] [ 44.202101] [<ffffffff909434da>] _raw_spin_lock_irq+0x4a/0x80 [ 44.203197] [ 44.203200] [<ffffffff90484195>] bfq_exit_icq_bfqq+0x55/0x140 [ 44.204848] [ 44.204851] [<ffffffff9048429c>] bfq_exit_icq+0x1c/0x30 [ 44.205863] [ 44.205866] [<ffffffff90446e68>] ioc_exit_icq+0x38/0x50 [ 44.206881] [ 44.206883] [<ffffffff9044739a>] put_io_context_active+0x7a/0xc0 [ 44.215156] sh (2042): drop_caches: 3 [ 44.216738] [ 44.216742] [<ffffffff90447428>] exit_io_context+0x48/0x50 [ 44.217497] [ 44.217500] [<ffffffff90095727>] do_exit+0x787/0xc50 [ 44.218207] [ 44.218209] [<ffffffff90095c80>] do_group_exit+0x50/0xd0 [ 44.218969] [ 44.218970] [<ffffffff90095d14>] SyS_exit_group+0x14/0x20 [ 44.219716] [ 44.219718] [<ffffffff90943fc5>] entry_SYSCALL_64_fastpath+0x23/0xc6 [ 44.220550] [ 44.220550] other info that might help us debug this: [ 44.220550] [ 44.223477] Possible unsafe locking scenario: [ 44.223477] [ 44.224281] CPU0 CPU1 [ 44.224903] ---- ---- [ 44.225523] lock(&(&q->__queue_lock)->rlock); [ 44.226144] lock(&(&bfqd->lock)->rlock); [ 44.227051] lock(&(&q->__queue_lock)->rlock); [ 44.228019] lock(&(&bfqd->lock)->rlock); [ 44.228643] [ 44.228643] *** DEADLOCK *** [ 44.228643] [ 44.230136] 2 locks held by sync/2043: [ 44.230591] #0: (&(&ioc->lock)->rlock/1){......}, at: [<ffffffff90447358>] put_io_context_active+0x38/0xc0 [ 44.231553] #1: (&(&q->__queue_lock)->rlock){-.....}, at: [<ffffffff9044738e>] put_io_context_active+0x6e/0xc0 [ 44.232542] [ 44.232542] stack backtrace: [ 44.232974] CPU: 1 PID: 2043 Comm: sync Not tainted 4.10.0-rc5-bfq-mq+ #38 [ 44.238224] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 44.239364] Call Trace: [ 44.239717] dump_stack+0x85/0xc2 [ 44.240186] print_circular_bug+0x1e3/0x250 [ 44.240773] __lock_acquire+0x15e4/0x1890 [ 44.241350] lock_acquire+0x11b/0x220 [ 44.241867] ? bfq_exit_icq_bfqq+0x55/0x140 [ 44.242455] _raw_spin_lock_irq+0x4a/0x80 [ 44.243018] ? bfq_exit_icq_bfqq+0x55/0x140 [ 44.243629] bfq_exit_icq_bfqq+0x55/0x140 [ 44.244192] bfq_exit_icq+0x1c/0x30 [ 44.244713] ioc_exit_icq+0x38/0x50 [ 44.246518] put_io_context_active+0x7a/0xc0 [ 44.247116] exit_io_context+0x48/0x50 [ 44.247647] do_exit+0x787/0xc50 [ 44.248103] do_group_exit+0x50/0xd0 [ 44.249055] SyS_exit_group+0x14/0x20 [ 44.249568] entry_SYSCALL_64_fastpath+0x23/0xc6 [ 44.250208] RIP: 0033:0x7fd70b22ab98 [ 44.250704] RSP: 002b:00007ffc9cc43878 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 [ 44.251745] RAX: ffffffffffffffda RBX: 00007fd70b523620 RCX: 00007fd70b22ab98 [ 44.252730] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000 [ 44.253713] RBP: 00007fd70b523620 R08: 00000000000000e7 R09: ffffffffffffff98 [ 44.254690] R10: 00007ffc9cc437c8 R11: 0000000000000246 R12: 0000000000000000 [ 44.255674] R13: 00007fd70b523c40 R14: 0000000000000000 R15: 0000000000000000 Signed-off-by: Paolo Valente <paolo.valente@xxxxxxxxxx> --- block/bfq-mq-iosched.c | 34 +++------------------------------- block/bfq-mq.h | 3 --- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/block/bfq-mq-iosched.c b/block/bfq-mq-iosched.c index 05a12b6..6e79bb8 100644 --- a/block/bfq-mq-iosched.c +++ b/block/bfq-mq-iosched.c @@ -4001,28 +4001,13 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) } } -static void bfq_exit_icq_body(struct work_struct *work) -{ - struct bfq_io_cq *bic = - container_of(work, struct bfq_io_cq, exit_icq_work); - - bfq_exit_icq_bfqq(bic, true); - bfq_exit_icq_bfqq(bic, false); -} - -static void bfq_init_icq(struct io_cq *icq) -{ - struct bfq_io_cq *bic = icq_to_bic(icq); - - INIT_WORK(&bic->exit_icq_work, bfq_exit_icq_body); -} - static void bfq_exit_icq(struct io_cq *icq) { struct bfq_io_cq *bic = icq_to_bic(icq); BUG_ON(!bic); - kblockd_schedule_work(&bic->exit_icq_work); + bfq_exit_icq_bfqq(bic, true); + bfq_exit_icq_bfqq(bic, false); } /* @@ -4934,21 +4919,9 @@ static void bfq_exit_queue(struct elevator_queue *e) BUG_ON(bfqd->in_service_queue); BUG_ON(!list_empty(&bfqd->active_list)); - list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) { - if (bfqq->bic) /* bfqqs without bic are handled below */ - cancel_work_sync(&bfqq->bic->exit_icq_work); - } - spin_lock_irq(&bfqd->lock); - list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) { + list_for_each_entry_safe(bfqq, n, &bfqd->idle_list, bfqq_list) bfq_deactivate_bfqq(bfqd, bfqq, false, false); - /* - * Make sure that deferred exit_icq_work completes - * without errors for bfq_queues without bic - */ - if (!bfqq->bic) - bfqq->bfqd = NULL; - } spin_unlock_irq(&bfqd->lock); hrtimer_cancel(&bfqd->idle_slice_timer); @@ -5384,7 +5357,6 @@ static struct elevator_type iosched_bfq_mq = { .ops.mq = { .get_rq_priv = bfq_get_rq_private, .put_rq_priv = bfq_put_rq_private, - .init_icq = bfq_init_icq, .exit_icq = bfq_exit_icq, .insert_requests = bfq_insert_requests, .dispatch_request = bfq_dispatch_request, diff --git a/block/bfq-mq.h b/block/bfq-mq.h index 6e1c0d8..41b9d33 100644 --- a/block/bfq-mq.h +++ b/block/bfq-mq.h @@ -345,9 +345,6 @@ struct bfq_io_cq { uint64_t blkcg_serial_nr; /* the current blkcg serial */ #endif - /* delayed work to exec the body of the the exit_icq handler */ - struct work_struct exit_icq_work; - /* * Snapshot of the idle window before merging; taken to * remember this value while the queue is merged, so as to be -- 2.10.0