From: Roy Shterman <roys@xxxxxxxxxxxxxxxxx> [ Upstream commit b227c59b9b5b8ae52639c8980af853d2f654f90a ] We need to ensure that delete_work will be hosted on a different workqueue than all the works we flush or cancel from it. Otherwise we may hit a circular dependency warning [1]. Also, given that delete_work flushes reset_work, host reset_work on nvme_reset_wq and delete_work on nvme_delete_wq. In addition, fix the flushing in the individual drivers to flush nvme_delete_wq when draining queued deletes. [1]: [ 178.491942] ============================================= [ 178.492718] [ INFO: possible recursive locking detected ] [ 178.493495] 4.9.0-rc4-c844263313a8-lb #3 Tainted: G OE [ 178.494382] --------------------------------------------- [ 178.495160] kworker/5:1/135 is trying to acquire lock: [ 178.495894] ( [ 178.496120] "nvme-wq" [ 178.496471] ){++++.+} [ 178.496599] , at: [ 178.496921] [<ffffffffa70ac206>] flush_work+0x1a6/0x2d0 [ 178.497670] but task is already holding lock: [ 178.498499] ( [ 178.498724] "nvme-wq" [ 178.499074] ){++++.+} [ 178.499202] , at: [ 178.499520] [<ffffffffa70ad6c2>] process_one_work+0x162/0x6a0 [ 178.500343] other info that might help us debug this: [ 178.501269] Possible unsafe locking scenario: [ 178.502113] CPU0 [ 178.502472] ---- [ 178.502829] lock( [ 178.503115] "nvme-wq" [ 178.503467] ); [ 178.503716] lock( [ 178.504001] "nvme-wq" [ 178.504353] ); [ 178.504601] *** DEADLOCK *** [ 178.505441] May be due to missing lock nesting notation [ 178.506453] 2 locks held by kworker/5:1/135: [ 178.507068] #0: [ 178.507330] ( [ 178.507598] "nvme-wq" [ 178.507726] ){++++.+} [ 178.508079] , at: [ 178.508173] [<ffffffffa70ad6c2>] process_one_work+0x162/0x6a0 [ 178.509004] #1: [ 178.509265] ( [ 178.509532] (&ctrl->delete_work) [ 178.509795] ){+.+.+.} [ 178.510145] , at: [ 178.510239] [<ffffffffa70ad6c2>] process_one_work+0x162/0x6a0 [ 178.511070] stack backtrace: : [ 178.511693] CPU: 5 PID: 135 Comm: kworker/5:1 Tainted: G OE 4.9.0-rc4-c844263313a8-lb #3 [ 178.512974] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.1-1ubuntu1 04/01/2014 [ 178.514247] Workqueue: nvme-wq nvme_del_ctrl_work [nvme_tcp] [ 178.515071] ffffc2668175bae0 ffffffffa7450823 ffffffffa88abd80 ffffffffa88abd80 [ 178.516195] ffffc2668175bb98 ffffffffa70eb012 ffffffffa8d8d90d ffff9c472e9ea700 [ 178.517318] ffff9c472e9ea700 ffff9c4700000000 ffff9c4700007200 ab83be61bec0d50e [ 178.518443] Call Trace: [ 178.518807] [<ffffffffa7450823>] dump_stack+0x85/0xc2 [ 178.519542] [<ffffffffa70eb012>] __lock_acquire+0x17d2/0x18f0 [ 178.520377] [<ffffffffa75839a7>] ? serial8250_console_putchar+0x27/0x30 [ 178.521330] [<ffffffffa7583980>] ? wait_for_xmitr+0xa0/0xa0 [ 178.522174] [<ffffffffa70ac1eb>] ? flush_work+0x18b/0x2d0 [ 178.522975] [<ffffffffa70eb7cb>] lock_acquire+0x11b/0x220 [ 178.523753] [<ffffffffa70ac206>] ? flush_work+0x1a6/0x2d0 [ 178.524535] [<ffffffffa70ac229>] flush_work+0x1c9/0x2d0 [ 178.525291] [<ffffffffa70ac206>] ? flush_work+0x1a6/0x2d0 [ 178.526077] [<ffffffffa70a9cf0>] ? flush_workqueue_prep_pwqs+0x220/0x220 [ 178.527040] [<ffffffffa70ae7cf>] __cancel_work_timer+0x10f/0x1d0 [ 178.527907] [<ffffffffa70fecb9>] ? vprintk_default+0x29/0x40 [ 178.528726] [<ffffffffa71cb507>] ? printk+0x48/0x50 [ 178.529434] [<ffffffffa70ae8c3>] cancel_delayed_work_sync+0x13/0x20 [ 178.530381] [<ffffffffc042100b>] nvme_stop_ctrl+0x5b/0x70 [nvme_core] [ 178.531314] [<ffffffffc0403dcc>] nvme_del_ctrl_work+0x2c/0x50 [nvme_tcp] [ 178.532271] [<ffffffffa70ad741>] process_one_work+0x1e1/0x6a0 [ 178.533101] [<ffffffffa70ad6c2>] ? process_one_work+0x162/0x6a0 [ 178.533954] [<ffffffffa70adc4e>] worker_thread+0x4e/0x490 [ 178.534735] [<ffffffffa70adc00>] ? process_one_work+0x6a0/0x6a0 [ 178.535588] [<ffffffffa70adc00>] ? process_one_work+0x6a0/0x6a0 [ 178.536441] [<ffffffffa70b48cf>] kthread+0xff/0x120 [ 178.537149] [<ffffffffa70b47d0>] ? kthread_park+0x60/0x60 [ 178.538094] [<ffffffffa70b47d0>] ? kthread_park+0x60/0x60 [ 178.538900] [<ffffffffa78e332a>] ret_from_fork+0x2a/0x40 Signed-off-by: Roy Shterman <roys@xxxxxxxxxxxxxxxxx> Signed-off-by: Sagi Grimberg <sagi@xxxxxxxxxxx> Signed-off-by: Christoph Hellwig <hch@xxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- drivers/nvme/host/core.c | 44 +++++++++++++++++++++++++++++++++++++++----- drivers/nvme/host/nvme.h | 2 ++ drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/loop.c | 2 +- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 935593032123..93a4fa053e7f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -65,9 +65,26 @@ static bool streams; module_param(streams, bool, 0644); MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); +/* + * nvme_wq - hosts nvme related works that are not reset or delete + * nvme_reset_wq - hosts nvme reset works + * nvme_delete_wq - hosts nvme delete works + * + * nvme_wq will host works such are scan, aen handling, fw activation, + * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq + * runs reset works which also flush works hosted on nvme_wq for + * serialization purposes. nvme_delete_wq host controller deletion + * works which flush reset works for serialization. + */ struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); +struct workqueue_struct *nvme_reset_wq; +EXPORT_SYMBOL_GPL(nvme_reset_wq); + +struct workqueue_struct *nvme_delete_wq; +EXPORT_SYMBOL_GPL(nvme_delete_wq); + static DEFINE_IDA(nvme_subsystems_ida); static LIST_HEAD(nvme_subsystems); static DEFINE_MUTEX(nvme_subsystems_lock); @@ -89,7 +106,7 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->reset_work)) + if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) return -EBUSY; return 0; } @@ -122,7 +139,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->delete_work)) + if (!queue_work(nvme_delete_wq, &ctrl->delete_work)) return -EBUSY; return 0; } @@ -3491,16 +3508,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset); int __init nvme_core_init(void) { - int result; + int result = -ENOMEM; nvme_wq = alloc_workqueue("nvme-wq", WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!nvme_wq) - return -ENOMEM; + goto out; + + nvme_reset_wq = alloc_workqueue("nvme-reset-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvme_reset_wq) + goto destroy_wq; + + nvme_delete_wq = alloc_workqueue("nvme-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvme_delete_wq) + goto destroy_reset_wq; result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); if (result < 0) - goto destroy_wq; + goto destroy_delete_wq; nvme_class = class_create(THIS_MODULE, "nvme"); if (IS_ERR(nvme_class)) { @@ -3519,8 +3546,13 @@ destroy_class: class_destroy(nvme_class); unregister_chrdev: unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); +destroy_delete_wq: + destroy_workqueue(nvme_delete_wq); +destroy_reset_wq: + destroy_workqueue(nvme_reset_wq); destroy_wq: destroy_workqueue(nvme_wq); +out: return result; } @@ -3530,6 +3562,8 @@ void nvme_core_exit(void) class_destroy(nvme_subsys_class); class_destroy(nvme_class); unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); + destroy_workqueue(nvme_delete_wq); + destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 55c49a1aa231..d320df9ab588 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -32,6 +32,8 @@ extern unsigned int admin_timeout; #define NVME_KATO_GRACE 10 extern struct workqueue_struct *nvme_wq; +extern struct workqueue_struct *nvme_reset_wq; +extern struct workqueue_struct *nvme_delete_wq; enum { NVME_NS_LBA = 0, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2a0bba7f50cf..145aa97067d4 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -2040,7 +2040,7 @@ static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data) } mutex_unlock(&nvme_rdma_ctrl_mutex); - flush_workqueue(nvme_wq); + flush_workqueue(nvme_delete_wq); } static struct ib_client nvme_rdma_ib_client = { diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 1e21b286f299..5d8054f9a412 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -716,7 +716,7 @@ static void __exit nvme_loop_cleanup_module(void) nvme_delete_ctrl(&ctrl->ctrl); mutex_unlock(&nvme_loop_ctrl_mutex); - flush_workqueue(nvme_wq); + flush_workqueue(nvme_delete_wq); } module_init(nvme_loop_init_module); -- 2.15.1