Commit c0feea594e05 ("workqueue: don't skip lockdep work dependency in
cancel_work_sync()") restored a lockdep check and triggered a deadlock
warning in the nvme_sync_io_queues() path. This function takes
namespaces_rwsem to traverse the namespace list and calls blk_sync_queue()
for each namespace. blk_sync_queue() calls cancel_work_sync() to wait for
nvme_timeout() completion, and nvme_timeout() has paths that take
namespaces_rwsem again. Hence the deadlock warning.

This deadlock is not expected to happen in real use cases, since the queues
on which blk_sync_queue() is called are already quiesced at that point. The
remaining issue is the confusion caused by the warning. Suppressing it with
lockdep_off/on() is not ideal, since that would also hide deadlock scenarios
not yet seen. The warning should be suppressed without losing deadlock
detection ability.

Instead of lockdep_off/on(), this patch suppresses the warning by adding
another lock, 'namespaces_sync_io_queues_mutex', which works together with
namespaces_rwsem: take only namespaces_rwsem for fast read access to the
namespaces in hot paths; take both locks to modify the list; take only
namespaces_sync_io_queues_mutex to call blk_sync_queue(), so that
namespaces_rwsem can still be taken in nvme_timeout() without the deadlock
warning.

Link: https://lore.kernel.org/linux-block/20220930001943.zdbvolc3gkekfmcv@shindev/
Suggested-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@xxxxxxx>
---
 drivers/nvme/host/core.c | 21 +++++++++++++++++++--
 drivers/nvme/host/nvme.h |  1 +
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8d5a7ae19844..4c535deaf269 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4234,9 +4234,11 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 	if (nvme_update_ns_info(ns, info))
 		goto out_unlink_ns;
 
+	mutex_lock(&ctrl->namespaces_sync_io_queues_mutex);
 	down_write(&ctrl->namespaces_rwsem);
 	nvme_ns_add_to_ctrl_list(ns);
 	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_sync_io_queues_mutex);
 	nvme_get_ctrl(ctrl);
 
 	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
@@ -4252,9 +4254,11 @@
 
  out_cleanup_ns_from_list:
 	nvme_put_ctrl(ctrl);
+	mutex_lock(&ctrl->namespaces_sync_io_queues_mutex);
 	down_write(&ctrl->namespaces_rwsem);
 	list_del_init(&ns->list);
 	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_sync_io_queues_mutex);
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
@@ -4304,9 +4308,11 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 		nvme_cdev_del(&ns->cdev, &ns->cdev_device);
 	del_gendisk(ns->disk);
 
+	mutex_lock(&ns->ctrl->namespaces_sync_io_queues_mutex);
 	down_write(&ns->ctrl->namespaces_rwsem);
 	list_del_init(&ns->list);
 	up_write(&ns->ctrl->namespaces_rwsem);
+	mutex_unlock(&ns->ctrl->namespaces_sync_io_queues_mutex);
 
 	if (last_path)
 		nvme_mpath_shutdown_disk(ns->head);
@@ -4399,12 +4405,14 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(rm_list);
 
+	mutex_lock(&ctrl->namespaces_sync_io_queues_mutex);
 	down_write(&ctrl->namespaces_rwsem);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
 		if (ns->head->ns_id > nsid || test_bit(NVME_NS_DEAD, &ns->flags))
 			list_move_tail(&ns->list, &rm_list);
 	}
 	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_sync_io_queues_mutex);
 
 	list_for_each_entry_safe(ns, next, &rm_list, list)
 		nvme_ns_remove(ns);
@@ -4565,9 +4573,11 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	/* this is a no-op when called from the controller reset handler */
 	nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
 
+	mutex_lock(&ctrl->namespaces_sync_io_queues_mutex);
 	down_write(&ctrl->namespaces_rwsem);
 	list_splice_init(&ctrl->namespaces, &ns_list);
 	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_sync_io_queues_mutex);
 
 	list_for_each_entry_safe(ns, next, &ns_list, list)
 		nvme_ns_remove(ns);
@@ -4901,6 +4911,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	INIT_LIST_HEAD(&ctrl->namespaces);
 	xa_init(&ctrl->cels);
 	init_rwsem(&ctrl->namespaces_rwsem);
+	mutex_init(&ctrl->namespaces_sync_io_queues_mutex);
 	ctrl->dev = dev;
 	ctrl->ops = ops;
 	ctrl->quirks = quirks;
@@ -5121,10 +5132,16 @@ void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
 
-	down_read(&ctrl->namespaces_rwsem);
+	/*
+	 * Guard ctrl->namespaces with namespaces_sync_io_queues_mutex instead
+	 * of namespaces_rwsem to avoid a lockdep deadlock warning: taking
+	 * namespaces_rwsem here around cancel_work_sync() via blk_sync_queue()
+	 * would conflict with nvme_timeout(), which takes namespaces_rwsem.
+	 */
+	mutex_lock(&ctrl->namespaces_sync_io_queues_mutex);
 	list_for_each_entry(ns, &ctrl->namespaces, list)
 		blk_sync_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_sync_io_queues_mutex);
 }
 EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
 
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1bdf714dcd9e..3f484b6c21d2 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -250,6 +250,7 @@ struct nvme_ctrl {
 	struct blk_mq_tag_set *admin_tagset;
 	struct list_head namespaces;
 	struct rw_semaphore namespaces_rwsem;
+	struct mutex namespaces_sync_io_queues_mutex;
 	struct device ctrl_device;
 	struct device *device;	/* char device */
 #ifdef CONFIG_NVME_HWMON
-- 
2.37.1
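
Not part of the patch itself: for readers less familiar with the kernel
locking primitives, the sketch below restates the locking rule from the
commit message as a small self-contained userspace C program, using a
pthread rwlock and mutex as stand-ins for namespaces_rwsem and
namespaces_sync_io_queues_mutex. All names in it (demo_ctrl, reader,
modifier, sync_path) are invented for illustration; it is not the driver's
actual code.

#include <pthread.h>
#include <stdio.h>

struct demo_ctrl {
	pthread_rwlock_t namespaces_rwsem;	/* fast read access in hot paths */
	pthread_mutex_t sync_io_queues_mutex;	/* list changes and the sync path */
	int namespaces;				/* stand-in for the namespace list */
};

static void reader(struct demo_ctrl *c)
{
	/* Hot path: read-only traversal takes only the rwsem, as before. */
	pthread_rwlock_rdlock(&c->namespaces_rwsem);
	printf("reader sees %d namespaces\n", c->namespaces);
	pthread_rwlock_unlock(&c->namespaces_rwsem);
}

static void modifier(struct demo_ctrl *c)
{
	/* List modification: take the mutex first, then the rwsem write side. */
	pthread_mutex_lock(&c->sync_io_queues_mutex);
	pthread_rwlock_wrlock(&c->namespaces_rwsem);
	c->namespaces++;
	pthread_rwlock_unlock(&c->namespaces_rwsem);
	pthread_mutex_unlock(&c->sync_io_queues_mutex);
}

static void sync_path(struct demo_ctrl *c)
{
	/*
	 * Sync path: takes only the mutex, so the work it waits for (the
	 * blk_sync_queue()/nvme_timeout() analogue) may still take the rwsem
	 * without creating a lock dependency cycle.
	 */
	pthread_mutex_lock(&c->sync_io_queues_mutex);
	/* ... wait for per-namespace work here ... */
	pthread_mutex_unlock(&c->sync_io_queues_mutex);
}

int main(void)
{
	struct demo_ctrl c = { .namespaces = 0 };

	pthread_rwlock_init(&c.namespaces_rwsem, NULL);
	pthread_mutex_init(&c.sync_io_queues_mutex, NULL);

	modifier(&c);	/* writer: both locks */
	reader(&c);	/* hot path: rwsem only */
	sync_path(&c);	/* sync: mutex only */

	pthread_mutex_destroy(&c.sync_io_queues_mutex);
	pthread_rwlock_destroy(&c.namespaces_rwsem);
	return 0;
}

Build with something like "cc demo.c -lpthread" and run it; the three calls
in main() exercise the three lock-usage patterns in the order writer,
reader, sync.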