When admin commands are used in error handling (EH) to recover the controller, we have to enforce their timeout ourselves and can't depend on the block layer's timeout, since a deadlock may be caused if these commands are timed out by the block layer again. Cc: James Smart <james.smart@xxxxxxxxxxxx> Cc: Jianchao Wang <jianchao.w.wang@xxxxxxxxxx> Cc: Christoph Hellwig <hch@xxxxxx> Cc: Sagi Grimberg <sagi@xxxxxxxxxxx> Cc: linux-nvme@xxxxxxxxxxxxxxxxxxx Cc: Laurence Oberman <loberman@xxxxxxxxxx> Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx> --- drivers/nvme/host/pci.c | 81 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 70 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fbc71fac6f1e..ff09b1c760ea 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1733,21 +1733,28 @@ static inline void nvme_release_cmb(struct nvme_dev *dev) } } -static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) +static void nvme_init_set_host_mem_cmd(struct nvme_dev *dev, + struct nvme_command *c, u32 bits) { u64 dma_addr = dev->host_mem_descs_dma; + + memset(c, 0, sizeof(*c)); + c->features.opcode = nvme_admin_set_features; + c->features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); + c->features.dword11 = cpu_to_le32(bits); + c->features.dword12 = cpu_to_le32(dev->host_mem_size >> + ilog2(dev->ctrl.page_size)); + c->features.dword13 = cpu_to_le32(lower_32_bits(dma_addr)); + c->features.dword14 = cpu_to_le32(upper_32_bits(dma_addr)); + c->features.dword15 = cpu_to_le32(dev->nr_host_mem_descs); +} + +static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) +{ struct nvme_command c; int ret; - memset(&c, 0, sizeof(c)); - c.features.opcode = nvme_admin_set_features; - c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); - c.features.dword11 = cpu_to_le32(bits); - c.features.dword12 = cpu_to_le32(dev->host_mem_size >> - ilog2(dev->ctrl.page_size)); - c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr)); - c.features.dword14 = 
cpu_to_le32(upper_32_bits(dma_addr)); - c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs); + nvme_init_set_host_mem_cmd(dev, &c, bits); ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); if (ret) { @@ -1758,6 +1765,58 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) return ret; } +static void nvme_set_host_mem_end_io(struct request *rq, blk_status_t sts) +{ + struct completion *waiting = rq->end_io_data; + + rq->end_io_data = NULL; + + /* + * complete last, if this is a stack request the process (and thus + * the rq pointer) could be invalid right after this complete() + */ + complete(waiting); +} + +/* + * This function can only be used inside nvme_dev_disable() when timeout + * may not work, then this function has to cover the timeout by itself. + * + * When wait_for_completion_io_timeout() returns 0 and timeout happens, + * this request will be completed after controller is shutdown. + */ +static int nvme_set_host_mem_timeout(struct nvme_dev *dev, u32 bits) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct nvme_command c; + struct request_queue *q = dev->ctrl.admin_q; + struct request *req; + int ret; + + nvme_init_set_host_mem_cmd(dev, &c, bits); + + req = nvme_alloc_request(q, &c, 0, NVME_QID_ANY); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->timeout = ADMIN_TIMEOUT; + req->end_io_data = &wait; + + blk_execute_rq_nowait(q, NULL, req, false, + nvme_set_host_mem_end_io); + ret = wait_for_completion_io_timeout(&wait, ADMIN_TIMEOUT); + if (ret > 0) { + if (nvme_req(req)->flags & NVME_REQ_CANCELLED) + ret = -EINTR; + else + ret = nvme_req(req)->status; + blk_mq_free_request(req); + } else + ret = -EINTR; + + return ret; +} + static void nvme_free_host_mem(struct nvme_dev *dev) { int i; @@ -2216,7 +2275,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * but I'd rather be safe than sorry.. 
*/ if (dev->host_mem_descs) - nvme_set_host_mem(dev, 0); + nvme_set_host_mem_timeout(dev, 0); nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } -- 2.9.5