On Thu, Aug 13, 2020 at 01:47:04PM -0400, Mike Snitzer wrote: > This is just a tweak to improve the high-level fault tree of core NVMe > error handling. No functional change, but for such basic errors, > avoiding entering nvme_failover_req is meaningful on a code flow level. > Makes code to handle errors that need local retry clearer by being more > structured, less circuitous. > > Allows NVMe core's handling of such errors to be more explicit and live > in core.c rather than multipath.c -- so things like ACRE handling can be > made explicitly part of core and not nested under nvme_failover_req's > relatively obscure failsafe that returns false for anything it doesn't > care about. If we're going that way I'd rather do something like the (untested) patch below that adds a dispostion function with a function that decides it and then just switches on it: diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 88cff309d8e4f0..a740320f0d4ee7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -241,17 +241,6 @@ static blk_status_t nvme_error_status(u16 status) } } -static inline bool nvme_req_needs_retry(struct request *req) -{ - if (blk_noretry_request(req)) - return false; - if (nvme_req(req)->status & NVME_SC_DNR) - return false; - if (nvme_req(req)->retries >= nvme_max_retries) - return false; - return true; -} - static void nvme_retry_req(struct request *req) { struct nvme_ns *ns = req->q->queuedata; @@ -268,33 +257,75 @@ static void nvme_retry_req(struct request *req) blk_mq_delay_kick_requeue_list(req->q, delay); } -void nvme_complete_rq(struct request *req) +enum nvme_disposition { + COMPLETE, + RETRY, + REDIRECT_ANA, + REDIRECT_TMP, +}; + +static inline enum nvme_disposition nvme_req_disposition(struct request *req) +{ + if (likely(nvme_req(req)->status == 0)) + return COMPLETE; + + if (blk_noretry_request(req) || + (nvme_req(req)->status & NVME_SC_DNR) || + nvme_req(req)->retries >= nvme_max_retries) + return COMPLETE; + + if (req->cmd_flags & REQ_NVME_MPATH) { + switch (nvme_req(req)->status & 0x7ff) { + case NVME_SC_ANA_TRANSITION: + case NVME_SC_ANA_INACCESSIBLE: + case NVME_SC_ANA_PERSISTENT_LOSS: + return REDIRECT_ANA; + case NVME_SC_HOST_PATH_ERROR: + case NVME_SC_HOST_ABORTED_CMD: + return REDIRECT_TMP; + } + } + + if (blk_queue_dying(req->q)) + return COMPLETE; + return RETRY; +} + +static inline void nvme_complete_req(struct request *req) { blk_status_t status = nvme_error_status(nvme_req(req)->status); - trace_nvme_complete_rq(req); + if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && + req_op(req) == REQ_OP_ZONE_APPEND) + req->__sector = nvme_lba_to_sect(req->q->queuedata, + le64_to_cpu(nvme_req(req)->result.u64)); + + nvme_trace_bio_complete(req, status); + blk_mq_end_request(req, status); +} +void nvme_complete_rq(struct request *req) +{ + trace_nvme_complete_rq(req); nvme_cleanup_cmd(req); if (nvme_req(req)->ctrl->kas) nvme_req(req)->ctrl->comp_seen = true; - if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { - if ((req->cmd_flags & REQ_NVME_MPATH) && nvme_failover_req(req)) - return; - - if (!blk_queue_dying(req->q)) { - nvme_retry_req(req); - return; - } - } else if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && - req_op(req) == REQ_OP_ZONE_APPEND) { - req->__sector = nvme_lba_to_sect(req->q->queuedata, - le64_to_cpu(nvme_req(req)->result.u64)); + switch (nvme_req_disposition(req)) { + case COMPLETE: + nvme_complete_req(req); + return; + case RETRY: + nvme_retry_req(req); + return; + case REDIRECT_ANA: + nvme_failover_req(req, true); + return; + case REDIRECT_TMP: + nvme_failover_req(req, false); + return; } - - nvme_trace_bio_complete(req, status); - blk_mq_end_request(req, status); } EXPORT_SYMBOL_GPL(nvme_complete_rq); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3ded54d2c9c6ad..0c22b2c88687a2 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -65,51 +65,32 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, } } -bool nvme_failover_req(struct request *req) +void nvme_failover_req(struct request *req, bool is_ana_status) { struct nvme_ns *ns = req->q->queuedata; - u16 status = nvme_req(req)->status; unsigned long flags; - switch (status & 0x7ff) { - case NVME_SC_ANA_TRANSITION: - case NVME_SC_ANA_INACCESSIBLE: - case NVME_SC_ANA_PERSISTENT_LOSS: - /* - * If we got back an ANA error we know the controller is alive, - * but not ready to serve this namespaces. The spec suggests - * we should update our general state here, but due to the fact - * that the admin and I/O queues are not serialized that is - * fundamentally racy. So instead just clear the current path, - * mark the the path as pending and kick of a re-read of the ANA - * log page ASAP. - */ - nvme_mpath_clear_current_path(ns); - if (ns->ctrl->ana_log_buf) { - set_bit(NVME_NS_ANA_PENDING, &ns->flags); - queue_work(nvme_wq, &ns->ctrl->ana_work); - } - break; - case NVME_SC_HOST_PATH_ERROR: - case NVME_SC_HOST_ABORTED_CMD: - /* - * Temporary transport disruption in talking to the controller. - * Try to send on a new path. - */ - nvme_mpath_clear_current_path(ns); - break; - default: - /* This was a non-ANA error so follow the normal error path. */ - return false; + nvme_mpath_clear_current_path(ns); + + /* + * If we got back an ANA error we know the controller is alive, but not + * ready to serve this namespaces. The spec suggests we should update + * our general state here, but due to the fact that the admin and I/O + * queues are not serialized that is fundamentally racy. So instead + * just clear the current path, mark the the path as pending and kick + * of a re-read of the ANA log page ASAP. + */ + if (is_ana_status && ns->ctrl->ana_log_buf) { + set_bit(NVME_NS_ANA_PENDING, &ns->flags); + queue_work(nvme_wq, &ns->ctrl->ana_work); } spin_lock_irqsave(&ns->head->requeue_lock, flags); blk_steal_bios(&ns->head->requeue_list, req); spin_unlock_irqrestore(&ns->head->requeue_lock, flags); - blk_mq_end_request(req, 0); + blk_mq_end_request(req, 0); kblockd_schedule_work(&ns->head->requeue_work); - return true; } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ebb8c3ed388554..aeff1c491ac2ef 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -629,7 +629,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, struct nvme_ctrl *ctrl, int *flags); -bool nvme_failover_req(struct request *req); +void nvme_failover_req(struct request *req, bool is_ana_status); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); @@ -688,9 +688,8 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); } -static inline bool nvme_failover_req(struct request *req) +static inline void nvme_failover_req(struct request *req, bool is_ana_status) { - return false; } static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel