I/O conditions on the nvme target may have the driver submitting to a
full hardware WQ. The hardware WQ is a shared resource among all nvme
controllers. When the driver hit a full WQ, it failed the I/O back to
the nvme-fc transport, which then escalated the failure into errors.

Correct by maintaining a sideband queue within the driver: WQEs are
added to it when the WQ full condition is hit, and drained from it as
soon as new WQ space opens up.

Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx>
Signed-off-by: James Smart <james.smart@xxxxxxxxxxxx>
---
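In outline, the deferral path looks like the sketch below (condensed
from the lpfc_nvmet_xmt_fcp_op() change in this patch; locking,
statistics, and error paths elided). Parking the WQE and returning
success keeps the back-pressure inside the driver instead of surfacing
a transient WQ-full condition as an I/O error to the nvme-fc transport:

	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
	if (rc == -EBUSY) {
		/* WQ full: park the WQE on the sideband list (the
		 * ring_lock is held in the real code). It is re-issued
		 * by lpfc_nvmet_wqfull_process() once the WQE release
		 * CQE indicates slots have opened up.
		 */
		ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
		list_add_tail(&nvmewqeq->list, &wq->wqfull_list);
		wq->q_flag |= HBA_NVMET_WQFULL;
		return 0;	/* I/O remains queued, not errored */
	}

The WQE-release CQE handler (lpfc_sli4_fp_handle_rel_wcqe()) later
calls lpfc_nvmet_wqfull_process() to drain the list in FIFO order,
re-parking an entry if the WQ fills again.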
 drivers/scsi/lpfc/lpfc_crtn.h  |   1 +
 drivers/scsi/lpfc/lpfc_nvmet.c | 116 +++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_nvmet.h |   1 +
 drivers/scsi/lpfc/lpfc_sli.c   |   3 ++
 drivers/scsi/lpfc/lpfc_sli4.h  |   5 +-
 5 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 559f9aa0ed08..3ecf50df93f4 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -254,6 +254,7 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
 			    struct lpfc_nvmet_ctxbuf *ctxp);
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			       struct fc_frame_header *fc_hdr);
+void lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, struct lpfc_queue *wq);
 void lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba);
 void lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 7927ac46d345..9c2acf90212c 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -71,6 +71,8 @@ static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *,
 static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
 					   struct lpfc_nvmet_rcv_ctx *,
 					   uint32_t, uint16_t);
+static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *,
+				    struct lpfc_nvmet_rcv_ctx *);
 
 void
 lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
@@ -741,7 +743,10 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
+	struct lpfc_queue *wq;
 	struct lpfc_iocbq *nvmewqeq;
+	struct lpfc_sli_ring *pring;
+	unsigned long iflags;
 	int rc;
 
 	if (phba->pport->load_flag & FC_UNLOADING) {
@@ -820,6 +825,21 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		return 0;
 	}
 
+	if (rc == -EBUSY) {
+		/*
+		 * WQ was full, so queue nvmewqeq to be sent after
+		 * WQE release CQE
+		 */
+		ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
+		wq = phba->sli4_hba.nvme_wq[rsp->hwqid];
+		pring = wq->pring;
+		spin_lock_irqsave(&pring->ring_lock, iflags);
+		list_add_tail(&nvmewqeq->list, &wq->wqfull_list);
+		wq->q_flag |= HBA_NVMET_WQFULL;
+		spin_unlock_irqrestore(&pring->ring_lock, iflags);
+		return 0;
+	}
+
 	/* Give back resources */
 	atomic_inc(&lpfc_nvmep->xmt_fcp_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -851,6 +871,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
+	struct lpfc_queue *wq;
 	unsigned long flags;
 
 	if (phba->pport->load_flag & FC_UNLOADING)
@@ -880,6 +901,14 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	}
 
 	ctxp->flag |= LPFC_NVMET_ABORT_OP;
+	if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) {
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+						 ctxp->oxid);
+		wq = phba->sli4_hba.nvme_wq[ctxp->wqeq->hba_wqidx];
+		lpfc_nvmet_wqfull_flush(phba, wq, ctxp);
+		return;
+	}
+
 	/* An state of LPFC_NVMET_STE_RCV means we have just received
 	 * the NVME command and have not started processing it.
 	 * (by issuing any IO WQEs on this exchange yet)
@@ -1435,16 +1464,103 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 	return 0;
 }
 
+static void
+lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq,
+			struct lpfc_nvmet_rcv_ctx *ctxp)
+{
+	struct lpfc_sli_ring *pring;
+	struct lpfc_iocbq *nvmewqeq;
+	struct lpfc_iocbq *next_nvmewqeq;
+	unsigned long iflags;
+	struct lpfc_wcqe_complete wcqe;
+	struct lpfc_wcqe_complete *wcqep;
+
+	pring = wq->pring;
+	wcqep = &wcqe;
+
+	/* Fake an ABORT error code back to cmpl routine */
+	memset(wcqep, 0, sizeof(struct lpfc_wcqe_complete));
+	bf_set(lpfc_wcqe_c_status, wcqep, IOSTAT_LOCAL_REJECT);
+	wcqep->parameter = IOERR_ABORT_REQUESTED;
+
+	spin_lock_irqsave(&pring->ring_lock, iflags);
+	list_for_each_entry_safe(nvmewqeq, next_nvmewqeq,
+				 &wq->wqfull_list, list) {
+		if (ctxp) {
+			/* Checking for a specific IO to flush */
+			if (nvmewqeq->context2 == ctxp) {
+				list_del(&nvmewqeq->list);
+				spin_unlock_irqrestore(&pring->ring_lock,
+						       iflags);
+				lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq,
+							  wcqep);
+				return;
+			}
+			continue;
+		} else {
+			/* Flush all IOs */
+			list_del(&nvmewqeq->list);
+			spin_unlock_irqrestore(&pring->ring_lock, iflags);
+			lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq, wcqep);
+			spin_lock_irqsave(&pring->ring_lock, iflags);
+		}
+	}
+	if (!ctxp)
+		wq->q_flag &= ~HBA_NVMET_WQFULL;
+	spin_unlock_irqrestore(&pring->ring_lock, iflags);
+}
+
+void
+lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
+			  struct lpfc_queue *wq)
+{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_sli_ring *pring;
+	struct lpfc_iocbq *nvmewqeq;
+	unsigned long iflags;
+	int rc;
+
+	/*
+	 * Some WQE slots are available, so try to re-issue anything
+	 * on the WQ wqfull_list.
+	 */
+	pring = wq->pring;
+	spin_lock_irqsave(&pring->ring_lock, iflags);
+	while (!list_empty(&wq->wqfull_list)) {
+		list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq,
+				 list);
+		spin_unlock_irqrestore(&pring->ring_lock, iflags);
+		rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
+		spin_lock_irqsave(&pring->ring_lock, iflags);
+		if (rc == -EBUSY) {
+			/* WQ was full again, so put it back on the list */
+			list_add(&nvmewqeq->list, &wq->wqfull_list);
+			spin_unlock_irqrestore(&pring->ring_lock, iflags);
+			return;
+		}
+	}
+	wq->q_flag &= ~HBA_NVMET_WQFULL;
+	spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
+#endif
+}
+
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_nvmet_tgtport *tgtp;
+	struct lpfc_queue *wq;
+	uint32_t qidx;
 
 	if (phba->nvmet_support == 0)
 		return;
 	if (phba->targetport) {
 		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) {
+			wq = phba->sli4_hba.nvme_wq[qidx];
+			lpfc_nvmet_wqfull_flush(phba, wq, NULL);
+		}
 		init_completion(&tgtp->tport_unreg_done);
 		nvmet_fc_unregister_targetport(phba->targetport);
 		wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 5b32c9e4d4ef..354cce443c9f 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -132,6 +132,7 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
 #define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
 #define LPFC_NVMET_DEFER_RCV_REPOST	0x20  /* repost to RQ on defer rcv */
+#define LPFC_NVMET_DEFER_WQFULL		0x40  /* Waiting on a free WQE */
 	struct rqb_dmabuf *rqb_buffer;
 	struct lpfc_nvmet_ctxbuf *ctxbuf;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d08d9b48f6b1..fbda2fbcbfec 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -13232,6 +13232,8 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 		if (childwq->queue_id == hba_wqid) {
 			lpfc_sli4_wq_release(childwq,
 				bf_get(lpfc_wcqe_r_wqe_index, wcqe));
+			if (childwq->q_flag & HBA_NVMET_WQFULL)
+				lpfc_nvmet_wqfull_process(phba, childwq);
 			wqid_matched = true;
 			break;
 		}
@@ -13950,6 +13952,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 
 	INIT_LIST_HEAD(&queue->list);
 	INIT_LIST_HEAD(&queue->wq_list);
+	INIT_LIST_HEAD(&queue->wqfull_list);
 	INIT_LIST_HEAD(&queue->page_list);
 	INIT_LIST_HEAD(&queue->child_list);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index a9af9980fc43..ac81bfa59278 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -145,6 +145,7 @@ struct lpfc_rqb {
 struct lpfc_queue {
 	struct list_head list;
 	struct list_head wq_list;
+	struct list_head wqfull_list;
 	enum lpfc_sli4_queue_type type;
 	enum lpfc_sli4_queue_subtype subtype;
 	struct lpfc_hba *phba;
@@ -173,9 +174,11 @@ struct lpfc_queue {
 #define LPFC_EXPANDED_PAGE_SIZE	16384
 #define LPFC_DEFAULT_PAGE_SIZE	4096
 	uint16_t chann;		/* IO channel this queue is associated with */
-	uint16_t db_format;
+	uint8_t db_format;
 #define LPFC_DB_RING_FORMAT	0x01
 #define LPFC_DB_LIST_FORMAT	0x02
+	uint8_t q_flag;
+#define HBA_NVMET_WQFULL	0x1 /* We hit WQ Full condition for NVMET */
 	void __iomem *db_regaddr;
 	/* For q stats */
 	uint32_t q_cnt_1;
-- 
2.13.1