I/O conditions on the nvme target may have the driver submitting to a
full hardware WQ. The hardware WQ is a shared resource among all nvme
controllers. When the driver hit a full WQ, it failed the I/O back to
the nvme-fc transport, which then escalated the failure into errors.

Correct by maintaining a sideband queue within the driver: WQEs are
added to it when the WQ full condition is hit, and drained from it as
soon as new WQ space opens up.

Signed-off-by: Dick Kennedy <dick.kennedy@xxxxxxxxxxxx>
Signed-off-by: James Smart <james.smart@xxxxxxxxxxxx>
---
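In outline, the deferral path looks like the sketch below (condensed
from the lpfc_nvmet_xmt_fcp_op() change in this patch; locking,
statistics, and error paths elided). Parking the WQE and returning
success keeps the back-pressure inside the driver instead of surfacing
a transient WQ-full condition as an I/O error to the nvme-fc transport:

	rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
	if (rc == -EBUSY) {
		/* WQ full: park the WQE on the sideband list (the
		 * ring_lock is held in the real code). It is re-issued
		 * by lpfc_nvmet_wqfull_process() once the WQE release
		 * CQE indicates slots have opened up.
		 */
		ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
		list_add_tail(&nvmewqeq->list, &wq->wqfull_list);
		wq->q_flag |= HBA_NVMET_WQFULL;
		return 0;	/* I/O remains queued, not errored */
	}

The WQE-release CQE handler (lpfc_sli4_fp_handle_rel_wcqe()) later
calls lpfc_nvmet_wqfull_process() to drain the list in FIFO order,
re-parking an entry if the WQ fills again.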
 drivers/scsi/lpfc/lpfc_crtn.h  |   1 +
 drivers/scsi/lpfc/lpfc_nvmet.c | 116 +++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_nvmet.h |   1 +
 drivers/scsi/lpfc/lpfc_sli.c   |   3 ++
 drivers/scsi/lpfc/lpfc_sli4.h  |   5 +-
 5 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 559f9aa0ed08..3ecf50df93f4 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -254,6 +254,7 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba,
 			    struct lpfc_nvmet_ctxbuf *ctxp);
 int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 			       struct fc_frame_header *fc_hdr);
+void lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, struct lpfc_queue *wq);
 void lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba);
 void lpfc_nvme_wait_for_io_drain(struct lpfc_hba *phba);
 void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *,
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 7927ac46d345..9c2acf90212c 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -71,6 +71,8 @@ static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *,
 static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
 					   struct lpfc_nvmet_rcv_ctx *,
 					   uint32_t, uint16_t);
+static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *,
+				    struct lpfc_nvmet_rcv_ctx *);
 
 void
 lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
@@ -741,7 +743,10 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
+	struct lpfc_queue *wq;
 	struct lpfc_iocbq *nvmewqeq;
+	struct lpfc_sli_ring *pring;
+	unsigned long iflags;
 	int rc;
 
 	if (phba->pport->load_flag & FC_UNLOADING) {
@@ -820,6 +825,21 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
 		return 0;
 	}
 
+	if (rc == -EBUSY) {
+		/*
+		 * WQ was full, so queue nvmewqeq to be sent after
+		 * WQE release CQE
+		 */
+		ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
+		wq = phba->sli4_hba.nvme_wq[rsp->hwqid];
+		pring = wq->pring;
+		spin_lock_irqsave(&pring->ring_lock, iflags);
+		list_add_tail(&nvmewqeq->list, &wq->wqfull_list);
+		wq->q_flag |= HBA_NVMET_WQFULL;
+		spin_unlock_irqrestore(&pring->ring_lock, iflags);
+		return 0;
+	}
+
 	/* Give back resources */
 	atomic_inc(&lpfc_nvmep->xmt_fcp_drop);
 	lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -851,6 +871,7 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	struct lpfc_nvmet_rcv_ctx *ctxp =
 		container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
 	struct lpfc_hba *phba = ctxp->phba;
+	struct lpfc_queue *wq;
 	unsigned long flags;
 
 	if (phba->pport->load_flag & FC_UNLOADING)
@@ -880,6 +901,14 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
 	}
 
 	ctxp->flag |= LPFC_NVMET_ABORT_OP;
+	if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) {
+		lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
+						 ctxp->oxid);
+		wq = phba->sli4_hba.nvme_wq[ctxp->wqeq->hba_wqidx];
+		lpfc_nvmet_wqfull_flush(phba, wq, ctxp);
+		return;
+	}
+
 	/* An state of LPFC_NVMET_STE_RCV means we have just received
 	 * the NVME command and have not started processing it.
 	 * (by issuing any IO WQEs on this exchange yet)
@@ -1435,16 +1464,103 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
 	return 0;
 }
 
+static void
+lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq,
+			struct lpfc_nvmet_rcv_ctx *ctxp)
+{
+	struct lpfc_sli_ring *pring;
+	struct lpfc_iocbq *nvmewqeq;
+	struct lpfc_iocbq *next_nvmewqeq;
+	unsigned long iflags;
+	struct lpfc_wcqe_complete wcqe;
+	struct lpfc_wcqe_complete *wcqep;
+
+	pring = wq->pring;
+	wcqep = &wcqe;
+
+	/* Fake an ABORT error code back to cmpl routine */
+	memset(wcqep, 0, sizeof(struct lpfc_wcqe_complete));
+	bf_set(lpfc_wcqe_c_status, wcqep, IOSTAT_LOCAL_REJECT);
+	wcqep->parameter = IOERR_ABORT_REQUESTED;
+
+	spin_lock_irqsave(&pring->ring_lock, iflags);
+	list_for_each_entry_safe(nvmewqeq, next_nvmewqeq,
+				 &wq->wqfull_list, list) {
+		if (ctxp) {
+			/* Checking for a specific IO to flush */
+			if (nvmewqeq->context2 == ctxp) {
+				list_del(&nvmewqeq->list);
+				spin_unlock_irqrestore(&pring->ring_lock,
+						       iflags);
+				lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq,
+							  wcqep);
+				return;
+			}
+			continue;
+		} else {
+			/* Flush all IOs */
+			list_del(&nvmewqeq->list);
+			spin_unlock_irqrestore(&pring->ring_lock, iflags);
+			lpfc_nvmet_xmt_fcp_op_cmp(phba, nvmewqeq, wcqep);
+			spin_lock_irqsave(&pring->ring_lock, iflags);
+		}
+	}
+	if (!ctxp)
+		wq->q_flag &= ~HBA_NVMET_WQFULL;
+	spin_unlock_irqrestore(&pring->ring_lock, iflags);
+}
+
+void
+lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
+			  struct lpfc_queue *wq)
+{
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+	struct lpfc_sli_ring *pring;
+	struct lpfc_iocbq *nvmewqeq;
+	unsigned long iflags;
+	int rc;
+
+	/*
+	 * Some WQE slots are available, so try to re-issue anything
+	 * on the WQ wqfull_list.
+	 */
+	pring = wq->pring;
+	spin_lock_irqsave(&pring->ring_lock, iflags);
+	while (!list_empty(&wq->wqfull_list)) {
+		list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq,
+				 list);
+		spin_unlock_irqrestore(&pring->ring_lock, iflags);
+		rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq);
+		spin_lock_irqsave(&pring->ring_lock, iflags);
+		if (rc == -EBUSY) {
+			/* WQ was full again, so put it back on the list */
+			list_add(&nvmewqeq->list, &wq->wqfull_list);
+			spin_unlock_irqrestore(&pring->ring_lock, iflags);
+			return;
+		}
+	}
+	wq->q_flag &= ~HBA_NVMET_WQFULL;
+	spin_unlock_irqrestore(&pring->ring_lock, iflags);
+
+#endif
+}
+
 void
 lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
 {
 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
 	struct lpfc_nvmet_tgtport *tgtp;
+	struct lpfc_queue *wq;
+	uint32_t qidx;
 
 	if (phba->nvmet_support == 0)
 		return;
 	if (phba->targetport) {
 		tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+		for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++) {
+			wq = phba->sli4_hba.nvme_wq[qidx];
+			lpfc_nvmet_wqfull_flush(phba, wq, NULL);
+		}
 		init_completion(&tgtp->tport_unreg_done);
 		nvmet_fc_unregister_targetport(phba->targetport);
 		wait_for_completion_timeout(&tgtp->tport_unreg_done, 5);
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
index 5b32c9e4d4ef..354cce443c9f 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ b/drivers/scsi/lpfc/lpfc_nvmet.h
@@ -132,6 +132,7 @@ struct lpfc_nvmet_rcv_ctx {
 #define LPFC_NVMET_CTX_RLS		0x8  /* ctx free requested */
 #define LPFC_NVMET_ABTS_RCV		0x10  /* ABTS received on exchange */
 #define LPFC_NVMET_DEFER_RCV_REPOST	0x20  /* repost to RQ on defer rcv */
+#define LPFC_NVMET_DEFER_WQFULL		0x40  /* Waiting on a free WQE */
 	struct rqb_dmabuf *rqb_buffer;
 	struct lpfc_nvmet_ctxbuf *ctxbuf;
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d08d9b48f6b1..fbda2fbcbfec 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -13232,6 +13232,8 @@ lpfc_sli4_fp_handle_rel_wcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
 		if (childwq->queue_id == hba_wqid) {
 			lpfc_sli4_wq_release(childwq,
 				bf_get(lpfc_wcqe_r_wqe_index, wcqe));
+			if (childwq->q_flag & HBA_NVMET_WQFULL)
+				lpfc_nvmet_wqfull_process(phba, childwq);
 			wqid_matched = true;
 			break;
 		}
@@ -13950,6 +13952,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
 
 	INIT_LIST_HEAD(&queue->list);
 	INIT_LIST_HEAD(&queue->wq_list);
+	INIT_LIST_HEAD(&queue->wqfull_list);
 	INIT_LIST_HEAD(&queue->page_list);
 	INIT_LIST_HEAD(&queue->child_list);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index a9af9980fc43..ac81bfa59278 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -145,6 +145,7 @@ struct lpfc_rqb {
 struct lpfc_queue {
 	struct list_head list;
 	struct list_head wq_list;
+	struct list_head wqfull_list;
 	enum lpfc_sli4_queue_type type;
 	enum lpfc_sli4_queue_subtype subtype;
 	struct lpfc_hba *phba;
@@ -173,9 +174,11 @@ struct lpfc_queue {
 #define LPFC_EXPANDED_PAGE_SIZE	16384
 #define LPFC_DEFAULT_PAGE_SIZE	4096
 	uint16_t chann;		/* IO channel this queue is associated with */
-	uint16_t db_format;
+	uint8_t db_format;
 #define LPFC_DB_RING_FORMAT	0x01
 #define LPFC_DB_LIST_FORMAT	0x02
+	uint8_t q_flag;
+#define HBA_NVMET_WQFULL	0x1 /* We hit WQ Full condition for NVMET */
 	void __iomem *db_regaddr;
 	/* For q stats */
 	uint32_t q_cnt_1;
-- 
2.13.1