[PATCH 6.6 368/744] RDMA/bnxt_re: Refactor the queue index update

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



6.6-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Chandramohan Akula <chandramohan.akula@xxxxxxxxxxxx>

[ Upstream commit 3a4304d82695015d0703ee0c3331458d22e3ba7c ]

The queue index wrap-around logic is based on a power-of-2 queue depth,
so all queues are created with a power-of-2 depth. This increases the
memory usage of the driver. This change is required for the next
patches, which avoid the power-of-2 depth requirement for each of
the queues.

Update the functions that increment the producer index and consumer
index so that they handle wrap-around. Also, change the index handling
across multiple functions.

Signed-off-by: Chandramohan Akula <chandramohan.akula@xxxxxxxxxxxx>
Signed-off-by: Selvin Xavier <selvin.xavier@xxxxxxxxxxxx>
Link: https://lore.kernel.org/r/1698069803-1787-2-git-send-email-selvin.xavier@xxxxxxxxxxxx
Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
Stable-dep-of: 78cfd17142ef ("bnxt_re: avoid shift undefined behavior in bnxt_qplib_alloc_init_hwq")
Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>
---
 drivers/infiniband/hw/bnxt_re/qplib_fp.c   | 138 ++++++++++-----------
 drivers/infiniband/hw/bnxt_re/qplib_fp.h   |  20 ++-
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.c |  17 ++-
 drivers/infiniband/hw/bnxt_re/qplib_rcfw.h |   4 +-
 drivers/infiniband/hw/bnxt_re/qplib_res.c  |   2 +-
 drivers/infiniband/hw/bnxt_re/qplib_res.h  |  37 ++++--
 6 files changed, 123 insertions(+), 95 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 2a62239187622..48dcb14004594 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -237,18 +237,15 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
 	struct bnxt_qplib_hwq *hwq = &nq->hwq;
 	struct nq_base *nqe, **nq_ptr;
 	int budget = nq->budget;
-	u32 sw_cons, raw_cons;
 	uintptr_t q_handle;
 	u16 type;
 
 	spin_lock_bh(&hwq->lock);
 	/* Service the NQ until empty */
-	raw_cons = hwq->cons;
 	while (budget--) {
-		sw_cons = HWQ_CMP(raw_cons, hwq);
 		nq_ptr = (struct nq_base **)hwq->pbl_ptr;
-		nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
-		if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+		nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)];
+		if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
 			break;
 
 		/*
@@ -276,7 +273,8 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
 		default:
 			break;
 		}
-		raw_cons++;
+		bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+					 1, &nq->nq_db.dbinfo.flags);
 	}
 	spin_unlock_bh(&hwq->lock);
 }
@@ -302,18 +300,16 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
 	struct bnxt_qplib_hwq *hwq = &nq->hwq;
 	struct bnxt_qplib_cq *cq;
 	int budget = nq->budget;
-	u32 sw_cons, raw_cons;
 	struct nq_base *nqe;
 	uintptr_t q_handle;
+	u32 hw_polled = 0;
 	u16 type;
 
 	spin_lock_bh(&hwq->lock);
 	/* Service the NQ until empty */
-	raw_cons = hwq->cons;
 	while (budget--) {
-		sw_cons = HWQ_CMP(raw_cons, hwq);
-		nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
-		if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+		nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+		if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
 			break;
 
 		/*
@@ -372,12 +368,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
 				 "nqe with type = 0x%x not handled\n", type);
 			break;
 		}
-		raw_cons++;
+		hw_polled++;
+		bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+					 1, &nq->nq_db.dbinfo.flags);
 	}
-	if (hwq->cons != raw_cons) {
-		hwq->cons = raw_cons;
+	if (hw_polled)
 		bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
-	}
 	spin_unlock_bh(&hwq->lock);
 }
 
@@ -505,6 +501,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq,  u32 reg_offt)
 	pdev = nq->pdev;
 	nq_db = &nq->nq_db;
 
+	nq_db->dbinfo.flags = 0;
 	nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
 	nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
 	if (!nq_db->reg.bar_base) {
@@ -649,7 +646,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
 		rc = -ENOMEM;
 		goto fail;
 	}
-
+	srq->dbinfo.flags = 0;
 	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
 				 CMDQ_BASE_OPCODE_CREATE_SRQ,
 				 sizeof(req));
@@ -703,13 +700,9 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
 			  struct bnxt_qplib_srq *srq)
 {
 	struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
-	u32 sw_prod, sw_cons, count = 0;
-
-	sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-	sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
+	u32 count;
 
-	count = sw_prod > sw_cons ? sw_prod - sw_cons :
-				    srq_hwq->max_elements - sw_cons + sw_prod;
+	count = __bnxt_qplib_get_avail(srq_hwq);
 	if (count > srq->threshold) {
 		srq->arm_req = false;
 		bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
@@ -762,7 +755,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 	struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
 	struct rq_wqe *srqe;
 	struct sq_sge *hw_sge;
-	u32 sw_prod, sw_cons, count = 0;
+	u32 count = 0;
 	int i, next;
 
 	spin_lock(&srq_hwq->lock);
@@ -776,8 +769,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 	srq->start_idx = srq->swq[next].next_idx;
 	spin_unlock(&srq_hwq->lock);
 
-	sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-	srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL);
+	srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL);
 	memset(srqe, 0, srq->wqe_size);
 	/* Calculate wqe_size16 and data_len */
 	for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
@@ -793,17 +785,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 	srqe->wr_id[0] = cpu_to_le32((u32)next);
 	srq->swq[next].wr_id = wqe->wr_id;
 
-	srq_hwq->prod++;
+	bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot);
 
 	spin_lock(&srq_hwq->lock);
-	sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-	/* retaining srq_hwq->cons for this logic
-	 * actually the lock is only required to
-	 * read srq_hwq->cons.
-	 */
-	sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
-	count = sw_prod > sw_cons ? sw_prod - sw_cons :
-				    srq_hwq->max_elements - sw_cons + sw_prod;
+	count = __bnxt_qplib_get_avail(srq_hwq);
 	spin_unlock(&srq_hwq->lock);
 	/* Ring DB */
 	bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
@@ -850,6 +835,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	u32 tbl_indx;
 	int rc;
 
+	sq->dbinfo.flags = 0;
 	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
 				 CMDQ_BASE_OPCODE_CREATE_QP1,
 				 sizeof(req));
@@ -886,6 +872,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	/* RQ */
 	if (rq->max_wqe) {
+		rq->dbinfo.flags = 0;
 		hwq_attr.res = res;
 		hwq_attr.sginfo = &rq->sg_info;
 		hwq_attr.stride = sizeof(struct sq_sge);
@@ -993,6 +980,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	u32 tbl_indx;
 	u16 nsge;
 
+	sq->dbinfo.flags = 0;
 	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
 				 CMDQ_BASE_OPCODE_CREATE_QP,
 				 sizeof(req));
@@ -1041,6 +1029,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	/* RQ */
 	if (!qp->srq) {
+		rq->dbinfo.flags = 0;
 		hwq_attr.res = res;
 		hwq_attr.sginfo = &rq->sg_info;
 		hwq_attr.stride = sizeof(struct sq_sge);
@@ -1455,12 +1444,15 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
 {
 	struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+	u32 peek_flags, peek_cons;
 	struct cq_base *hw_cqe;
 	int i;
 
+	peek_flags = cq->dbinfo.flags;
+	peek_cons = cq_hwq->cons;
 	for (i = 0; i < cq_hwq->max_elements; i++) {
-		hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL);
-		if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+		hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL);
+		if (!CQE_CMP_VALID(hw_cqe, peek_flags))
 			continue;
 		/*
 		 * The valid test of the entry must be done first before
@@ -1490,6 +1482,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
 		default:
 			break;
 		}
+		bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons,
+					 1, &peek_flags);
 	}
 }
 
@@ -1962,7 +1956,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 	bnxt_qplib_fill_psn_search(qp, wqe, swq);
 queue_err:
 	bnxt_qplib_swq_mod_start(sq, wqe_idx);
-	bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+	bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots);
 	qp->wqe_cnt++;
 done:
 	if (sch_handler) {
@@ -2050,7 +2044,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 	base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
 queue_err:
 	bnxt_qplib_swq_mod_start(rq, wqe_idx);
-	bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+	bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots);
 done:
 	if (sch_handler) {
 		nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -2087,6 +2081,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 		return -EINVAL;
 	}
 
+	cq->dbinfo.flags = 0;
 	hwq_attr.res = res;
 	hwq_attr.depth = cq->max_wqe;
 	hwq_attr.stride = sizeof(struct cq_base);
@@ -2102,7 +2097,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 
 	req.dpi = cpu_to_le32(cq->dpi->dpi);
 	req.cq_handle = cpu_to_le64(cq->cq_handle);
-	req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+	req.cq_size = cpu_to_le32(cq->max_wqe);
 	pbl = &cq->hwq.pbl[PBL_LVL_0];
 	pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
 		     CMDQ_CREATE_CQ_PG_SIZE_SFT);
@@ -2145,6 +2140,8 @@ void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
 {
 	bnxt_qplib_free_hwq(res, &cq->hwq);
 	memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+       /* Reset only the cons bit in the flags */
+	cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT);
 }
 
 int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
@@ -2241,7 +2238,8 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
 		cqe++;
 		(*budget)--;
 skip_compl:
-		bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots);
+		bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+					 sq->swq[last].slots, &sq->dbinfo.flags);
 		sq->swq_last = sq->swq[last].next_idx;
 	}
 	*pcqe = cqe;
@@ -2288,7 +2286,8 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
 		cqe->wr_id = rq->swq[last].wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 rq->swq[last].slots, &rq->dbinfo.flags);
 		rq->swq_last = rq->swq[last].next_idx;
 	}
 	*pcqe = cqe;
@@ -2317,7 +2316,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
 static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 		     u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
 {
-	u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+	u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags;
 	struct bnxt_qplib_q *sq = &qp->sq;
 	struct cq_req *peek_req_hwcqe;
 	struct bnxt_qplib_qp *peek_qp;
@@ -2348,16 +2347,14 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 	}
 	if (sq->condition) {
 		/* Peek at the completions */
-		peek_raw_cq_cons = cq->hwq.cons;
+		peek_flags = cq->dbinfo.flags;
 		peek_sw_cq_cons = cq_cons;
 		i = cq->hwq.max_elements;
 		while (i--) {
-			peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
 			peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
 						       peek_sw_cq_cons, NULL);
 			/* If the next hwcqe is VALID */
-			if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
-					  cq->hwq.max_elements)) {
+			if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) {
 			/*
 			 * The valid test of the entry must be done first before
 			 * reading any further.
@@ -2400,8 +2397,9 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 				rc = -EINVAL;
 				goto out;
 			}
-			peek_sw_cq_cons++;
-			peek_raw_cq_cons++;
+			bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements,
+						 &peek_sw_cq_cons,
+						 1, &peek_flags);
 		}
 		dev_err(&cq->hwq.pdev->dev,
 			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
@@ -2488,7 +2486,8 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 			}
 		}
 skip:
-		bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+					 swq->slots, &sq->dbinfo.flags);
 		sq->swq_last = swq->next_idx;
 		if (sq->single)
 			break;
@@ -2515,7 +2514,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
 	srq->swq[srq->last_idx].next_idx = (int)tag;
 	srq->last_idx = (int)tag;
 	srq->swq[srq->last_idx].next_idx = -1;
-	srq->hwq.cons++; /* Support for SRQE counter */
+	bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons,
+				 srq->dbinfo.max_slot, &srq->dbinfo.flags);
 	spin_unlock(&srq->hwq.lock);
 }
 
@@ -2584,7 +2584,8 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 		cqe->wr_id = swq->wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 swq->slots, &rq->dbinfo.flags);
 		rq->swq_last = swq->next_idx;
 		*pcqe = cqe;
 
@@ -2670,7 +2671,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 		cqe->wr_id = swq->wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 swq->slots, &rq->dbinfo.flags);
 		rq->swq_last = swq->next_idx;
 		*pcqe = cqe;
 
@@ -2687,14 +2689,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 {
 	struct cq_base *hw_cqe;
-	u32 sw_cons, raw_cons;
 	bool rc = true;
 
-	raw_cons = cq->hwq.cons;
-	sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
-	hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+	hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
 	 /* Check for Valid bit. If the CQE is valid, return false */
-	rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+	rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags);
 	return rc;
 }
 
@@ -2776,7 +2775,8 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 		cqe->wr_id = swq->wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 swq->slots, &rq->dbinfo.flags);
 		rq->swq_last = swq->next_idx;
 		*pcqe = cqe;
 
@@ -2849,7 +2849,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
 			cqe++;
 			(*budget)--;
 		}
-		bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots);
+		bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+					 sq->swq[swq_last].slots, &sq->dbinfo.flags);
 		sq->swq_last = sq->swq[swq_last].next_idx;
 	}
 	*pcqe = cqe;
@@ -2934,19 +2935,17 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
 	struct cq_base *hw_cqe;
-	u32 sw_cons, raw_cons;
 	int budget, rc = 0;
+	u32 hw_polled = 0;
 	u8 type;
 
-	raw_cons = cq->hwq.cons;
 	budget = num_cqes;
 
 	while (budget) {
-		sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
-		hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+		hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
 
 		/* Check for Valid bit */
-		if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+		if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags))
 			break;
 
 		/*
@@ -2961,7 +2960,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 			rc = bnxt_qplib_cq_process_req(cq,
 						       (struct cq_req *)hw_cqe,
 						       &cqe, &budget,
-						       sw_cons, lib_qp);
+						       cq->hwq.cons, lib_qp);
 			break;
 		case CQ_BASE_CQE_TYPE_RES_RC:
 			rc = bnxt_qplib_cq_process_res_rc(cq,
@@ -3007,12 +3006,13 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 				dev_err(&cq->hwq.pdev->dev,
 					"process_cqe error rc = 0x%x\n", rc);
 		}
-		raw_cons++;
+		hw_polled++;
+		bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons,
+					 1, &cq->dbinfo.flags);
+
 	}
-	if (cq->hwq.cons != raw_cons) {
-		cq->hwq.cons = raw_cons;
+	if (hw_polled)
 		bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
-	}
 exit:
 	return num_cqes - budget;
 }
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 404b851091ca2..23c27cb429786 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -348,9 +348,21 @@ struct bnxt_qplib_qp {
 #define CQE_IDX(x)		((x) & CQE_MAX_IDX_PER_PG)
 
 #define ROCE_CQE_CMP_V			0
-#define CQE_CMP_VALID(hdr, raw_cons, cp_bit)			\
+#define CQE_CMP_VALID(hdr, pass)			\
 	(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==		\
-	   !((raw_cons) & (cp_bit)))
+	   !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
+static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq)
+{
+	int cons, prod, avail;
+
+	cons = hwq->cons;
+	prod = hwq->prod;
+	avail = cons - prod;
+	if (cons <= prod)
+		avail += hwq->depth;
+	return avail;
+}
 
 static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
 					 u8 slots)
@@ -443,9 +455,9 @@ struct bnxt_qplib_cq {
 #define NQE_PG(x)		(((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
 #define NQE_IDX(x)		((x) & NQE_MAX_IDX_PER_PG)
 
-#define NQE_CMP_VALID(hdr, raw_cons, cp_bit)			\
+#define NQE_CMP_VALID(hdr, pass)			\
 	(!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) ==	\
-	   !((raw_cons) & (cp_bit)))
+	   !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
 
 #define BNXT_QPLIB_NQE_MAX_CNT		(128 * 1024)
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index e47b4ca64d33e..15e6d2b80c700 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -734,17 +734,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
 	u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
 	struct bnxt_qplib_hwq *hwq = &creq->hwq;
 	struct creq_base *creqe;
-	u32 sw_cons, raw_cons;
 	unsigned long flags;
 	u32 num_wakeup = 0;
+	u32 hw_polled = 0;
 
 	/* Service the CREQ until budget is over */
 	spin_lock_irqsave(&hwq->lock, flags);
-	raw_cons = hwq->cons;
 	while (budget > 0) {
-		sw_cons = HWQ_CMP(raw_cons, hwq);
-		creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
-		if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements))
+		creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+		if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
 			break;
 		/* The valid test of the entry must be done first before
 		 * reading any further.
@@ -775,15 +773,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
 					 type);
 			break;
 		}
-		raw_cons++;
 		budget--;
+		hw_polled++;
+		bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+					 1, &creq->creq_db.dbinfo.flags);
 	}
 
-	if (hwq->cons != raw_cons) {
-		hwq->cons = raw_cons;
+	if (hw_polled)
 		bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
 				      rcfw->res->cctx, true);
-	}
 	spin_unlock_irqrestore(&hwq->lock, flags);
 	if (num_wakeup)
 		wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
@@ -1113,6 +1111,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
 	pdev = rcfw->pdev;
 	creq_db = &rcfw->creq.creq_db;
 
+	creq_db->dbinfo.flags = 0;
 	creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
 	creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
 	if (!creq_db->reg.bar_id)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 7b31bee3e0005..45996e60a0d03 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -141,9 +141,9 @@ struct bnxt_qplib_crsbe {
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT	(64 * 1024)
 #define BNXT_QPLIB_CREQE_UNITS		16	/* 16-Bytes per prod unit */
-#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit)			\
+#define CREQ_CMP_VALID(hdr, pass)			\
 	(!!((hdr)->v & CREQ_BASE_V) ==				\
-	   !((raw_cons) & (cp_bit)))
+	   !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
 #define CREQ_ENTRY_POLL_BUDGET		0x100
 
 /* HWQ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 157db6b7e1193..ae2bde34e785b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -343,7 +343,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
 	hwq->cons = 0;
 	hwq->pdev = pdev;
 	hwq->depth = hwq_attr->depth;
-	hwq->max_elements = depth;
+	hwq->max_elements = hwq->depth;
 	hwq->element_size = stride;
 	hwq->qe_ppg = pg_size / stride;
 	/* For direct access to the elements */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 5949f004f7856..3e3383b8a9135 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -186,6 +186,14 @@ struct bnxt_qplib_db_info {
 	struct bnxt_qplib_hwq	*hwq;
 	u32			xid;
 	u32			max_slot;
+	u32                     flags;
+};
+
+enum bnxt_qplib_db_info_flags_mask {
+	BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT        = 0x0UL,
+	BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT        = 0x1UL,
+	BNXT_QPLIB_FLAG_EPOCH_CONS_MASK         = 0x1UL,
+	BNXT_QPLIB_FLAG_EPOCH_PROD_MASK         = 0x2UL,
 };
 
 /* Tables */
@@ -396,24 +404,34 @@ void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
 
 int bnxt_qplib_determine_atomics(struct pci_dev *dev);
 
-static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
+					    struct bnxt_qplib_hwq *hwq, u32 cnt)
 {
-	hwq->prod = (hwq->prod + cnt) % hwq->depth;
+	/* move prod and update toggle/epoch if wrap around */
+	hwq->prod += cnt;
+	if (hwq->prod >= hwq->depth) {
+		hwq->prod %= hwq->depth;
+		dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT;
+	}
 }
 
-static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq,
-					    u32 cnt)
+static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+					    u32 *dbinfo_flags)
 {
-	hwq->cons = (hwq->cons + cnt) % hwq->depth;
+	/* move cons and update toggle/epoch if wrap around */
+	*cons += cnt;
+	if (*cons >= max_elements) {
+		*cons %= max_elements;
+		*dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT;
+	}
 }
 
 static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
 					bool arm)
 {
-	u32 key;
+	u32 key = 0;
 
-	key = info->hwq->cons & (info->hwq->max_elements - 1);
-	key |= (CMPL_DOORBELL_IDX_VALID |
+	key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID |
 		(CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
 	if (!arm)
 		key |= CMPL_DOORBELL_MASK;
@@ -427,8 +445,7 @@ static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
 
 	key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
 	key <<= 32;
-	key |= (info->hwq->cons & (info->hwq->max_elements - 1)) &
-		DBC_DBC_INDEX_MASK;
+	key |= (info->hwq->cons & DBC_DBC_INDEX_MASK);
 	writeq(key, info->db);
 }
 
-- 
2.43.0







[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux