This is a note to let you know that I've just added the patch titled

    RDMA/bnxt_re: Refactor the queue index update

to the 6.6-stable tree which can be found at:

    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
    rdma-bnxt_re-refactor-the-queue-index-update.patch
and it can be found in the queue-6.6 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


commit 75761a2d16722244c32aac906820387251f074a1
Author: Chandramohan Akula <chandramohan.akula@xxxxxxxxxxxx>
Date:   Mon Oct 23 07:03:22 2023 -0700

    RDMA/bnxt_re: Refactor the queue index update

    [ Upstream commit 3a4304d82695015d0703ee0c3331458d22e3ba7c ]

    The queue index wrap-around logic is based on a power-of-2 depth: all
    queues are created with a power-of-2 depth, which increases the memory
    usage by the driver. This change is required for the next patches,
    which avoid the power-of-2 depth requirement for each of the queues.

    Update the functions that increment the producer index and consumer
    index during wrap-around, and change the index handling across
    multiple functions.

    Signed-off-by: Chandramohan Akula <chandramohan.akula@xxxxxxxxxxxx>
    Signed-off-by: Selvin Xavier <selvin.xavier@xxxxxxxxxxxx>
    Link: https://lore.kernel.org/r/1698069803-1787-2-git-send-email-selvin.xavier@xxxxxxxxxxxx
    Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
    Stable-dep-of: 78cfd17142ef ("bnxt_re: avoid shift undefined behavior in bnxt_qplib_alloc_init_hwq")
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 2a62239187622..48dcb14004594 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -237,18 +237,15 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
 	struct bnxt_qplib_hwq *hwq = &nq->hwq;
 	struct nq_base *nqe, **nq_ptr;
 	int budget = nq->budget;
-	u32 sw_cons, raw_cons;
 	uintptr_t q_handle;
 	u16 type;
 
 	spin_lock_bh(&hwq->lock);
 	/* Service the NQ until empty */
-	raw_cons = hwq->cons;
 	while (budget--) {
-		sw_cons = HWQ_CMP(raw_cons, hwq);
 		nq_ptr = (struct nq_base **)hwq->pbl_ptr;
-		nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
-		if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+		nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)];
+		if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
 			break;
 
 		/*
@@ -276,7 +273,8 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
 		default:
 			break;
 		}
-		raw_cons++;
+		bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+					 1, &nq->nq_db.dbinfo.flags);
 	}
 	spin_unlock_bh(&hwq->lock);
 }
@@ -302,18 +300,16 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
 	struct bnxt_qplib_hwq *hwq = &nq->hwq;
 	struct bnxt_qplib_cq *cq;
 	int budget = nq->budget;
-	u32 sw_cons, raw_cons;
 	struct nq_base *nqe;
 	uintptr_t q_handle;
+	u32 hw_polled = 0;
 	u16 type;
 
 	spin_lock_bh(&hwq->lock);
 	/* Service the NQ until empty */
-	raw_cons = hwq->cons;
 	while (budget--) {
-		sw_cons = HWQ_CMP(raw_cons, hwq);
-		nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
-		if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+		nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+		if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
 			break;
 
 		/*
@@ -372,12 +368,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
 				 "nqe with type = 0x%x not handled\n", type);
 			break;
 		}
-		raw_cons++;
+		hw_polled++;
+		bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+					 1, &nq->nq_db.dbinfo.flags);
 	}
-	if (hwq->cons != raw_cons) {
-		hwq->cons = raw_cons;
+	if (hw_polled)
 		bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo,
 				      nq->res->cctx, true);
-	}
 	spin_unlock_bh(&hwq->lock);
 }
@@ -505,6 +501,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
 	pdev = nq->pdev;
 	nq_db = &nq->nq_db;
 
+	nq_db->dbinfo.flags = 0;
 	nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
 	nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
 	if (!nq_db->reg.bar_base) {
@@ -649,7 +646,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
 		rc = -ENOMEM;
 		goto fail;
 	}
-
+	srq->dbinfo.flags = 0;
 	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
 				 CMDQ_BASE_OPCODE_CREATE_SRQ,
 				 sizeof(req));
@@ -703,13 +700,9 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
 			  struct bnxt_qplib_srq *srq)
 {
 	struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
-	u32 sw_prod, sw_cons, count = 0;
-
-	sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-	sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
+	u32 count;
 
-	count = sw_prod > sw_cons ? sw_prod - sw_cons :
-				    srq_hwq->max_elements - sw_cons + sw_prod;
+	count = __bnxt_qplib_get_avail(srq_hwq);
 	if (count > srq->threshold) {
 		srq->arm_req = false;
 		bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
@@ -762,7 +755,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 	struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
 	struct rq_wqe *srqe;
 	struct sq_sge *hw_sge;
-	u32 sw_prod, sw_cons, count = 0;
+	u32 count = 0;
 	int i, next;
 
 	spin_lock(&srq_hwq->lock);
@@ -776,8 +769,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 	srq->start_idx = srq->swq[next].next_idx;
 	spin_unlock(&srq_hwq->lock);
 
-	sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-	srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL);
+	srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL);
 	memset(srqe, 0, srq->wqe_size);
 	/* Calculate wqe_size16 and data_len */
 	for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
@@ -793,17 +785,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
 	srqe->wr_id[0] = cpu_to_le32((u32)next);
 	srq->swq[next].wr_id = wqe->wr_id;
 
-	srq_hwq->prod++;
+	bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot);
 
 	spin_lock(&srq_hwq->lock);
-	sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
-	/* retaining srq_hwq->cons for this logic
-	 * actually the lock is only required to
-	 * read srq_hwq->cons.
-	 */
-	sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
-	count = sw_prod > sw_cons ? sw_prod - sw_cons :
-				    srq_hwq->max_elements - sw_cons + sw_prod;
+	count = __bnxt_qplib_get_avail(srq_hwq);
 	spin_unlock(&srq_hwq->lock);
 	/* Ring DB */
 	bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
@@ -850,6 +835,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	u32 tbl_indx;
 	int rc;
 
+	sq->dbinfo.flags = 0;
 	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
 				 CMDQ_BASE_OPCODE_CREATE_QP1,
 				 sizeof(req));
@@ -886,6 +872,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	/* RQ */
 	if (rq->max_wqe) {
+		rq->dbinfo.flags = 0;
 		hwq_attr.res = res;
 		hwq_attr.sginfo = &rq->sg_info;
 		hwq_attr.stride = sizeof(struct sq_sge);
@@ -993,6 +980,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 	u32 tbl_indx;
 	u16 nsge;
 
+	sq->dbinfo.flags = 0;
 	bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
 				 CMDQ_BASE_OPCODE_CREATE_QP,
 				 sizeof(req));
@@ -1041,6 +1029,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 
 	/* RQ */
 	if (!qp->srq) {
+		rq->dbinfo.flags = 0;
 		hwq_attr.res = res;
 		hwq_attr.sginfo = &rq->sg_info;
 		hwq_attr.stride = sizeof(struct sq_sge);
@@ -1455,12 +1444,15 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
 static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
 {
 	struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+	u32 peek_flags, peek_cons;
 	struct cq_base *hw_cqe;
 	int i;
 
+	peek_flags = cq->dbinfo.flags;
+	peek_cons = cq_hwq->cons;
 	for (i = 0; i < cq_hwq->max_elements; i++) {
-		hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL);
-		if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+		hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL);
+		if (!CQE_CMP_VALID(hw_cqe, peek_flags))
 			continue;
 		/*
 		 * The valid test of the entry must be done first before
@@ -1490,6 +1482,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
 		default:
 			break;
 		}
+		bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons,
+					 1, &peek_flags);
 	}
 }
 
@@ -1962,7 +1956,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 		bnxt_qplib_fill_psn_search(qp, wqe, swq);
 queue_err:
 	bnxt_qplib_swq_mod_start(sq, wqe_idx);
-	bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+	bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots);
 	qp->wqe_cnt++;
done:
 	if (sch_handler) {
@@ -2050,7 +2044,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
 	base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
queue_err:
 	bnxt_qplib_swq_mod_start(rq, wqe_idx);
-	bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+	bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots);
done:
 	if (sch_handler) {
 		nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -2087,6 +2081,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 		return -EINVAL;
 	}
 
+	cq->dbinfo.flags = 0;
 	hwq_attr.res = res;
 	hwq_attr.depth = cq->max_wqe;
 	hwq_attr.stride = sizeof(struct cq_base);
@@ -2102,7 +2097,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
 
 	req.dpi = cpu_to_le32(cq->dpi->dpi);
 	req.cq_handle = cpu_to_le64(cq->cq_handle);
-	req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+	req.cq_size = cpu_to_le32(cq->max_wqe);
 	pbl = &cq->hwq.pbl[PBL_LVL_0];
 	pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
 		     CMDQ_CREATE_CQ_PG_SIZE_SFT);
@@ -2145,6 +2140,8 @@ void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
 {
 	bnxt_qplib_free_hwq(res, &cq->hwq);
 	memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+	/* Reset only the cons bit in the flags */
+	cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT);
 }
 
 int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
@@ -2241,7 +2238,8 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
 			cqe++;
 			(*budget)--;
skip_compl:
-		bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots);
+		bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+					 sq->swq[last].slots, &sq->dbinfo.flags);
 		sq->swq_last = sq->swq[last].next_idx;
 	}
 	*pcqe = cqe;
@@ -2288,7 +2286,8 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
 		cqe->wr_id = rq->swq[last].wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 rq->swq[last].slots, &rq->dbinfo.flags);
 		rq->swq_last = rq->swq[last].next_idx;
 	}
 	*pcqe = cqe;
@@ -2317,7 +2316,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
 static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 		     u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
 {
-	u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+	u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags;
 	struct bnxt_qplib_q *sq = &qp->sq;
 	struct cq_req *peek_req_hwcqe;
 	struct bnxt_qplib_qp *peek_qp;
@@ -2348,16 +2347,14 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 	}
 	if (sq->condition) {
 		/* Peek at the completions */
-		peek_raw_cq_cons = cq->hwq.cons;
+		peek_flags = cq->dbinfo.flags;
 		peek_sw_cq_cons = cq_cons;
 		i = cq->hwq.max_elements;
 		while (i--) {
-			peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
 			peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
 						       peek_sw_cq_cons, NULL);
 			/* If the next hwcqe is VALID */
-			if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
-					  cq->hwq.max_elements)) {
+			if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) {
 				/*
 				 * The valid test of the entry must be done first before
 				 * reading any further.
 				 */
@@ -2400,8 +2397,9 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
 				rc = -EINVAL;
 				goto out;
 			}
-			peek_sw_cq_cons++;
-			peek_raw_cq_cons++;
+			bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements,
+						 &peek_sw_cq_cons,
+						 1, &peek_flags);
 		}
 		dev_err(&cq->hwq.pdev->dev,
 			"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
@@ -2488,7 +2486,8 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
 			}
 		}
skip:
-		bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+					 swq->slots, &sq->dbinfo.flags);
 		sq->swq_last = swq->next_idx;
 		if (sq->single)
 			break;
@@ -2515,7 +2514,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
 	srq->swq[srq->last_idx].next_idx = (int)tag;
 	srq->last_idx = (int)tag;
 	srq->swq[srq->last_idx].next_idx = -1;
-	srq->hwq.cons++; /* Support for SRQE counter */
+	bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons,
+				 srq->dbinfo.max_slot, &srq->dbinfo.flags);
 	spin_unlock(&srq->hwq.lock);
 }
 
@@ -2584,7 +2584,8 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 		cqe->wr_id = swq->wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 swq->slots, &rq->dbinfo.flags);
 		rq->swq_last = swq->next_idx;
 
 		*pcqe = cqe;
@@ -2670,7 +2671,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 		cqe->wr_id = swq->wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 swq->slots, &rq->dbinfo.flags);
 		rq->swq_last = swq->next_idx;
 
 		*pcqe = cqe;
@@ -2687,14 +2689,11 @@
 bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
 {
 	struct cq_base *hw_cqe;
-	u32 sw_cons, raw_cons;
 	bool rc = true;
 
-	raw_cons = cq->hwq.cons;
-	sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
-	hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+	hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
 	/* Check for Valid bit. If the CQE is valid, return false */
-	rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+	rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags);
 	return rc;
 }
 
@@ -2776,7 +2775,8 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 		cqe->wr_id = swq->wr_id;
 		cqe++;
 		(*budget)--;
-		bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+		bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+					 swq->slots, &rq->dbinfo.flags);
 		rq->swq_last = swq->next_idx;
 
 		*pcqe = cqe;
@@ -2849,7 +2849,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
 			cqe++;
 			(*budget)--;
 		}
-		bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots);
+		bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+					 sq->swq[swq_last].slots, &sq->dbinfo.flags);
 		sq->swq_last = sq->swq[swq_last].next_idx;
 	}
 	*pcqe = cqe;
@@ -2934,19 +2935,17 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 		       int num_cqes, struct bnxt_qplib_qp **lib_qp)
 {
 	struct cq_base *hw_cqe;
-	u32 sw_cons, raw_cons;
 	int budget, rc = 0;
+	u32 hw_polled = 0;
 	u8 type;
 
-	raw_cons = cq->hwq.cons;
 	budget = num_cqes;
 	while (budget) {
-		sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
-		hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+		hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
 
 		/* Check for Valid bit */
-		if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+		if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags))
 			break;
 
 		/*
@@ -2961,7 +2960,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 			rc = bnxt_qplib_cq_process_req(cq,
 						       (struct cq_req *)hw_cqe,
 						       &cqe, &budget,
-						       sw_cons, lib_qp);
+						       cq->hwq.cons, lib_qp);
 			break;
 		case CQ_BASE_CQE_TYPE_RES_RC:
 			rc = bnxt_qplib_cq_process_res_rc(cq,
@@ -3007,12 +3006,13 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
 			dev_err(&cq->hwq.pdev->dev,
 				"process_cqe error rc = 0x%x\n", rc);
 		}
-		raw_cons++;
+		hw_polled++;
+		bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons,
+					 1, &cq->dbinfo.flags);
+
 	}
-	if (cq->hwq.cons != raw_cons) {
-		cq->hwq.cons = raw_cons;
+	if (hw_polled)
 		bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
-	}
exit:
 	return num_cqes - budget;
 }
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 404b851091ca2..23c27cb429786 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -348,9 +348,21 @@ struct bnxt_qplib_qp {
 #define CQE_IDX(x)		((x) & CQE_MAX_IDX_PER_PG)
 
 #define ROCE_CQE_CMP_V		0
-#define CQE_CMP_VALID(hdr, raw_cons, cp_bit)			\
+#define CQE_CMP_VALID(hdr, pass)				\
 	(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) ==		\
-	   !((raw_cons) & (cp_bit)))
+	   !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
+static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq)
+{
+	int cons, prod, avail;
+
+	cons = hwq->cons;
+	prod = hwq->prod;
+	avail = cons - prod;
+	if (cons <= prod)
+		avail += hwq->depth;
+	return avail;
+}
 
 static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
 					 u8 slots)
@@ -443,9 +455,9 @@ struct bnxt_qplib_cq {
 #define NQE_PG(x)		(((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
 #define NQE_IDX(x)		((x) & NQE_MAX_IDX_PER_PG)
 
-#define NQE_CMP_VALID(hdr, raw_cons, cp_bit)			\
+#define NQE_CMP_VALID(hdr, pass)				\
 	(!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) ==	\
-	   !((raw_cons) & (cp_bit)))
+	   !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
 
 #define BNXT_QPLIB_NQE_MAX_CNT	(128 * 1024)
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index e47b4ca64d33e..15e6d2b80c700 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -734,17 +734,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
 	u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
 	struct bnxt_qplib_hwq *hwq = &creq->hwq;
 	struct creq_base *creqe;
-	u32 sw_cons, raw_cons;
 	unsigned long flags;
 	u32 num_wakeup = 0;
+	u32 hw_polled = 0;
 
 	/* Service the CREQ until budget is over */
 	spin_lock_irqsave(&hwq->lock, flags);
-	raw_cons = hwq->cons;
 	while (budget > 0) {
-		sw_cons = HWQ_CMP(raw_cons, hwq);
-		creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
-		if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements))
+		creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+		if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
 			break;
 		/* The valid test of the entry must be done first before
 		 * reading any further.
@@ -775,15 +773,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
 				 type);
 			break;
 		}
-		raw_cons++;
 		budget--;
+		hw_polled++;
+		bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+					 1, &creq->creq_db.dbinfo.flags);
 	}
-	if (hwq->cons != raw_cons) {
-		hwq->cons = raw_cons;
+	if (hw_polled)
 		bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
 				      rcfw->res->cctx, true);
-	}
 	spin_unlock_irqrestore(&hwq->lock, flags);
 	if (num_wakeup)
 		wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
 }
@@ -1113,6 +1111,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
 	pdev = rcfw->pdev;
 	creq_db = &rcfw->creq.creq_db;
 
+	creq_db->dbinfo.flags = 0;
 	creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
 	creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
 	if (!creq_db->reg.bar_id)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 7b31bee3e0005..45996e60a0d03 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -141,9 +141,9 @@ struct bnxt_qplib_crsbe {
 /* Allocate 1 per QP for async error notification for now */
 #define BNXT_QPLIB_CREQE_MAX_CNT	(64 * 1024)
 #define BNXT_QPLIB_CREQE_UNITS		16	/* 16-Bytes per prod unit */
-#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit)			\
+#define CREQ_CMP_VALID(hdr, pass)				\
 	(!!((hdr)->v & CREQ_BASE_V) ==				\
-	   !((raw_cons) & (cp_bit)))
+	   !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
 
 #define CREQ_ENTRY_POLL_BUDGET		0x100
 
 /* HWQ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 157db6b7e1193..ae2bde34e785b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -343,7 +343,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
 	hwq->cons = 0;
 	hwq->pdev = pdev;
 	hwq->depth = hwq_attr->depth;
-	hwq->max_elements = depth;
+	hwq->max_elements = hwq->depth;
 	hwq->element_size = stride;
 	hwq->qe_ppg = pg_size / stride;
 	/* For direct access to the elements */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 5949f004f7856..3e3383b8a9135 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -186,6 +186,14 @@ struct bnxt_qplib_db_info {
 	struct bnxt_qplib_hwq	*hwq;
 	u32			xid;
 	u32			max_slot;
+	u32			flags;
+};
+
+enum bnxt_qplib_db_info_flags_mask {
+	BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT	= 0x0UL,
+	BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT	= 0x1UL,
+	BNXT_QPLIB_FLAG_EPOCH_CONS_MASK		= 0x1UL,
+	BNXT_QPLIB_FLAG_EPOCH_PROD_MASK		= 0x2UL,
 };
 
 /* Tables */
@@ -396,24 +404,34 @@ void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
 
 int bnxt_qplib_determine_atomics(struct pci_dev *dev);
 
-static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
+					    struct bnxt_qplib_hwq *hwq, u32 cnt)
 {
-	hwq->prod = (hwq->prod + cnt) % hwq->depth;
+	/* move prod and update toggle/epoch if wrap around */
+	hwq->prod += cnt;
+	if (hwq->prod >= hwq->depth) {
+		hwq->prod %= hwq->depth;
+		dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT;
+	}
 }
 
-static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq,
-					    u32 cnt)
+static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+					    u32 *dbinfo_flags)
 {
-	hwq->cons = (hwq->cons + cnt) % hwq->depth;
+	/* move cons and update toggle/epoch if wrap around */
+	*cons += cnt;
+	if (*cons >= max_elements) {
+		*cons %= max_elements;
+		*dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT;
+	}
 }
 
 static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
 					bool arm)
 {
-	u32 key;
+	u32 key = 0;
 
-	key = info->hwq->cons & (info->hwq->max_elements - 1);
-	key |= (CMPL_DOORBELL_IDX_VALID |
+	key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID |
 		(CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
 	if (!arm)
 		key |= CMPL_DOORBELL_MASK;
@@ -427,8 +445,7 @@ static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
 
 	key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
 	key <<= 32;
-	key |= (info->hwq->cons & (info->hwq->max_elements - 1)) &
-	       DBC_DBC_INDEX_MASK;
+	key |= (info->hwq->cons & DBC_DBC_INDEX_MASK);
 	writeq(key, info->db);
 }
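
For context, the heart of the refactor is that a queue's wrap-around is now
tracked with an epoch bit in dbinfo->flags that flips on every wrap, instead
of deriving the valid bit from a raw index masked by a power-of-2 depth. The
standalone program below is a sketch only, not part of the patch: the *_demo
names are invented here, and only the arithmetic mirrors the new
bnxt_qplib_hwq_incr_prod()/bnxt_qplib_hwq_incr_cons() and
__bnxt_qplib_get_avail() helpers. It shows the scheme working with a
deliberately non-power-of-2 depth.

/* demo_epoch.c - illustration only; not part of the kernel patch */
#include <stdio.h>
#include <stdint.h>

#define EPOCH_CONS_SHIFT 0x0UL	/* mirrors BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT */
#define EPOCH_PROD_SHIFT 0x1UL	/* mirrors BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT */
#define EPOCH_CONS_MASK  0x1UL	/* mirrors BNXT_QPLIB_FLAG_EPOCH_CONS_MASK */

struct hwq_demo {
	uint32_t prod;		/* producer index, kept in [0, depth) */
	uint32_t cons;		/* consumer index, kept in [0, depth) */
	uint32_t depth;		/* no longer required to be a power of 2 */
	uint32_t flags;		/* epoch bits, as in bnxt_qplib_db_info::flags */
};

/* Same arithmetic as the new bnxt_qplib_hwq_incr_cons(): advance the
 * index, wrap with %, and flip the epoch bit whenever a wrap happens.
 */
static void incr_cons_demo(struct hwq_demo *q, uint32_t cnt)
{
	q->cons += cnt;
	if (q->cons >= q->depth) {
		q->cons %= q->depth;
		q->flags ^= 1UL << EPOCH_CONS_SHIFT;
	}
}

static void incr_prod_demo(struct hwq_demo *q, uint32_t cnt)
{
	q->prod += cnt;
	if (q->prod >= q->depth) {
		q->prod %= q->depth;
		q->flags ^= 1UL << EPOCH_PROD_SHIFT;
	}
}

/* Same arithmetic as __bnxt_qplib_get_avail(): distance from prod around
 * to cons, valid for any depth because both indices stay below depth.
 */
static uint32_t get_avail_demo(struct hwq_demo *q)
{
	int avail = (int)q->cons - (int)q->prod;

	if (q->cons <= q->prod)
		avail += (int)q->depth;
	return (uint32_t)avail;
}

int main(void)
{
	struct hwq_demo q = { .depth = 5 };	/* deliberately not a power of 2 */
	int i;

	for (i = 0; i < 12; i++)	/* wraps twice: prod epoch flips back to 0 */
		incr_prod_demo(&q, 1);
	for (i = 0; i < 7; i++)		/* wraps once: cons epoch flips to 1 */
		incr_cons_demo(&q, 1);

	/* prints: prod=2 cons=2 flags=0x1 avail=5 */
	printf("prod=%u cons=%u flags=0x%lx avail=%u\n",
	       q.prod, q.cons, (unsigned long)q.flags, get_avail_demo(&q));
	return 0;
}

A consumer-side *_CMP_VALID check in the patch then compares an entry's
toggle bit against (flags & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK): entries written
in the current epoch match, while stale entries from the previous pass over
the ring do not, which is what the raw-index/cp_bit comparison used to
provide and what required the power-of-2 depth.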