This patch adds support for Broadcom's 57500 series of adapters to the libbnxt_re RoCE provider. The significant changes made as part of this patch are listed below. - Added the pci-id of the basic gen-p5 chip. - Adjusted the psn search memory allocation to suit the new psn search structure. - Added a chip context structure to select the appropriate execution flow in the data path and control path. - Fill the psn search area as per the new or older chip execution flow. - Removed the duplicate declaration of the BNXT_RE_ABI_VERSION macro. - Implemented the comp_mask approach to detect ABI changes. Signed-off-by: Devesh Sharma <devesh.sharma@xxxxxxxxxxxx> --- kernel-headers/rdma/bnxt_re-abi.h | 11 +++++++++ providers/bnxt_re/bnxt_re-abi.h | 10 ++++++-- providers/bnxt_re/main.c | 14 +++++++++++ providers/bnxt_re/main.h | 15 +++++++++++- providers/bnxt_re/verbs.c | 50 +++++++++++++++++++++++++++------------ 5 files changed, 82 insertions(+), 18 deletions(-) diff --git a/kernel-headers/rdma/bnxt_re-abi.h b/kernel-headers/rdma/bnxt_re-abi.h index a7a6111..dc52e3c 100644 --- a/kernel-headers/rdma/bnxt_re-abi.h +++ b/kernel-headers/rdma/bnxt_re-abi.h @@ -44,6 +44,14 @@ #define BNXT_RE_ABI_VERSION 1 +#define BNXT_RE_CHIP_ID0_CHIP_NUM_SFT 0x00 +#define BNXT_RE_CHIP_ID0_CHIP_REV_SFT 0x10 +#define BNXT_RE_CHIP_ID0_CHIP_MET_SFT 0x18 + +enum { + BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL +}; + struct bnxt_re_uctx_resp { __u32 dev_id; __u32 max_qp; @@ -51,6 +59,9 @@ struct bnxt_re_uctx_resp { __u32 cqe_sz; __u32 max_cqd; __u32 rsvd; + __aligned_u64 comp_mask; + __u32 chip_id0; + __u32 chip_id1; }; /* diff --git a/providers/bnxt_re/bnxt_re-abi.h b/providers/bnxt_re/bnxt_re-abi.h index 65d048d..c6998e8 100644 --- a/providers/bnxt_re/bnxt_re-abi.h +++ b/providers/bnxt_re/bnxt_re-abi.h @@ -43,8 +43,6 @@ #include <rdma/bnxt_re-abi.h> #include <kernel-abi/bnxt_re-abi.h> -#define BNXT_RE_ABI_VERSION 1 - #define BNXT_RE_FULL_FLAG_DELTA 0x80 DECLARE_DRV_CMD(ubnxt_re_pd, IB_USER_VERBS_CMD_ALLOC_PD, @@ -246,6 +244,14 @@ struct bnxt_re_psns { 
__le32 flg_npsn; }; +struct bnxt_re_psns_ext { + __u32 opc_spsn; + __u32 flg_npsn; + __u16 st_slot_idx; + __u16 rsvd0; + __u32 rsvd1; +}; + struct bnxt_re_sge { __le64 pa; __le32 lkey; diff --git a/providers/bnxt_re/main.c b/providers/bnxt_re/main.c index 1cd4d88..d171748 100644 --- a/providers/bnxt_re/main.c +++ b/providers/bnxt_re/main.c @@ -74,6 +74,7 @@ static const struct verbs_match_ent cna_table[] = { CNA(BROADCOM, 0x16EF), /* BCM57416 NPAR */ CNA(BROADCOM, 0x16F0), /* BCM58730 */ CNA(BROADCOM, 0x16F1), /* BCM57452 */ + CNA(BROADCOM, 0x1750), /* BCM57500 */ CNA(BROADCOM, 0xD800), /* BCM880xx VF */ CNA(BROADCOM, 0xD802), /* BCM58802 */ CNA(BROADCOM, 0xD804), /* BCM8804 SR */ @@ -108,6 +109,11 @@ static const struct verbs_context_ops bnxt_re_cntx_ops = { .destroy_ah = bnxt_re_destroy_ah }; +bool bnxt_re_is_chip_gen_p5(struct bnxt_re_chip_ctx *cctx) +{ + return cctx->chip_num == CHIP_NUM_57500; +} + /* Context Init functions */ static struct verbs_context *bnxt_re_alloc_context(struct ibv_device *vdev, int cmd_fd, @@ -133,6 +139,14 @@ static struct verbs_context *bnxt_re_alloc_context(struct ibv_device *vdev, dev->pg_size = resp.pg_size; dev->cqe_size = resp.cqe_sz; dev->max_cq_depth = resp.max_cqd; + if (resp.comp_mask & BNXT_RE_UCNTX_CMASK_HAVE_CCTX) { + cntx->cctx.chip_num = resp.chip_id0 & 0xFFFF; + cntx->cctx.chip_rev = (resp.chip_id0 >> + BNXT_RE_CHIP_ID0_CHIP_REV_SFT) & 0xFF; + cntx->cctx.chip_metal = (resp.chip_id0 >> + BNXT_RE_CHIP_ID0_CHIP_MET_SFT) & + 0xFF; + } pthread_spin_init(&cntx->fqlock, PTHREAD_PROCESS_PRIVATE); /* mmap shared page. 
*/ cntx->shpg = mmap(NULL, dev->pg_size, PROT_READ | PROT_WRITE, diff --git a/providers/bnxt_re/main.h b/providers/bnxt_re/main.h index 0b5c749..be57349 100644 --- a/providers/bnxt_re/main.h +++ b/providers/bnxt_re/main.h @@ -54,7 +54,14 @@ #define DEV "bnxt_re : " -#define BNXT_RE_UD_QP_HW_STALL 0x400000 +#define BNXT_RE_UD_QP_HW_STALL 0x400000 + +#define CHIP_NUM_57500 0x1750 +struct bnxt_re_chip_ctx { + __u16 chip_num; + __u8 chip_rev; + __u8 chip_metal; +}; struct bnxt_re_dpi { __u32 dpindx; @@ -81,6 +88,7 @@ struct bnxt_re_cq { }; struct bnxt_re_wrid { + struct bnxt_re_psns_ext *psns_ext; struct bnxt_re_psns *psns; uint64_t wrid; uint32_t bytes; @@ -111,6 +119,7 @@ struct bnxt_re_srq { struct bnxt_re_qp { struct ibv_qp ibvqp; + struct bnxt_re_chip_ctx *cctx; struct bnxt_re_queue *sqq; struct bnxt_re_wrid *swrid; struct bnxt_re_queue *rqq; @@ -155,6 +164,7 @@ struct bnxt_re_context { struct verbs_context ibvctx; uint32_t dev_id; uint32_t max_qp; + struct bnxt_re_chip_ctx cctx; uint32_t max_srq; struct bnxt_re_dpi udpi; void *shpg; @@ -162,6 +172,9 @@ struct bnxt_re_context { pthread_spinlock_t fqlock; }; +/* Chip context related functions */ +bool bnxt_re_is_chip_gen_p5(struct bnxt_re_chip_ctx *cctx); + /* DB ring functions used internally*/ void bnxt_re_ring_rq_db(struct bnxt_re_qp *qp); void bnxt_re_ring_sq_db(struct bnxt_re_qp *qp); diff --git a/providers/bnxt_re/verbs.c b/providers/bnxt_re/verbs.c index 7786d24..bec382b 100644 --- a/providers/bnxt_re/verbs.c +++ b/providers/bnxt_re/verbs.c @@ -844,9 +844,11 @@ static void bnxt_re_free_queues(struct bnxt_re_qp *qp) static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp, struct ibv_qp_init_attr *attr, uint32_t pg_size) { + struct bnxt_re_psns_ext *psns_ext; struct bnxt_re_queue *que; struct bnxt_re_psns *psns; uint32_t psn_depth; + uint32_t psn_size; int ret, indx; que = qp->sqq; @@ -857,11 +859,12 @@ static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp, que->diff = que->depth - attr->cap.max_send_wr; /* 
psn_depth extra entries of size que->stride */ - psn_depth = (que->depth * sizeof(struct bnxt_re_psns)) / - que->stride; - if ((que->depth * sizeof(struct bnxt_re_psns)) % que->stride) + psn_size = bnxt_re_is_chip_gen_p5(qp->cctx) ? + sizeof(struct bnxt_re_psns_ext) : + sizeof(struct bnxt_re_psns); + psn_depth = (que->depth * psn_size) / que->stride; + if ((que->depth * psn_size) % que->stride) psn_depth++; - que->depth += psn_depth; /* PSN-search memory is allocated without checking for * QP-Type. Kenrel driver do not map this memory if it @@ -875,6 +878,7 @@ static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp, que->depth -= psn_depth; /* start of spsn space sizeof(struct bnxt_re_psns) each. */ psns = (que->va + que->stride * que->depth); + psns_ext = (struct bnxt_re_psns_ext *)psns; pthread_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE); qp->swrid = calloc(que->depth, sizeof(struct bnxt_re_wrid)); if (!qp->swrid) { @@ -884,6 +888,13 @@ static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp, for (indx = 0 ; indx < que->depth; indx++, psns++) qp->swrid[indx].psns = psns; + if (bnxt_re_is_chip_gen_p5(qp->cctx)) { + for (indx = 0 ; indx < que->depth; indx++, psns_ext++) { + qp->swrid[indx].psns_ext = psns_ext; + qp->swrid[indx].psns = (struct bnxt_re_psns *)psns_ext; + } + } + qp->cap.max_swr = que->depth; if (qp->rqq) { @@ -931,6 +942,7 @@ struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *ibvpd, if (bnxt_re_alloc_queue_ptr(qp, attr)) goto fail; /* alloc queues */ + qp->cctx = &cntx->cctx; if (bnxt_re_alloc_queues(qp, attr, dev->pg_size)) goto failq; /* Fill ibv_cmd */ @@ -1094,26 +1106,36 @@ static int bnxt_re_build_sge(struct bnxt_re_sge *sge, struct ibv_sge *sg_list, return length; } -static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, struct bnxt_re_psns *psns, +static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, struct bnxt_re_wrid *wrid, uint8_t opcode, uint32_t len) { - uint32_t pkt_cnt = 0, nxt_psn; + uint32_t opc_spsn = 0, flg_npsn = 0; + struct 
bnxt_re_psns_ext *psns_ext; + uint32_t pkt_cnt = 0, nxt_psn = 0; + struct bnxt_re_psns *psns; + + psns = wrid->psns; + psns_ext = wrid->psns_ext; - memset(psns, 0, sizeof(*psns)); if (qp->qptyp == IBV_QPT_RC) { - psns->opc_spsn = htole32(qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK); + opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK; pkt_cnt = (len / qp->mtu); if (len % qp->mtu) pkt_cnt++; if (len == 0) pkt_cnt = 1; nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK); - psns->flg_npsn = htole32(nxt_psn); + flg_npsn = nxt_psn; qp->sq_psn = nxt_psn; } opcode = bnxt_re_ibv_wr_to_wc_opcd(opcode); - psns->opc_spsn |= htole32(((opcode & BNXT_RE_PSNS_OPCD_MASK) << - BNXT_RE_PSNS_OPCD_SHIFT)); + opc_spsn |= (((uint32_t)opcode & BNXT_RE_PSNS_OPCD_MASK) << + BNXT_RE_PSNS_OPCD_SHIFT); + memset(psns, 0, sizeof(*psns)); + psns->opc_spsn = htole32(opc_spsn); + psns->flg_npsn = htole32(flg_npsn); + if (bnxt_re_is_chip_gen_p5(qp->cctx)) + psns_ext->st_slot_idx = 0; } static void bnxt_re_fill_wrid(struct bnxt_re_wrid *wrid, struct ibv_send_wr *wr, @@ -1235,10 +1257,9 @@ int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, { struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); struct bnxt_re_queue *sq = qp->sqq; - struct bnxt_re_bsqe *hdr; struct bnxt_re_wrid *wrid; - struct bnxt_re_psns *psns; uint8_t is_inline = false; + struct bnxt_re_bsqe *hdr; int ret = 0, bytes = 0; bool ring_db = false; void *sqe; @@ -1268,7 +1289,6 @@ int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, sqe = (void *)(sq->va + (sq->tail * sq->stride)); wrid = &qp->swrid[sq->tail]; - psns = wrid->psns; memset(sqe, 0, bnxt_re_get_sqe_sz()); hdr = sqe; @@ -1318,7 +1338,7 @@ int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, } bnxt_re_fill_wrid(wrid, wr, bytes, qp->cap.sqsig); - bnxt_re_fill_psns(qp, psns, wr->opcode, bytes); + bnxt_re_fill_psns(qp, wrid, wr->opcode, bytes); bnxt_re_incr_tail(sq); qp->wqe_cnt++; wr = wr->next; -- 1.8.3.1