From: Yuval Bason <Yuval.Bason@xxxxxxxxxx> Enable SRQs for rdma-core. The SRQ implementation in rdma-core is fairly straightforward, except perhaps for the way the driver updates the FW producers: they are updated through a mapped buffer that the FW reads, and not through doorbells as for the RQ / SQ. Signed-off-by: Yuval Bason <yuval.bason@xxxxxxxxxx> Signed-off-by: Michal Kalderon <michal.kalderon@xxxxxxxxxx> --- kernel-headers/rdma/qedr-abi.h | 15 ++ providers/qedr/qelr.h | 43 ++++++ providers/qedr/qelr_abi.h | 2 + providers/qedr/qelr_main.c | 7 + providers/qedr/qelr_verbs.c | 337 +++++++++++++++++++++++++++++++++++++++++ providers/qedr/qelr_verbs.h | 9 ++ providers/qedr/rdma_common.h | 1 + 7 files changed, 414 insertions(+) diff --git a/kernel-headers/rdma/qedr-abi.h b/kernel-headers/rdma/qedr-abi.h index 24c658b..9031344 100644 --- a/kernel-headers/rdma/qedr-abi.h +++ b/kernel-headers/rdma/qedr-abi.h @@ -111,4 +111,19 @@ struct qedr_create_qp_uresp { __u32 reserved; }; +struct qedr_create_srq_ureq { + /* user space virtual address of producer pair */ + __u64 prod_pair_addr; + + /* SRQ */ + __u64 srq_addr; /* user space virtual address of SRQ buffer */ + __u64 srq_len; /* length of SRQ buffer */ +}; + +struct qedr_create_srq_uresp { + __u16 srq_id; + __u16 reserved0; + __u32 reserved1; +}; + #endif /* __QEDR_USER_H__ */ diff --git a/providers/qedr/qelr.h b/providers/qedr/qelr.h index 0b2e4a2..eeebfe0 100644 --- a/providers/qedr/qelr.h +++ b/providers/qedr/qelr.h @@ -61,6 +61,7 @@ enum DP_MODULE { QELR_MSG_QP = (QELR_MSG_SQ | QELR_MSG_RQ), QELR_MSG_MR = 0x80000, QELR_MSG_INIT = 0x100000, + QELR_MSG_SRQ = 0x200000, /* to be added...up to 0x8000000 */ }; @@ -128,8 +129,10 @@ struct qelr_devctx { uint32_t max_send_wr; uint32_t max_recv_wr; + uint32_t max_srq_wr; uint32_t sges_per_send_wr; uint32_t sges_per_recv_wr; + uint32_t sges_per_srq_wr; int max_cqes; }; @@ -221,6 +224,27 @@ struct qelr_dpm { struct qelr_rdma_ext *rdma_ext; }; +struct qelr_srq_hwq_info { + uint32_t max_sges; + uint32_t 
max_wr; + struct qelr_chain chain; + uint32_t wqe_prod; /* WQE prod index in HW ring */ + uint32_t sge_prod; /* SGE prod index in HW ring */ + uint32_t wr_prod_cnt; /* wr producer count */ + uint32_t wr_cons_cnt; /* wr consumer count */ + uint32_t num_elems; + + void *virt_prod_pair_addr; /* producer pair virtual address */ + uint64_t phy_prod_pair_addr; /* producer pair physical address */ +}; + +struct qelr_srq { + struct ibv_srq ibv_srq; + struct qelr_srq_hwq_info hw_srq; + uint16_t srq_id; + pthread_spinlock_t lock; +}; + struct qelr_qp { struct ibv_qp ibv_qp; pthread_spinlock_t q_lock; @@ -247,6 +271,7 @@ struct qelr_qp { int sq_sig_all; int atomic_supported; uint8_t edpm_disabled; + struct qelr_srq *srq; }; static inline struct qelr_devctx *get_qelr_ctx(struct ibv_context *ibctx) @@ -274,6 +299,11 @@ static inline struct qelr_cq *get_qelr_cq(struct ibv_cq *ibcq) return container_of(ibcq, struct qelr_cq, ibv_cq); } +static inline struct qelr_srq *get_qelr_srq(struct ibv_srq *ibsrq) +{ + return container_of(ibsrq, struct qelr_srq, ibv_srq); +} + #define SET_FIELD(value, name, flag) \ do { \ (value) &= ~(name ## _MASK << name ## _SHIFT); \ @@ -308,6 +338,19 @@ static inline struct qelr_cq *get_qelr_cq(struct ibv_cq *ibcq) (sge)->flags = htole32(vflags); \ } while (0) +#define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ + do { \ + TYPEPTR_ADDR_SET(hdr, wr_id, vwr_id); \ + (hdr)->num_sges = num_sge; \ + } while (0) + +#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ + do { \ + TYPEPTR_ADDR_SET(sge, addr, vaddr); \ + (sge)->length = htole32(vlength); \ + (sge)->l_key = htole32(vlkey); \ + } while (0) + #define U64_HI(val) ((uint32_t)(((uint64_t)(uintptr_t)(val)) >> 32)) #define U64_LO(val) ((uint32_t)(((uint64_t)(uintptr_t)(val)) & 0xffffffff)) #define HILO_U64(hi, lo) ((uintptr_t)((((uint64_t)(hi)) << 32) + (lo))) diff --git a/providers/qedr/qelr_abi.h b/providers/qedr/qelr_abi.h index 3666845..c674ddc 100644 --- a/providers/qedr/qelr_abi.h +++ 
b/providers/qedr/qelr_abi.h @@ -49,5 +49,7 @@ DECLARE_DRV_CMD(qelr_get_context, IB_USER_VERBS_CMD_GET_CONTEXT, empty, qedr_alloc_ucontext_resp); DECLARE_DRV_CMD(qelr_reg_mr, IB_USER_VERBS_CMD_REG_MR, empty, empty); +DECLARE_DRV_CMD(qelr_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, + qedr_create_srq_ureq, qedr_create_srq_uresp); #endif /* __QELR_ABI_H__ */ diff --git a/providers/qedr/qelr_main.c b/providers/qedr/qelr_main.c index e99fc88..40742fb 100644 --- a/providers/qedr/qelr_main.c +++ b/providers/qedr/qelr_main.c @@ -96,6 +96,11 @@ static const struct verbs_context_ops qelr_ctx_ops = { .query_qp = qelr_query_qp, .modify_qp = qelr_modify_qp, .destroy_qp = qelr_destroy_qp, + .create_srq = qelr_create_srq, + .destroy_srq = qelr_destroy_srq, + .modify_srq = qelr_modify_srq, + .query_srq = qelr_query_srq, + .post_srq_recv = qelr_post_srq_recv, .post_send = qelr_post_send, .post_recv = qelr_post_recv, .async_event = qelr_async_event, @@ -183,8 +188,10 @@ static struct verbs_context *qelr_alloc_context(struct ibv_device *ibdev, ctx->db_size = resp.db_size; ctx->max_send_wr = resp.max_send_wr; ctx->max_recv_wr = resp.max_recv_wr; + ctx->max_srq_wr = resp.max_srq_wr; ctx->sges_per_send_wr = resp.sges_per_send_wr; ctx->sges_per_recv_wr = resp.sges_per_recv_wr; + ctx->sges_per_srq_wr = resp.sges_per_recv_wr; ctx->max_cqes = resp.max_cqes; ctx->db_addr = mmap(NULL, ctx->db_size, PROT_WRITE, MAP_SHARED, diff --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c index 28ea094..4bd155d 100644 --- a/providers/qedr/qelr_verbs.c +++ b/providers/qedr/qelr_verbs.c @@ -61,6 +61,7 @@ #define IS_IWARP(_dev) (_dev->node_type == IBV_NODE_RNIC) #define IS_ROCE(_dev) (_dev->node_type == IBV_NODE_CA) +#define HILO_64(hi, lo) ((((uint64_t)(hi)) << 32) + (lo)) static void qelr_inc_sw_cons_u16(struct qelr_qp_hwq_info *info) { @@ -313,6 +314,180 @@ int qelr_destroy_cq(struct ibv_cq *ibv_cq) return 0; } +int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr) +{ + 
struct qelr_devctx *cxt = get_qelr_ctx(ibv_srq->context); + struct qelr_srq *srq = get_qelr_srq(ibv_srq); + struct ibv_query_srq cmd; + int rc; + + rc = ibv_cmd_query_srq(ibv_srq, attr, &cmd, sizeof(cmd)); + if (rc) { + DP_ERR(cxt->dbg_fp, "query srq: failed to query %p, got %d.\n", + srq, rc); + return rc; + } + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "query srq: successfully queried %p\n", srq); + + return 0; +} + +int qelr_modify_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr, + int attr_mask) +{ + struct qelr_devctx *cxt = get_qelr_ctx(ibv_srq->context); + struct qelr_srq *srq = get_qelr_srq(ibv_srq); + struct ibv_modify_srq cmd; + int rc; + + rc = ibv_cmd_modify_srq(ibv_srq, attr, attr_mask, &cmd, sizeof(cmd)); + if (rc) { + DP_ERR(cxt->dbg_fp, + "modify srq: failed to modify %p, got %d.\n", srq, rc); + return rc; + } + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "modify srq: successfully modified %p\n", srq); + + return 0; +} + +static void qelr_destroy_srq_buffers(struct ibv_srq *ibv_srq) +{ + struct qelr_srq *srq = get_qelr_srq(ibv_srq); + uint32_t *virt_prod_pair_addr; + uint32_t prod_size; + + qelr_chain_free(&srq->hw_srq.chain); + + virt_prod_pair_addr = srq->hw_srq.virt_prod_pair_addr; + prod_size = sizeof(struct rdma_srq_producers); + + ibv_dofork_range(virt_prod_pair_addr, prod_size); + munmap(virt_prod_pair_addr, prod_size); +} + +int qelr_destroy_srq(struct ibv_srq *ibv_srq) +{ + struct qelr_devctx *cxt = get_qelr_ctx(ibv_srq->context); + struct qelr_srq *srq = get_qelr_srq(ibv_srq); + int rc; + + rc = ibv_cmd_destroy_srq(ibv_srq); + if (rc) { + DP_ERR(cxt->dbg_fp, + "destroy srq: failed to destroy %p, got %d.\n", srq, rc); + return rc; + } + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "destroy srq: successfully destroyed %p\n", srq); + + qelr_destroy_srq_buffers(ibv_srq); + free(srq); + + return 0; +} + +static inline void +qelr_create_srq_configure_req(struct qelr_srq *srq, + struct qelr_create_srq *req) +{ + req->srq_addr = 
(uintptr_t)srq->hw_srq.chain.first_addr; + req->srq_len = srq->hw_srq.chain.size; + req->prod_pair_addr = (uintptr_t)srq->hw_srq.virt_prod_pair_addr; +} + +static inline int qelr_create_srq_buffers(struct qelr_devctx *cxt, + struct qelr_srq *srq, + struct ibv_srq_init_attr *attrs) +{ + uint32_t max_wr, max_sges; + int chain_size, prod_size; + void *addr; + int rc; + + max_wr = attrs->attr.max_wr; + if (!max_wr) + return -EINVAL; + + max_wr = min_t(uint32_t, max_wr, cxt->max_srq_wr); + max_sges = max_wr * (cxt->sges_per_srq_wr + 1); /* +1 for header */ + chain_size = max_sges * QELR_RQE_ELEMENT_SIZE; + + rc = qelr_chain_alloc(&srq->hw_srq.chain, chain_size, + cxt->kernel_page_size, QELR_RQE_ELEMENT_SIZE); + if (rc) { + DP_ERR(cxt->dbg_fp, + "create srq: failed to map srq, got %d", rc); + return rc; + } + + prod_size = sizeof(struct rdma_srq_producers); + addr = mmap(NULL, prod_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, + 0); + if (addr == MAP_FAILED) { + DP_ERR(cxt->dbg_fp, + "create srq: failed to map producer, got %d", errno); + qelr_chain_free(&srq->hw_srq.chain); + return errno; + } + + rc = ibv_dontfork_range(addr, prod_size); + if (rc) { + munmap(addr, prod_size); + qelr_chain_free(&srq->hw_srq.chain); + return rc; + } + + srq->hw_srq.virt_prod_pair_addr = addr; + srq->hw_srq.max_sges = cxt->sges_per_srq_wr; + srq->hw_srq.max_wr = max_wr; + + return 0; +} + +struct ibv_srq *qelr_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr *init_attr) +{ + struct qelr_devctx *cxt = get_qelr_ctx(pd->context); + struct qelr_create_srq req; + struct qelr_create_srq_resp resp; + struct qelr_srq *srq; + int rc, status = 0; + + srq = calloc(1, sizeof(*srq)); + if (!srq) + goto err0; + + rc = qelr_create_srq_buffers(cxt, srq, init_attr); + if (rc) + goto err1; + + pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); + qelr_create_srq_configure_req(srq, &req); + status = ibv_cmd_create_srq(pd, &srq->ibv_srq, init_attr, &req.ibv_cmd, + sizeof(req), 
&resp.ibv_resp, sizeof(resp)); + if (status) + goto err1; + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "create srq: successfully created %p.\n", srq); + return &srq->ibv_srq; + +err1: + qelr_destroy_srq_buffers(&srq->ibv_srq); + free(srq); +err0: + DP_ERR(cxt->dbg_fp, + "create srq: failed to create %p.\n", srq); + return NULL; +} + static void qelr_free_rq(struct qelr_qp *qp) { free(qp->rqe_wr_id); @@ -333,6 +508,11 @@ static void qelr_chain_free_rq(struct qelr_qp *qp) qelr_chain_free(&qp->rq.chain); } +static inline bool qelr_qp_has_srq(struct qelr_qp *qp) +{ + return !!qp->srq; +} + static inline int qelr_create_qp_buffers_sq(struct qelr_devctx *cxt, struct qelr_qp *qp, struct ibv_qp_init_attr *attrs) @@ -531,6 +711,9 @@ struct ibv_qp *qelr_create_qp(struct ibv_pd *pd, if (!qp) return NULL; + if (attrs->srq) + qp->srq = get_qelr_srq(attrs->srq); + rc = qelr_create_qp_buffers(cxt, qp, attrs); if (rc) goto err0; @@ -1485,6 +1668,107 @@ int qelr_post_send(struct ibv_qp *ib_qp, struct ibv_send_wr *wr, return rc; } +static uint32_t qelr_srq_elem_left(struct qelr_srq_hwq_info *hw_srq) +{ + uint32_t used; + + /* Calculate number of elements used based on producer + * count and consumer count and subtract it from max + * work request supported so that we get elements left. 
+ */ + used = (uint32_t)(((uint64_t)((uint64_t)~0U) + 1 + + (uint64_t)(hw_srq->wr_prod_cnt)) - + (uint64_t)hw_srq->wr_cons_cnt); + + return hw_srq->max_wr - used; +} + +int qelr_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr) +{ + struct qelr_devctx *cxt = get_qelr_ctx(ibsrq->context); + struct qelr_srq *srq = get_qelr_srq(ibsrq); + struct qelr_srq_hwq_info *hw_srq = &srq->hw_srq; + struct qelr_chain *chain; + int status = 0; + + pthread_spin_lock(&srq->lock); + + chain = &srq->hw_srq.chain; + while (wr) { + struct rdma_srq_wqe_header *hdr; + int i; + + if (!qelr_srq_elem_left(hw_srq) || + wr->num_sge > srq->hw_srq.max_sges) { + DP_ERR(cxt->dbg_fp, + "Can't post WR (%d,%d) || (%d > %d)\n", + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + wr->num_sge, + srq->hw_srq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + + hdr = qelr_chain_produce(chain); + + /* Set number of sge and work request id in header */ + SRQ_HDR_SET(hdr, wr->wr_id, wr->num_sge); + + /* PBL is maintained in case of WR granularity. + * So increment WR producer in case we post a WR. + */ + hw_srq->wr_prod_cnt++; + hw_srq->wqe_prod++; + hw_srq->sge_prod++; + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "SRQ WR: SGEs: %d with wr_id[%d] = %lx\n", + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_srq_sge *srq_sge; + + srq_sge = qelr_chain_produce(chain); + /* Set SGE length, lkey and address */ + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, + wr->sg_list[i].length, wr->sg_list[i].lkey); + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "[%d]: len %d key %x addr %x:%x\n", + i, srq_sge->length, srq_sge->l_key, + srq_sge->addr.hi, srq_sge->addr.lo); + hw_srq->sge_prod++; + } + + /* Flush WQE and SGE information before updating producer */ + mmio_wc_start(); + + /* SRQ producer is 8 bytes. Need to update SGE producer index + * in first 4 bytes and need to update WQE producer in + * next 4 bytes. 
+ */ + + struct rdma_srq_producers *virt_prod; + + virt_prod = srq->hw_srq.virt_prod_pair_addr; + virt_prod->sge_prod = hw_srq->sge_prod; + virt_prod->wqe_prod = hw_srq->wqe_prod; + + /* Flush producer after updating it. */ + mmio_flush_writes(); + wr = wr->next; + } + + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, + "POST: Elements in SRQ: %d\n", + qelr_chain_get_elem_left_u32(chain)); + pthread_spin_unlock(&srq->lock); + + return status; +} + int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) { @@ -1494,6 +1778,13 @@ int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, uint16_t db_val; uint8_t iwarp = IS_IWARP(ibqp->context->device); + if (unlikely(qelr_qp_has_srq(qp))) { + DP_ERR(cxt->dbg_fp, + "QP is associated with SRQ, cannot post RQ buffers\n"); + *bad_wr = wr; + return -EINVAL; + } + pthread_spin_lock(&qp->q_lock); if (!iwarp && qp->state == QELR_QPS_RST) { @@ -1826,6 +2117,32 @@ static void __process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq, wc->qp_num = qp->qp_id; } +static int process_resp_one_srq(struct qelr_qp *qp, struct qelr_cq *cq, + struct ibv_wc *wc, + struct rdma_cqe_responder *resp) +{ + struct qelr_srq_hwq_info *hw_srq = &qp->srq->hw_srq; + uint64_t wr_id; + + wr_id = HILO_64(resp->srq_wr_id.hi, resp->srq_wr_id.lo); + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + wc->byte_len = 0; + wc->status = IBV_WC_WR_FLUSH_ERR; + wc->qp_num = qp->qp_id; + wc->wr_id = wr_id; + } else { + __process_resp_one(qp, cq, wc, resp, wr_id); + } + + /* PBL is maintained in case of WR granularity. 
+ * So increment WR consumer after consuming WR + */ + hw_srq->wr_cons_cnt++; + + return 1; +} + static int process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq, struct ibv_wc *wc, struct rdma_cqe_responder *resp) { @@ -1891,6 +2208,19 @@ static void try_consume_resp_cqe(struct qelr_cq *cq, struct qelr_qp *qp, } } +static int qelr_poll_cq_resp_srq(struct qelr_qp *qp, struct qelr_cq *cq, + int num_entries, struct ibv_wc *wc, + struct rdma_cqe_responder *resp, int *update) +{ + int cnt; + + cnt = process_resp_one_srq(qp, cq, wc, resp); + consume_cqe(cq); + *update |= 1; + + return cnt; +} + static int qelr_poll_cq_resp(struct qelr_qp *qp, struct qelr_cq *cq, int num_entries, struct ibv_wc *wc, struct rdma_cqe_responder *resp, int *update) @@ -1952,6 +2282,10 @@ int qelr_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) cnt = qelr_poll_cq_resp(qp, cq, num_entries, wc, &cqe->resp, &update); break; + case RDMA_CQE_TYPE_RESPONDER_SRQ: + cnt = qelr_poll_cq_resp_srq(qp, cq, num_entries, wc, + &cqe->resp, &update); + break; case RDMA_CQE_TYPE_INVALID: default: printf("Error: invalid CQE type = %d\n", @@ -2018,6 +2352,9 @@ void qelr_async_event(struct ibv_async_event *event) case IBV_EVENT_COMM_EST: case IBV_EVENT_QP_LAST_WQE_REACHED: break; + case IBV_EVENT_SRQ_LIMIT_REACHED: + case IBV_EVENT_SRQ_ERR: + return; case IBV_EVENT_PORT_ACTIVE: case IBV_EVENT_PORT_ERR: break; diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h index 50d1182..26802c3 100644 --- a/providers/qedr/qelr_verbs.h +++ b/providers/qedr/qelr_verbs.h @@ -73,5 +73,14 @@ int qelr_post_send(struct ibv_qp *ib_qp, struct ibv_send_wr *wr, int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr); +int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr); +int qelr_modify_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr, + int attr_mask); +struct ibv_srq *qelr_create_srq(struct ibv_pd *pd, + struct ibv_srq_init_attr 
*init_attr); +int qelr_destroy_srq(struct ibv_srq *ibv_srq); +int qelr_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr); + void qelr_async_event(struct ibv_async_event *event); #endif /* __QELR_VERBS_H__ */ diff --git a/providers/qedr/rdma_common.h b/providers/qedr/rdma_common.h index 0707e17..f2d76bb 100644 --- a/providers/qedr/rdma_common.h +++ b/providers/qedr/rdma_common.h @@ -53,6 +53,7 @@ #define RDMA_MAX_CQS (64*1024) #define RDMA_MAX_TIDS (128*1024-1) #define RDMA_MAX_PDS (64*1024) +#define RDMA_MAX_SRQS (32*1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS #define RDMA_NUM_STATISTIC_COUNTERS_K2 MAX_NUM_VPORTS_K2 -- 2.9.5 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html