On Mon, Jul 23, 2018 at 05:57:51PM +0300, Yuval Bason wrote: > Enable SRQ's for rdma-core. > > SRQ rdma-core implementation is pretty straight forward, except perhaps > the way that the driver udpates FW producers: they are updated using a > mapped buffer that FW reads, and not doorbells Like RQ / SQ. udpates -> updates > > Signed-off-by: Yuval Bason <yuval.bason@xxxxxxxxxx> > Signed-off-by: Michal Kalderon <michal.kalderon@xxxxxxxxxx> > Please put "---" before the line below, so it won't be visible in commit log. > Changes from v1 to v2: > - Clean SRQ verbs to follow common style. > - Code cleanup - one-time-use macro, inline function with one line. > - Use udma_to_device_barrier() to write to a system memory instead of > mmio_flush_writes (not a BAR backed memory). > --- > providers/qedr/qelr.h | 43 +++++++ > providers/qedr/qelr_abi.h | 2 + > providers/qedr/qelr_main.c | 7 ++ > providers/qedr/qelr_verbs.c | 283 +++++++++++++++++++++++++++++++++++++++++++ > providers/qedr/qelr_verbs.h | 9 ++ > providers/qedr/rdma_common.h | 1 + > 6 files changed, 345 insertions(+) > > diff --git a/providers/qedr/qelr.h b/providers/qedr/qelr.h > index 0b2e4a2..eeebfe0 100644 > --- a/providers/qedr/qelr.h > +++ b/providers/qedr/qelr.h > @@ -61,6 +61,7 @@ enum DP_MODULE { > QELR_MSG_QP = (QELR_MSG_SQ | QELR_MSG_RQ), > QELR_MSG_MR = 0x80000, > QELR_MSG_INIT = 0x100000, > + QELR_MSG_SRQ = 0x200000, > /* to be added...up to 0x8000000 */ > }; > > @@ -128,8 +129,10 @@ struct qelr_devctx { > > uint32_t max_send_wr; > uint32_t max_recv_wr; > + uint32_t max_srq_wr; > uint32_t sges_per_send_wr; > uint32_t sges_per_recv_wr; > + uint32_t sges_per_srq_wr; > int max_cqes; > }; > > @@ -221,6 +224,27 @@ struct qelr_dpm { > struct qelr_rdma_ext *rdma_ext; > }; > > +struct qelr_srq_hwq_info { > + uint32_t max_sges; > + uint32_t max_wr; > + struct qelr_chain chain; > + uint32_t wqe_prod; /* WQE prod index in HW ring */ > + uint32_t sge_prod; /* SGE prod index in HW ring */ > + uint32_t wr_prod_cnt; /* wr producer count */ > + uint32_t wr_cons_cnt; /* wr consumer count */ > + uint32_t num_elems; > + > + void *virt_prod_pair_addr; /* producer pair virtual address */ Don't you want to declare it as uintptr_t? > + uint64_t phy_prod_pair_addr; /* producer pair physical address */ I don't see any usage of this field. > +}; > + > +struct qelr_srq { > + struct ibv_srq ibv_srq; > + struct qelr_srq_hwq_info hw_srq; > + uint16_t srq_id; > + pthread_spinlock_t lock; > +}; > + > struct qelr_qp { > struct ibv_qp ibv_qp; > pthread_spinlock_t q_lock; > @@ -247,6 +271,7 @@ struct qelr_qp { > int sq_sig_all; > int atomic_supported; > uint8_t edpm_disabled; > + struct qelr_srq *srq; > }; > > static inline struct qelr_devctx *get_qelr_ctx(struct ibv_context *ibctx) > @@ -274,6 +299,11 @@ static inline struct qelr_cq *get_qelr_cq(struct ibv_cq *ibcq) > return container_of(ibcq, struct qelr_cq, ibv_cq); > } > > +static inline struct qelr_srq *get_qelr_srq(struct ibv_srq *ibsrq) > +{ > + return container_of(ibsrq, struct qelr_srq, ibv_srq); > +} > + > #define SET_FIELD(value, name, flag) \ > do { \ > (value) &= ~(name ## _MASK << name ## _SHIFT); \ > @@ -308,6 +338,19 @@ static inline struct qelr_cq *get_qelr_cq(struct ibv_cq *ibcq) > (sge)->flags = htole32(vflags); \ > } while (0) > > +#define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ > + do { \ > + TYPEPTR_ADDR_SET(hdr, wr_id, vwr_id); \ > + (hdr)->num_sges = num_sge; \ > + } while (0) > + > +#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ > + do { \ > + TYPEPTR_ADDR_SET(sge, addr, vaddr); \ > + (sge)->length = htole32(vlength); \ > + (sge)->l_key = htole32(vlkey); \ > + } while (0) > + > #define U64_HI(val) ((uint32_t)(((uint64_t)(uintptr_t)(val)) >> 32)) > #define U64_LO(val) ((uint32_t)(((uint64_t)(uintptr_t)(val)) & 0xffffffff)) > #define HILO_U64(hi, lo) ((uintptr_t)((((uint64_t)(hi)) << 32) + (lo))) > diff --git a/providers/qedr/qelr_abi.h b/providers/qedr/qelr_abi.h > index 3666845..c674ddc 100644 > --- a/providers/qedr/qelr_abi.h > +++ b/providers/qedr/qelr_abi.h > @@ -49,5 +49,7 @@ DECLARE_DRV_CMD(qelr_get_context, IB_USER_VERBS_CMD_GET_CONTEXT, > empty, qedr_alloc_ucontext_resp); > DECLARE_DRV_CMD(qelr_reg_mr, IB_USER_VERBS_CMD_REG_MR, > empty, empty); > +DECLARE_DRV_CMD(qelr_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, > + qedr_create_srq_ureq, qedr_create_srq_uresp); > > #endif /* __QELR_ABI_H__ */ > diff --git a/providers/qedr/qelr_main.c b/providers/qedr/qelr_main.c > index e99fc88..40742fb 100644 > --- a/providers/qedr/qelr_main.c > +++ b/providers/qedr/qelr_main.c > @@ -96,6 +96,11 @@ static const struct verbs_context_ops qelr_ctx_ops = { > .query_qp = qelr_query_qp, > .modify_qp = qelr_modify_qp, > .destroy_qp = qelr_destroy_qp, > + .create_srq = qelr_create_srq, > + .destroy_srq = qelr_destroy_srq, > + .modify_srq = qelr_modify_srq, > + .query_srq = qelr_query_srq, > + .post_srq_recv = qelr_post_srq_recv, > .post_send = qelr_post_send, > .post_recv = qelr_post_recv, > .async_event = qelr_async_event, > @@ -183,8 +188,10 @@ static struct verbs_context *qelr_alloc_context(struct ibv_device *ibdev, > ctx->db_size = resp.db_size; > ctx->max_send_wr = resp.max_send_wr; > ctx->max_recv_wr = resp.max_recv_wr; > + ctx->max_srq_wr = resp.max_srq_wr; > ctx->sges_per_send_wr = resp.sges_per_send_wr; > ctx->sges_per_recv_wr = resp.sges_per_recv_wr; > + ctx->sges_per_srq_wr = resp.sges_per_recv_wr; > ctx->max_cqes = resp.max_cqes; > > ctx->db_addr = mmap(NULL, ctx->db_size, PROT_WRITE, MAP_SHARED, > diff --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c > index 28ea094..fdcf8ab 100644 > --- a/providers/qedr/qelr_verbs.c > +++ b/providers/qedr/qelr_verbs.c > @@ -313,6 +313,143 @@ int qelr_destroy_cq(struct ibv_cq *ibv_cq) > return 0; > } > > +int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr) > +{ > + struct ibv_query_srq cmd; > + > + return ibv_cmd_query_srq(ibv_srq, attr, &cmd, sizeof(cmd)); > +} > + > +int qelr_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, > + int attr_mask) > +{ > + struct ibv_modify_srq cmd; > + > + return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof(cmd)); > + > +} > + > +static void qelr_destroy_srq_buffers(struct ibv_srq *ibv_srq) > +{ > + struct qelr_srq *srq = get_qelr_srq(ibv_srq); > + uint32_t *virt_prod_pair_addr; > + uint32_t prod_size; > + > + qelr_chain_free(&srq->hw_srq.chain); > + > + virt_prod_pair_addr = srq->hw_srq.virt_prod_pair_addr; > + prod_size = sizeof(struct rdma_srq_producers); > + > + ibv_dofork_range(virt_prod_pair_addr, prod_size); > + munmap(virt_prod_pair_addr, prod_size); > +} > + > +int qelr_destroy_srq(struct ibv_srq *ibv_srq) > +{ > + struct qelr_srq *srq = get_qelr_srq(ibv_srq); > + int ret; > + > + ret = ibv_cmd_destroy_srq(ibv_srq); > + if (ret) > + return ret; > + > + qelr_destroy_srq_buffers(ibv_srq); > + free(srq); > + > + return 0; > +} > + > +static inline void > +qelr_create_srq_configure_req(struct qelr_srq *srq, > + struct qelr_create_srq *req) > +{ > + req->srq_addr = (uintptr_t)srq->hw_srq.chain.first_addr; > + req->srq_len = srq->hw_srq.chain.size; > + req->prod_pair_addr = (uintptr_t)srq->hw_srq.virt_prod_pair_addr; > +} > + > +static inline int qelr_create_srq_buffers(struct qelr_devctx *cxt, > + struct qelr_srq *srq, > + struct ibv_srq_init_attr *attrs) > +{ Why are those functions declared as inline? > + uint32_t max_wr, max_sges; > + int chain_size, prod_size; > + void *addr; > + int rc; > + > + max_wr = attrs->attr.max_wr; > + if (!max_wr) > + return -EINVAL; > + > + max_wr = min_t(uint32_t, max_wr, cxt->max_srq_wr); > + max_sges = max_wr * (cxt->sges_per_srq_wr + 1); /* +1 for header */ > + chain_size = max_sges * QELR_RQE_ELEMENT_SIZE; > + > + rc = qelr_chain_alloc(&srq->hw_srq.chain, chain_size, > + cxt->kernel_page_size, QELR_RQE_ELEMENT_SIZE); > + if (rc) { > + DP_ERR(cxt->dbg_fp, > + "create srq: failed to map srq, got %d", rc); > + return rc; > + } > + > + prod_size = sizeof(struct rdma_srq_producers); > + addr = mmap(NULL, prod_size, PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANONYMOUS, -1, > + 0); > + if (addr == MAP_FAILED) { > + DP_ERR(cxt->dbg_fp, > + "create srq: failed to map producer, got %d", errno); > + qelr_chain_free(&srq->hw_srq.chain); > + return errno; > + } > + > + rc = ibv_dontfork_range(addr, prod_size); > + if (rc) { > + munmap(addr, prod_size); > + qelr_chain_free(&srq->hw_srq.chain); > + return rc; > + } > + > + srq->hw_srq.virt_prod_pair_addr = addr; > + srq->hw_srq.max_sges = cxt->sges_per_srq_wr; > + srq->hw_srq.max_wr = max_wr; > + > + return 0; > +} > + > +struct ibv_srq *qelr_create_srq(struct ibv_pd *pd, > + struct ibv_srq_init_attr *init_attr) > +{ > + struct qelr_devctx *cxt = get_qelr_ctx(pd->context); > + struct qelr_create_srq req; > + struct qelr_create_srq_resp resp; > + struct qelr_srq *srq; > + int ret; > + > + srq = calloc(1, sizeof(*srq)); > + if (!srq) > + return NULL; > + > + ret = qelr_create_srq_buffers(cxt, srq, init_attr); > + if (ret) { > + free(srq); > + return NULL; > + } > + > + pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); > + qelr_create_srq_configure_req(srq, &req); > + ret = ibv_cmd_create_srq(pd, &srq->ibv_srq, init_attr, &req.ibv_cmd, > + sizeof(req), &resp.ibv_resp, sizeof(resp)); > + if (ret) { > + qelr_destroy_srq_buffers(&srq->ibv_srq); > + free(srq); > + return NULL; > + } > + > + return &srq->ibv_srq; > +} > + > static void qelr_free_rq(struct qelr_qp *qp) > { > free(qp->rqe_wr_id); > @@ -531,6 +668,9 @@ struct ibv_qp *qelr_create_qp(struct ibv_pd *pd, > if (!qp) > return NULL; > > + if (attrs->srq) > + qp->srq = get_qelr_srq(attrs->srq); > + > rc = qelr_create_qp_buffers(cxt, qp, attrs); > if (rc) > goto err0; > @@ -1485,6 +1625,98 @@ int qelr_post_send(struct ibv_qp *ib_qp, struct ibv_send_wr *wr, > return rc; > } > > +static uint32_t qelr_srq_elem_left(struct qelr_srq_hwq_info *hw_srq) > +{ > + uint32_t used; > + > + /* Calculate number of elements used based on producer > + * count and consumer count and subtract it from max > + * work request supported so that we get elements left. > + */ > + used = (uint32_t)(((uint64_t)((uint64_t)~0U) + 1 + > + (uint64_t)(hw_srq->wr_prod_cnt)) - > + (uint64_t)hw_srq->wr_cons_cnt); > + > + return hw_srq->max_wr - used; > +} > + > +int qelr_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, > + struct ibv_recv_wr **bad_wr) > +{ > + struct qelr_devctx *cxt = get_qelr_ctx(ibsrq->context); > + struct qelr_srq *srq = get_qelr_srq(ibsrq); > + struct qelr_srq_hwq_info *hw_srq = &srq->hw_srq; > + struct qelr_chain *chain; > + int status = 0; > + > + pthread_spin_lock(&srq->lock); > + > + chain = &srq->hw_srq.chain; > + while (wr) { > + struct rdma_srq_wqe_header *hdr; > + int i; > + > + if (!qelr_srq_elem_left(hw_srq) || > + wr->num_sge > srq->hw_srq.max_sges) { > + DP_ERR(cxt->dbg_fp, > + "Can't post WR (%d,%d) || (%d > %d)\n", > + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, > + wr->num_sge, > + srq->hw_srq.max_sges); > + status = -ENOMEM; > + *bad_wr = wr; > + break; > + } > + > + hdr = qelr_chain_produce(chain); > + > + SRQ_HDR_SET(hdr, wr->wr_id, wr->num_sge); > + > + hw_srq->wr_prod_cnt++; > + hw_srq->wqe_prod++; > + hw_srq->sge_prod++; > + > + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, > + "SRQ WR: SGEs: %d with wr_id[%d] = %" PRIx64 "\n", > + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); > + > + for (i = 0; i < wr->num_sge; i++) { > + struct rdma_srq_sge *srq_sge; > + > + srq_sge = qelr_chain_produce(chain); > + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, > + wr->sg_list[i].length, wr->sg_list[i].lkey); > + > + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, > + "[%d]: len %d key %x addr %x:%x\n", > + i, srq_sge->length, srq_sge->l_key, > + srq_sge->addr.hi, srq_sge->addr.lo); > + hw_srq->sge_prod++; > + } > + > + /* Make sure that descriptors are written before we update > + * producers. > + */ > + > + udma_to_device_barrier(); > + > + struct rdma_srq_producers *virt_prod; > + > + virt_prod = srq->hw_srq.virt_prod_pair_addr; > + virt_prod->sge_prod = htole32(hw_srq->sge_prod); > + virt_prod->wqe_prod = htole32(hw_srq->wqe_prod); > + > + wr = wr->next; > + } > + > + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, > + "POST: Elements in SRQ: %d\n", > + qelr_chain_get_elem_left_u32(chain)); > + pthread_spin_unlock(&srq->lock); > + > + return status; > +} > + > int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, > struct ibv_recv_wr **bad_wr) > { > @@ -1494,6 +1726,13 @@ int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, > uint16_t db_val; > uint8_t iwarp = IS_IWARP(ibqp->context->device); > > + if (unlikely(qp->srq)) { > + DP_ERR(cxt->dbg_fp, > + "QP is associated with SRQ, cannot post RQ buffers\n"); > + *bad_wr = wr; > + return -EINVAL; > + } > + > pthread_spin_lock(&qp->q_lock); > > if (!iwarp && qp->state == QELR_QPS_RST) { > @@ -1826,6 +2065,30 @@ static void __process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq, > wc->qp_num = qp->qp_id; > } > > +static int process_resp_one_srq(struct qelr_qp *qp, struct qelr_cq *cq, > + struct ibv_wc *wc, > + struct rdma_cqe_responder *resp) > +{ > + struct qelr_srq_hwq_info *hw_srq = &qp->srq->hw_srq; > + uint64_t wr_id; > + > + wr_id = (((uint64_t)(le32toh(resp->srq_wr_id.hi))) << 32) + > + le32toh(resp->srq_wr_id.lo); > + > + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { > + wc->byte_len = 0; > + wc->status = IBV_WC_WR_FLUSH_ERR; > + wc->qp_num = qp->qp_id; > + wc->wr_id = wr_id; > + } else { > + __process_resp_one(qp, cq, wc, resp, wr_id); > + } > + > + hw_srq->wr_cons_cnt++; > + > + return 1; > +} > + > static int process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq, > struct ibv_wc *wc, struct rdma_cqe_responder *resp) > { > @@ -1891,6 +2154,19 @@ static void try_consume_resp_cqe(struct qelr_cq *cq, struct qelr_qp *qp, > } > } > > +static int qelr_poll_cq_resp_srq(struct qelr_qp *qp, struct qelr_cq *cq, > + int num_entries, struct ibv_wc *wc, > + struct rdma_cqe_responder *resp, int *update) > +{ > + int cnt; > + > + cnt = process_resp_one_srq(qp, cq, wc, resp); > + consume_cqe(cq); > + *update |= 1; > + > + return cnt; > +} > + > static int qelr_poll_cq_resp(struct qelr_qp *qp, struct qelr_cq *cq, > int num_entries, struct ibv_wc *wc, > struct rdma_cqe_responder *resp, int *update) > @@ -1952,6 +2228,10 @@ int qelr_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc) > cnt = qelr_poll_cq_resp(qp, cq, num_entries, wc, > &cqe->resp, &update); > break; > + case RDMA_CQE_TYPE_RESPONDER_SRQ: > + cnt = qelr_poll_cq_resp_srq(qp, cq, num_entries, wc, > + &cqe->resp, &update); > + break; > case RDMA_CQE_TYPE_INVALID: > default: > printf("Error: invalid CQE type = %d\n", > @@ -2018,6 +2298,9 @@ void qelr_async_event(struct ibv_async_event *event) > case IBV_EVENT_COMM_EST: > case IBV_EVENT_QP_LAST_WQE_REACHED: > break; > + case IBV_EVENT_SRQ_LIMIT_REACHED: > + case IBV_EVENT_SRQ_ERR: > + return; > case IBV_EVENT_PORT_ACTIVE: > case IBV_EVENT_PORT_ERR: > break; > diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h > index 50d1182..26802c3 100644 > --- a/providers/qedr/qelr_verbs.h > +++ b/providers/qedr/qelr_verbs.h > @@ -73,5 +73,14 @@ int qelr_post_send(struct ibv_qp *ib_qp, struct ibv_send_wr *wr, > int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, > struct ibv_recv_wr **bad_wr); > > +int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr); > +int qelr_modify_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr, > + int attr_mask); > +struct ibv_srq *qelr_create_srq(struct ibv_pd *pd, > + struct ibv_srq_init_attr *init_attr); > +int qelr_destroy_srq(struct ibv_srq *ibv_srq); > +int qelr_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, > + struct ibv_recv_wr **bad_wr); > + > void qelr_async_event(struct ibv_async_event *event); > #endif /* __QELR_VERBS_H__ */ > diff --git a/providers/qedr/rdma_common.h b/providers/qedr/rdma_common.h > index 0707e17..f2d76bb 100644 > --- a/providers/qedr/rdma_common.h > +++ b/providers/qedr/rdma_common.h > @@ -53,6 +53,7 @@ > #define RDMA_MAX_CQS (64*1024) > #define RDMA_MAX_TIDS (128*1024-1) > #define RDMA_MAX_PDS (64*1024) > +#define RDMA_MAX_SRQS (32*1024) > > #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS > #define RDMA_NUM_STATISTIC_COUNTERS_K2 MAX_NUM_VPORTS_K2 > -- > 2.9.5 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-rdma" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html
Attachment:
signature.asc
Description: PGP signature