> From: linux-rdma-owner@xxxxxxxxxxxxxxx [mailto:linux-rdma- > owner@xxxxxxxxxxxxxxx] On Behalf Of Leon Romanovsky > Sent: Monday, July 23, 2018 9:01 PM > On Mon, Jul 23, 2018 at 05:57:51PM +0300, Yuval Bason wrote: > > Enable SRQ's for rdma-core. > > > > SRQ rdma-core implementation is pretty straight forward, except > > perhaps the way that the driver udpates FW producers: they are updated > > using a mapped buffer that FW reads, and not doorbells Like RQ / SQ. > > udpates -> updates Will fix. > > > > > Signed-off-by: Yuval Bason <yuval.bason@xxxxxxxxxx> > > Signed-off-by: Michal Kalderon <michal.kalderon@xxxxxxxxxx> > > > > Please put "---" before the line below, so it won't be visible in commit log. Will fix. > > > Changes from v1 to v2: > > - Clean SRQ verbs to follow common style. > > - Code cleanup - one-time-use macro, inline function with one line. > > - Use udma_to_device_barrier() to write to a system memory instead of > > mmio_flush_writes (not a BAR backed memory). > > --- > > providers/qedr/qelr.h | 43 +++++++ > > providers/qedr/qelr_abi.h | 2 + > > providers/qedr/qelr_main.c | 7 ++ > > providers/qedr/qelr_verbs.c | 283 > +++++++++++++++++++++++++++++++++++++++++++ > > providers/qedr/qelr_verbs.h | 9 ++ > > providers/qedr/rdma_common.h | 1 + > > 6 files changed, 345 insertions(+) > > > > diff --git a/providers/qedr/qelr.h b/providers/qedr/qelr.h index > > 0b2e4a2..eeebfe0 100644 > > --- a/providers/qedr/qelr.h > > +++ b/providers/qedr/qelr.h > > @@ -61,6 +61,7 @@ enum DP_MODULE { > > QELR_MSG_QP = (QELR_MSG_SQ | QELR_MSG_RQ), > > QELR_MSG_MR = 0x80000, > > QELR_MSG_INIT = 0x100000, > > + QELR_MSG_SRQ = 0x200000, > > /* to be added...up to 0x8000000 */ > > }; > > > > @@ -128,8 +129,10 @@ struct qelr_devctx { > > > > uint32_t max_send_wr; > > uint32_t max_recv_wr; > > + uint32_t max_srq_wr; > > uint32_t sges_per_send_wr; > > uint32_t sges_per_recv_wr; > > + uint32_t sges_per_srq_wr; > > int max_cqes; > > }; > > > > @@ -221,6 +224,27 @@ struct qelr_dpm { 
> > struct qelr_rdma_ext *rdma_ext; > > }; > > > > +struct qelr_srq_hwq_info { > > + uint32_t max_sges; > > + uint32_t max_wr; > > + struct qelr_chain chain; > > + uint32_t wqe_prod; /* WQE prod index in HW ring */ > > + uint32_t sge_prod; /* SGE prod index in HW ring */ > > + uint32_t wr_prod_cnt; /* wr producer count */ > > + uint32_t wr_cons_cnt; /* wr consumer count */ > > + uint32_t num_elems; > > + > > + void *virt_prod_pair_addr; /* producer pair virtual address */ > > Don't you want to declare it as uintptr_t? No, I only use this variable as a pointer, without casting. > > > + uint64_t phy_prod_pair_addr; /* producer pair physical address */ > > I don't see any usage of this field. Right. Will remove. > > > +}; > > + > > +struct qelr_srq { > > + struct ibv_srq ibv_srq; > > + struct qelr_srq_hwq_info hw_srq; > > + uint16_t srq_id; > > + pthread_spinlock_t lock; > > +}; > > + > > struct qelr_qp { > > struct ibv_qp ibv_qp; > > pthread_spinlock_t q_lock; > > @@ -247,6 +271,7 @@ struct qelr_qp { > > int sq_sig_all; > > int atomic_supported; > > uint8_t edpm_disabled; > > + struct qelr_srq *srq; > > }; > > > > static inline struct qelr_devctx *get_qelr_ctx(struct ibv_context > > *ibctx) @@ -274,6 +299,11 @@ static inline struct qelr_cq *get_qelr_cq(struct > ibv_cq *ibcq) > > return container_of(ibcq, struct qelr_cq, ibv_cq); } > > > > +static inline struct qelr_srq *get_qelr_srq(struct ibv_srq *ibsrq) { > > + return container_of(ibsrq, struct qelr_srq, ibv_srq); } > > + > > #define SET_FIELD(value, name, flag) \ > > do { \ > > (value) &= ~(name ## _MASK << name ## _SHIFT); \ > > @@ -308,6 +338,19 @@ static inline struct qelr_cq *get_qelr_cq(struct ibv_cq > *ibcq) > > (sge)->flags = htole32(vflags); \ > > } while (0) > > > > +#define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ > > + do { \ > > + TYPEPTR_ADDR_SET(hdr, wr_id, vwr_id); \ > > + (hdr)->num_sges = num_sge; \ > > + } while (0) > > + > > +#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ > > + do { \ > > + 
TYPEPTR_ADDR_SET(sge, addr, vaddr); \ > > + (sge)->length = htole32(vlength); \ > > + (sge)->l_key = htole32(vlkey); \ > > + } while (0) > > + > > #define U64_HI(val) ((uint32_t)(((uint64_t)(uintptr_t)(val)) >> 32)) > > #define U64_LO(val) ((uint32_t)(((uint64_t)(uintptr_t)(val)) & > > 0xffffffff)) #define HILO_U64(hi, lo) ((uintptr_t)((((uint64_t)(hi)) > > << 32) + (lo))) diff --git a/providers/qedr/qelr_abi.h > > b/providers/qedr/qelr_abi.h index 3666845..c674ddc 100644 > > --- a/providers/qedr/qelr_abi.h > > +++ b/providers/qedr/qelr_abi.h > > @@ -49,5 +49,7 @@ DECLARE_DRV_CMD(qelr_get_context, > IB_USER_VERBS_CMD_GET_CONTEXT, > > empty, qedr_alloc_ucontext_resp); > > DECLARE_DRV_CMD(qelr_reg_mr, IB_USER_VERBS_CMD_REG_MR, > > empty, empty); > > +DECLARE_DRV_CMD(qelr_create_srq, IB_USER_VERBS_CMD_CREATE_SRQ, > > + qedr_create_srq_ureq, qedr_create_srq_uresp); > > > > #endif /* __QELR_ABI_H__ */ > > diff --git a/providers/qedr/qelr_main.c b/providers/qedr/qelr_main.c > > index e99fc88..40742fb 100644 > > --- a/providers/qedr/qelr_main.c > > +++ b/providers/qedr/qelr_main.c > > @@ -96,6 +96,11 @@ static const struct verbs_context_ops qelr_ctx_ops = { > > .query_qp = qelr_query_qp, > > .modify_qp = qelr_modify_qp, > > .destroy_qp = qelr_destroy_qp, > > + .create_srq = qelr_create_srq, > > + .destroy_srq = qelr_destroy_srq, > > + .modify_srq = qelr_modify_srq, > > + .query_srq = qelr_query_srq, > > + .post_srq_recv = qelr_post_srq_recv, > > .post_send = qelr_post_send, > > .post_recv = qelr_post_recv, > > .async_event = qelr_async_event, > > @@ -183,8 +188,10 @@ static struct verbs_context > *qelr_alloc_context(struct ibv_device *ibdev, > > ctx->db_size = resp.db_size; > > ctx->max_send_wr = resp.max_send_wr; > > ctx->max_recv_wr = resp.max_recv_wr; > > + ctx->max_srq_wr = resp.max_srq_wr; > > ctx->sges_per_send_wr = resp.sges_per_send_wr; > > ctx->sges_per_recv_wr = resp.sges_per_recv_wr; > > + ctx->sges_per_srq_wr = resp.sges_per_recv_wr; > > ctx->max_cqes = 
resp.max_cqes; > > > > ctx->db_addr = mmap(NULL, ctx->db_size, PROT_WRITE, MAP_SHARED, > diff > > --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c > > index 28ea094..fdcf8ab 100644 > > --- a/providers/qedr/qelr_verbs.c > > +++ b/providers/qedr/qelr_verbs.c > > @@ -313,6 +313,143 @@ int qelr_destroy_cq(struct ibv_cq *ibv_cq) > > return 0; > > } > > > > +int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr > > +*attr) { > > + struct ibv_query_srq cmd; > > + > > + return ibv_cmd_query_srq(ibv_srq, attr, &cmd, sizeof(cmd)); } > > + > > +int qelr_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, > > + int attr_mask) > > +{ > > + struct ibv_modify_srq cmd; > > + > > + return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof(cmd)); > > + > > +} > > + > > +static void qelr_destroy_srq_buffers(struct ibv_srq *ibv_srq) { > > + struct qelr_srq *srq = get_qelr_srq(ibv_srq); > > + uint32_t *virt_prod_pair_addr; > > + uint32_t prod_size; > > + > > + qelr_chain_free(&srq->hw_srq.chain); > > + > > + virt_prod_pair_addr = srq->hw_srq.virt_prod_pair_addr; > > + prod_size = sizeof(struct rdma_srq_producers); > > + > > + ibv_dofork_range(virt_prod_pair_addr, prod_size); > > + munmap(virt_prod_pair_addr, prod_size); } > > + > > +int qelr_destroy_srq(struct ibv_srq *ibv_srq) { > > + struct qelr_srq *srq = get_qelr_srq(ibv_srq); > > + int ret; > > + > > + ret = ibv_cmd_destroy_srq(ibv_srq); > > + if (ret) > > + return ret; > > + > > + qelr_destroy_srq_buffers(ibv_srq); > > + free(srq); > > + > > + return 0; > > +} > > + > > +static inline void > > +qelr_create_srq_configure_req(struct qelr_srq *srq, > > + struct qelr_create_srq *req) { > > + req->srq_addr = (uintptr_t)srq->hw_srq.chain.first_addr; > > + req->srq_len = srq->hw_srq.chain.size; > > + req->prod_pair_addr = (uintptr_t)srq->hw_srq.virt_prod_pair_addr; > > +} > > + > > +static inline int qelr_create_srq_buffers(struct qelr_devctx *cxt, > > + struct qelr_srq *srq, > > + struct 
ibv_srq_init_attr *attrs) { > > Why are those functions declared as inline? > > > + uint32_t max_wr, max_sges; > > + int chain_size, prod_size; > > + void *addr; > > + int rc; > > + > > + max_wr = attrs->attr.max_wr; > > + if (!max_wr) > > + return -EINVAL; > > + > > + max_wr = min_t(uint32_t, max_wr, cxt->max_srq_wr); > > + max_sges = max_wr * (cxt->sges_per_srq_wr + 1); /* +1 for header */ > > + chain_size = max_sges * QELR_RQE_ELEMENT_SIZE; > > + > > + rc = qelr_chain_alloc(&srq->hw_srq.chain, chain_size, > > + cxt->kernel_page_size, QELR_RQE_ELEMENT_SIZE); > > + if (rc) { > > + DP_ERR(cxt->dbg_fp, > > + "create srq: failed to map srq, got %d", rc); > > + return rc; > > + } > > + > > + prod_size = sizeof(struct rdma_srq_producers); > > + addr = mmap(NULL, prod_size, PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANONYMOUS, -1, > > + 0); > > + if (addr == MAP_FAILED) { > > + DP_ERR(cxt->dbg_fp, > > + "create srq: failed to map producer, got %d", errno); > > + qelr_chain_free(&srq->hw_srq.chain); > > + return errno; > > + } > > + > > + rc = ibv_dontfork_range(addr, prod_size); > > + if (rc) { > > + munmap(addr, prod_size); > > + qelr_chain_free(&srq->hw_srq.chain); > > + return rc; > > + } > > + > > + srq->hw_srq.virt_prod_pair_addr = addr; > > + srq->hw_srq.max_sges = cxt->sges_per_srq_wr; > > + srq->hw_srq.max_wr = max_wr; > > + > > + return 0; > > +} > > + > > +struct ibv_srq *qelr_create_srq(struct ibv_pd *pd, > > + struct ibv_srq_init_attr *init_attr) { > > + struct qelr_devctx *cxt = get_qelr_ctx(pd->context); > > + struct qelr_create_srq req; > > + struct qelr_create_srq_resp resp; > > + struct qelr_srq *srq; > > + int ret; > > + > > + srq = calloc(1, sizeof(*srq)); > > + if (!srq) > > + return NULL; > > + > > + ret = qelr_create_srq_buffers(cxt, srq, init_attr); > > + if (ret) { > > + free(srq); > > + return NULL; > > + } > > + > > + pthread_spin_init(&srq->lock, PTHREAD_PROCESS_PRIVATE); > > + qelr_create_srq_configure_req(srq, &req); > > + ret = 
ibv_cmd_create_srq(pd, &srq->ibv_srq, init_attr, &req.ibv_cmd, > > + sizeof(req), &resp.ibv_resp, sizeof(resp)); > > + if (ret) { > > + qelr_destroy_srq_buffers(&srq->ibv_srq); > > + free(srq); > > + return NULL; > > + } > > + > > + return &srq->ibv_srq; > > +} > > + > > static void qelr_free_rq(struct qelr_qp *qp) { > > free(qp->rqe_wr_id); > > @@ -531,6 +668,9 @@ struct ibv_qp *qelr_create_qp(struct ibv_pd *pd, > > if (!qp) > > return NULL; > > > > + if (attrs->srq) > > + qp->srq = get_qelr_srq(attrs->srq); > > + > > rc = qelr_create_qp_buffers(cxt, qp, attrs); > > if (rc) > > goto err0; > > @@ -1485,6 +1625,98 @@ int qelr_post_send(struct ibv_qp *ib_qp, struct > ibv_send_wr *wr, > > return rc; > > } > > > > +static uint32_t qelr_srq_elem_left(struct qelr_srq_hwq_info *hw_srq) > > +{ > > + uint32_t used; > > + > > + /* Calculate number of elements used based on producer > > + * count and consumer count and subtract it from max > > + * work request supported so that we get elements left. 
> > + */ > > + used = (uint32_t)(((uint64_t)((uint64_t)~0U) + 1 + > > + (uint64_t)(hw_srq->wr_prod_cnt)) - > > + (uint64_t)hw_srq->wr_cons_cnt); > > + > > + return hw_srq->max_wr - used; > > +} > > + > > +int qelr_post_srq_recv(struct ibv_srq *ibsrq, struct ibv_recv_wr *wr, > > + struct ibv_recv_wr **bad_wr) { > > + struct qelr_devctx *cxt = get_qelr_ctx(ibsrq->context); > > + struct qelr_srq *srq = get_qelr_srq(ibsrq); > > + struct qelr_srq_hwq_info *hw_srq = &srq->hw_srq; > > + struct qelr_chain *chain; > > + int status = 0; > > + > > + pthread_spin_lock(&srq->lock); > > + > > + chain = &srq->hw_srq.chain; > > + while (wr) { > > + struct rdma_srq_wqe_header *hdr; > > + int i; > > + > > + if (!qelr_srq_elem_left(hw_srq) || > > + wr->num_sge > srq->hw_srq.max_sges) { > > + DP_ERR(cxt->dbg_fp, > > + "Can't post WR (%d,%d) || (%d > %d)\n", > > + hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, > > + wr->num_sge, > > + srq->hw_srq.max_sges); > > + status = -ENOMEM; > > + *bad_wr = wr; > > + break; > > + } > > + > > + hdr = qelr_chain_produce(chain); > > + > > + SRQ_HDR_SET(hdr, wr->wr_id, wr->num_sge); > > + > > + hw_srq->wr_prod_cnt++; > > + hw_srq->wqe_prod++; > > + hw_srq->sge_prod++; > > + > > + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, > > + "SRQ WR: SGEs: %d with wr_id[%d] = %" PRIx64 "\n", > > + wr->num_sge, hw_srq->wqe_prod, wr->wr_id); > > + > > + for (i = 0; i < wr->num_sge; i++) { > > + struct rdma_srq_sge *srq_sge; > > + > > + srq_sge = qelr_chain_produce(chain); > > + SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr, > > + wr->sg_list[i].length, wr->sg_list[i].lkey); > > + > > + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, > > + "[%d]: len %d key %x addr %x:%x\n", > > + i, srq_sge->length, srq_sge->l_key, > > + srq_sge->addr.hi, srq_sge->addr.lo); > > + hw_srq->sge_prod++; > > + } > > + > > + /* Make sure that descriptors are written before we update > > + * producers. 
> > + */ > > + > > + udma_to_device_barrier(); > > + > > + struct rdma_srq_producers *virt_prod; > > + > > + virt_prod = srq->hw_srq.virt_prod_pair_addr; > > + virt_prod->sge_prod = htole32(hw_srq->sge_prod); > > + virt_prod->wqe_prod = htole32(hw_srq->wqe_prod); > > + > > + wr = wr->next; > > + } > > + > > + DP_VERBOSE(cxt->dbg_fp, QELR_MSG_SRQ, > > + "POST: Elements in SRQ: %d\n", > > + qelr_chain_get_elem_left_u32(chain)); > > + pthread_spin_unlock(&srq->lock); > > + > > + return status; > > +} > > + > > int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr, > > struct ibv_recv_wr **bad_wr) > > { > > @@ -1494,6 +1726,13 @@ int qelr_post_recv(struct ibv_qp *ibqp, struct > ibv_recv_wr *wr, > > uint16_t db_val; > > uint8_t iwarp = IS_IWARP(ibqp->context->device); > > > > + if (unlikely(qp->srq)) { > > + DP_ERR(cxt->dbg_fp, > > + "QP is associated with SRQ, cannot post RQ buffers\n"); > > + *bad_wr = wr; > > + return -EINVAL; > > + } > > + > > pthread_spin_lock(&qp->q_lock); > > > > if (!iwarp && qp->state == QELR_QPS_RST) { @@ -1826,6 +2065,30 @@ > > static void __process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq, > > wc->qp_num = qp->qp_id; > > } > > > > +static int process_resp_one_srq(struct qelr_qp *qp, struct qelr_cq *cq, > > + struct ibv_wc *wc, > > + struct rdma_cqe_responder *resp) > > +{ > > + struct qelr_srq_hwq_info *hw_srq = &qp->srq->hw_srq; > > + uint64_t wr_id; > > + > > + wr_id = (((uint64_t)(le32toh(resp->srq_wr_id.hi))) << 32) + > > + le32toh(resp->srq_wr_id.lo); > > + > > + if (resp->status == > RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { > > + wc->byte_len = 0; > > + wc->status = IBV_WC_WR_FLUSH_ERR; > > + wc->qp_num = qp->qp_id; > > + wc->wr_id = wr_id; > > + } else { > > + __process_resp_one(qp, cq, wc, resp, wr_id); > > + } > > + > > + hw_srq->wr_cons_cnt++; > > + > > + return 1; > > +} > > + > > static int process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq, > > struct ibv_wc *wc, struct rdma_cqe_responder *resp) > { @@ 
> > -1891,6 +2154,19 @@ static void try_consume_resp_cqe(struct qelr_cq *cq, > struct qelr_qp *qp, > > } > > } > > > > +static int qelr_poll_cq_resp_srq(struct qelr_qp *qp, struct qelr_cq *cq, > > + int num_entries, struct ibv_wc *wc, > > + struct rdma_cqe_responder *resp, int *update) > { > > + int cnt; > > + > > + cnt = process_resp_one_srq(qp, cq, wc, resp); > > + consume_cqe(cq); > > + *update |= 1; > > + > > + return cnt; > > +} > > + > > static int qelr_poll_cq_resp(struct qelr_qp *qp, struct qelr_cq *cq, > > int num_entries, struct ibv_wc *wc, > > struct rdma_cqe_responder *resp, int *update) @@ > -1952,6 > > +2228,10 @@ int qelr_poll_cq(struct ibv_cq *ibcq, int num_entries, struct > ibv_wc *wc) > > cnt = qelr_poll_cq_resp(qp, cq, num_entries, wc, > > &cqe->resp, &update); > > break; > > + case RDMA_CQE_TYPE_RESPONDER_SRQ: > > + cnt = qelr_poll_cq_resp_srq(qp, cq, num_entries, wc, > > + &cqe->resp, &update); > > + break; > > case RDMA_CQE_TYPE_INVALID: > > default: > > printf("Error: invalid CQE type = %d\n", @@ -2018,6 > +2298,9 @@ > > void qelr_async_event(struct ibv_async_event *event) > > case IBV_EVENT_COMM_EST: > > case IBV_EVENT_QP_LAST_WQE_REACHED: > > break; > > + case IBV_EVENT_SRQ_LIMIT_REACHED: > > + case IBV_EVENT_SRQ_ERR: > > + return; > > case IBV_EVENT_PORT_ACTIVE: > > case IBV_EVENT_PORT_ERR: > > break; > > diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h > > index 50d1182..26802c3 100644 > > --- a/providers/qedr/qelr_verbs.h > > +++ b/providers/qedr/qelr_verbs.h > > @@ -73,5 +73,14 @@ int qelr_post_send(struct ibv_qp *ib_qp, struct > > ibv_send_wr *wr, int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr > *wr, > > struct ibv_recv_wr **bad_wr); > > > > +int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr > > +*attr); int qelr_modify_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr, > > + int attr_mask); > > +struct ibv_srq *qelr_create_srq(struct ibv_pd *pd, > > + struct ibv_srq_init_attr 
*init_attr); int > qelr_destroy_srq(struct > > +ibv_srq *ibv_srq); int qelr_post_srq_recv(struct ibv_srq *ibsrq, > > +struct ibv_recv_wr *wr, > > + struct ibv_recv_wr **bad_wr); > > + > > void qelr_async_event(struct ibv_async_event *event); #endif /* > > __QELR_VERBS_H__ */ diff --git a/providers/qedr/rdma_common.h > > b/providers/qedr/rdma_common.h index 0707e17..f2d76bb 100644 > > --- a/providers/qedr/rdma_common.h > > +++ b/providers/qedr/rdma_common.h > > @@ -53,6 +53,7 @@ > > #define RDMA_MAX_CQS (64*1024) > > #define RDMA_MAX_TIDS (128*1024-1) > > #define RDMA_MAX_PDS (64*1024) > > +#define RDMA_MAX_SRQS (32*1024) > > > > #define RDMA_NUM_STATISTIC_COUNTERS > MAX_NUM_VPORTS > > #define RDMA_NUM_STATISTIC_COUNTERS_K2 > MAX_NUM_VPORTS_K2 > > -- > > 2.9.5 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-rdma" > > in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo > > info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html