On Tue, Mar 17, 2020 at 03:40:30PM +0200, Max Gurtovoy wrote:
> In order to save resource allocation and utilize the completion
> locality in a better way (compared to SRQ per device that exist
> today), allocate Shared Receive Queues (SRQs) per completion vector.
> Associate each created channel with an appropriate SRQ according to the
> completion vector index. This association will reduce the lock
> contention in the fast path (compared to SRQ per device solution) and
> increase the locality in memory buffers.
>
> Signed-off-by: Max Gurtovoy <maxg@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/ulp/srpt/ib_srpt.c | 169 +++++++++++++++++++++++++---------
>  drivers/infiniband/ulp/srpt/ib_srpt.h |  26 +++++-
>  2 files changed, 148 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
> index 9855274..34869b7 100644
> --- a/drivers/infiniband/ulp/srpt/ib_srpt.c
> +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
> @@ -811,6 +811,31 @@ static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
>  }
>
>  /**
> + * srpt_srq_post_recv - post an initial IB receive request for SRQ
> + * @srq: SRPT SRQ context.
> + * @ioctx: Receive I/O context pointer.
> + */
> +static int srpt_srq_post_recv(struct srpt_srq *srq, struct srpt_recv_ioctx *ioctx)
> +{
> +        struct srpt_device *sdev = srq->sdev;
> +        struct ib_sge list;
> +        struct ib_recv_wr wr;
> +
> +        BUG_ON(!srq);
> +        list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset;
> +        list.length = srp_max_req_size;
> +        list.lkey = sdev->lkey;
> +
> +        ioctx->ioctx.cqe.done = srpt_recv_done;
> +        wr.wr_cqe = &ioctx->ioctx.cqe;
> +        wr.next = NULL;
> +        wr.sg_list = &list;
> +        wr.num_sge = 1;
> +
> +        return ib_post_srq_recv(srq->ibsrq, &wr, NULL);
> +}
> +
> +/**
>   * srpt_post_recv - post an IB receive request
>   * @sdev: SRPT HCA pointer.
>   * @ch: SRPT RDMA channel.
> @@ -823,6 +848,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
>          struct ib_recv_wr wr;
>
>          BUG_ON(!sdev);
> +        BUG_ON(!ch);
>          list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset;
>          list.length = srp_max_req_size;
>          list.lkey = sdev->lkey;
> @@ -834,7 +860,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
>          wr.num_sge = 1;
>
>          if (sdev->use_srq)
> -                return ib_post_srq_recv(sdev->srq, &wr, NULL);
> +                return ib_post_srq_recv(ch->srq->ibsrq, &wr, NULL);
>          else
>                  return ib_post_recv(ch->qp, &wr, NULL);
>  }
> @@ -1820,7 +1846,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
>                               SRPT_MAX_SG_PER_WQE);
>          qp_init->port_num = ch->sport->port;
>          if (sdev->use_srq) {
> -                qp_init->srq = sdev->srq;
> +                ch->srq = sdev->srqs[ch->cq->comp_vector % sdev->srq_count];
> +                qp_init->srq = ch->srq->ibsrq;
>          } else {
>                  qp_init->cap.max_recv_wr = ch->rq_size;
>                  qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
> @@ -1878,6 +1905,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
>
>  static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
>  {
> +        if (ch->srq)
> +                ch->srq = NULL;
>          ib_destroy_qp(ch->qp);
>          ib_free_cq(ch->cq);
>  }
> @@ -3018,20 +3047,75 @@ static struct se_wwn *srpt_lookup_wwn(const char *name)
>          return wwn;
>  }
>
> -static void srpt_free_srq(struct srpt_device *sdev)
> +static void srpt_free_srq(struct srpt_srq *srq)
>  {
> -        if (!sdev->srq)
> -                return;
>
> -        ib_destroy_srq(sdev->srq);
> -        srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
> -                             sdev->srq_size, sdev->req_buf_cache,
> +        srpt_free_ioctx_ring((struct srpt_ioctx **)srq->ioctx_ring, srq->sdev,
> +                             srq->sdev->srq_size, srq->sdev->req_buf_cache,
>                               DMA_FROM_DEVICE);
> +        rdma_srq_put(srq->sdev->pd, srq->ibsrq);
> +        kfree(srq);
> +
> +}
> +
> +static void srpt_free_srqs(struct srpt_device *sdev)
> +{
> +        int i;
> +
> +        if (!sdev->srqs)
> +                return;
> +
> +        for (i = 0; i < sdev->srq_count; i++)
> +                srpt_free_srq(sdev->srqs[i]);
>          kmem_cache_destroy(sdev->req_buf_cache);
> -        sdev->srq = NULL;
> +        rdma_srq_set_destroy(sdev->pd);
> +        kfree(sdev->srqs);
> +        sdev->srqs = NULL;
>  }
>
> -static int srpt_alloc_srq(struct srpt_device *sdev)
> +static struct srpt_srq *srpt_alloc_srq(struct srpt_device *sdev)
> +{
> +        struct srpt_srq *srq;
> +        int i, ret;
> +
> +        srq = kzalloc(sizeof(*srq), GFP_KERNEL);
> +        if (!srq) {
> +                pr_debug("failed to allocate SRQ context\n");

Please no print on kzalloc failure (the allocator already complains loudly), and no to adding pr_* prints.

> +                return ERR_PTR(-ENOMEM);
> +        }
> +
> +        srq->ibsrq = rdma_srq_get(sdev->pd);
> +        if (!srq) {

!srq->ibsrq ???? You just allocated and checked srq above, so this branch can never be taken, and a failed rdma_srq_get() goes unnoticed.

> +                ret = -EAGAIN;
> +                goto free_srq;
> +        }

Thanks
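To spell out that last point, here is an untested sketch of the error path I would expect (assuming the rdma_srq_get() helper introduced earlier in this series returns NULL on failure, as the -EAGAIN branch implies):

        srq = kzalloc(sizeof(*srq), GFP_KERNEL);
        if (!srq)
                return ERR_PTR(-ENOMEM);        /* kzalloc already warned, no print needed */

        srq->ibsrq = rdma_srq_get(sdev->pd);
        if (!srq->ibsrq) {                      /* test the new SRQ, not the context allocated above */
                ret = -EAGAIN;
                goto free_srq;
        }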