Re: [PATCH v4 9/9] IB/srpt: convert to the generic RDMA READ/WRITE API

On Tue, Mar 08, 2016 at 07:16:40PM +0100, Christoph Hellwig wrote:
> Replace the homegrown RDMA READ/WRITE code in srpt with the generic API.
> The only real twist here is that we need to allocate one Linux scatterlist
> per direct buffer in the SRP command, and chain them before handing them
> off to the target core.
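
Just to confirm I'm reading the chaining right: if I understand it correctly,
target_alloc_sgl() leaves one spare entry at the tail of every list except the
last one (the "i < nbufs - 1" argument below), and that spare entry becomes
the chain link.  A minimal sketch of the idea, with made-up names, not the
actual driver code:

	#include <linux/scatterlist.h>

	/*
	 * Illustration only: append list "b" to list "a".  "a" holds a_nents
	 * data entries plus one spare entry reserved for the chain link.
	 */
	static void chain_example(struct scatterlist *a, unsigned int a_nents,
				  struct scatterlist *b)
	{
		sg_unmark_end(&a[a_nents - 1]);	/* last data entry no longer terminates */
		sg_chain(a, a_nents + 1, b);	/* spare entry now points to "b" */
	}

so walking the combined list with sg_next() crosses transparently into the
next chunk, which is presumably why *sg_cnt accumulates the total entry count.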
> 
> As a side-effect of the conversion the driver will also chain the SEND
> of the SRP response to the RDMA WRITE WRs for a DATA-IN command, and
> properly account for RDMA WRITE WRs instead of just for RDMA READ WRs
> like the driver previously did.
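
Nice, so the data transfer and the response SEND now go out in a single
ib_post_send().  For my own understanding, roughly like the sketch below
(made-up locals, using the rdma_rw_ctx_wrs() helper added earlier in the
series; send_wr is assumed to be the already prepared SEND of the SRP_RSP):

	struct ib_send_wr *first_wr = &send_wr, *bad_wr;
	int i, ret;

	/* link the response SEND behind the WRs of every R/W context */
	for (i = ioctx->n_rw_ctx - 1; i >= 0; i--) {
		struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];

		/* prepend this context's WRs in front of what we have so far */
		first_wr = rdma_rw_ctx_wrs(&ctx->rw, ch->qp, ch->sport->port,
					   NULL, first_wr);
	}
	ret = ib_post_send(ch->qp, first_wr, &bad_wr);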
> 
> We now allocate half of the SQ size to RDMA READ/WRITE contexts, assuming
> by default one RDMA READ or WRITE operation per command.  If a command
> has multiple operations, it will eat into the budget but will still succeed,
> possibly after waiting for WQEs to be available.
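
The "eat into the budget" part presumably boils down to charging 1 + n_rdma
WQEs per command against the shared counter and giving them back on failure
or completion.  A hypothetical helper, only to spell out my reading of it
(names made up, uses the driver's sq_wr_avail counter):

	/*
	 * Illustration only: reserve one SEND plus n_rdma READ/WRITE WQEs
	 * from the send queue budget, or back off if the SQ is full.
	 */
	static bool srpt_reserve_wqes(struct srpt_rdma_ch *ch, int n_rdma)
	{
		if (atomic_sub_return(1 + n_rdma, &ch->sq_wr_avail) < 0) {
			atomic_add(1 + n_rdma, &ch->sq_wr_avail);
			return false;	/* caller retries once WQEs complete */
		}
		return true;
	}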
> 
> Also ensure the QPs request the maximum allowed SGEs so that the RDMA R/W
> API works correctly.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>
> ---
>  drivers/infiniband/ulp/srpt/ib_srpt.c | 693 +++++++++++-----------------------
>  drivers/infiniband/ulp/srpt/ib_srpt.h |  30 +-
>  2 files changed, 242 insertions(+), 481 deletions(-)
> 
> diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
> index 25bdaee..05bdec8 100644
> --- a/drivers/infiniband/ulp/srpt/ib_srpt.c
> +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
> @@ -765,52 +765,6 @@ static int srpt_post_recv(struct srpt_device *sdev,
>  }
>  
>  /**
> - * srpt_post_send() - Post an IB send request.
> - *
> - * Returns zero upon success and a non-zero value upon failure.
> - */
> -static int srpt_post_send(struct srpt_rdma_ch *ch,
> -			  struct srpt_send_ioctx *ioctx, int len)
> -{
> -	struct ib_sge list;
> -	struct ib_send_wr wr, *bad_wr;
> -	struct srpt_device *sdev = ch->sport->sdev;
> -	int ret;
> -
> -	atomic_inc(&ch->req_lim);
> -
> -	ret = -ENOMEM;
> -	if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
> -		pr_warn("IB send queue full (needed 1)\n");
> -		goto out;
> -	}
> -
> -	ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len,
> -				      DMA_TO_DEVICE);
> -
> -	list.addr = ioctx->ioctx.dma;
> -	list.length = len;
> -	list.lkey = sdev->pd->local_dma_lkey;
> -
> -	ioctx->ioctx.cqe.done = srpt_send_done;
> -	wr.next = NULL;
> -	wr.wr_cqe = &ioctx->ioctx.cqe;
> -	wr.sg_list = &list;
> -	wr.num_sge = 1;
> -	wr.opcode = IB_WR_SEND;
> -	wr.send_flags = IB_SEND_SIGNALED;
> -
> -	ret = ib_post_send(ch->qp, &wr, &bad_wr);
> -
> -out:
> -	if (ret < 0) {
> -		atomic_inc(&ch->sq_wr_avail);
> -		atomic_dec(&ch->req_lim);
> -	}
> -	return ret;
> -}
> -
> -/**
>   * srpt_zerolength_write() - Perform a zero-length RDMA write.
>   *
>   * A quote from the InfiniBand specification: C9-88: For an HCA responder
> @@ -843,6 +797,110 @@ static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
>  	}
>  }
>  
> +static int srpt_alloc_rw_ctxs(struct srpt_send_ioctx *ioctx,
> +		struct srp_direct_buf *db, int nbufs, struct scatterlist **sg,
> +		unsigned *sg_cnt)
> +{
> +	enum dma_data_direction dir = target_reverse_dma_direction(&ioctx->cmd);
> +	struct srpt_rdma_ch *ch = ioctx->ch;
> +	struct scatterlist *prev = NULL;
> +	unsigned prev_nents;
> +	int ret, i;
> +
> +	if (nbufs == 1) {
> +		ioctx->rw_ctxs = &ioctx->s_rw_ctx;
> +	} else {
> +		ioctx->rw_ctxs = kmalloc_array(nbufs, sizeof(*ioctx->rw_ctxs),
> +				GFP_KERNEL);
> +		if (!ioctx->rw_ctxs)
> +			return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < nbufs; i++, db++) {
> +		struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
> +		u64 remote_addr = be64_to_cpu(db->va);
> +		u32 size = be32_to_cpu(db->len);
> +		u32 rkey = be32_to_cpu(db->key);
> +
> +		ret = target_alloc_sgl(&ctx->sg, &ctx->nents, size, false,
> +				i < nbufs - 1);
> +		if (ret)
> +			goto unwind;
> +
> +		ret = rdma_rw_ctx_init(&ctx->rw, ch->qp, ch->sport->port,
> +				ctx->sg, ctx->nents, 0, remote_addr, rkey, dir);
> +		if (ret < 0) {
> +			target_free_sgl(ctx->sg, ctx->nents);
> +			goto unwind;
> +		}
> +
> +		ioctx->n_rdma += ret;
> +
> +		if (prev) {
> +			sg_unmark_end(&prev[prev_nents - 1]);
> +			sg_chain(prev, prev_nents + 1, ctx->sg);
> +		} else {
> +			*sg = ctx->sg;
> +		}
> +
> +		prev = ctx->sg;
> +		prev_nents = ctx->nents;
> +
> +		*sg_cnt += ctx->nents;
> +	}
> +
> +	ioctx->n_rw_ctx = nbufs;
> +	return 0;
> +
> +unwind:
> +	while (--i >= 0) {
> +		struct srpt_rw_ctx *ctx = &ioctx->rw_ctxs[i];
> +
> +		rdma_rw_ctx_destroy(&ctx->rw, ch->qp, ch->sport->port,
> +				ctx->sg, ctx->nents, dir);
> +		target_free_sgl(ctx->sg, ctx->nents);
> +	}
> +	if (ioctx->rw_ctxs && ioctx->rw_ctxs != &ioctx->s_rw_ctx)

The unwind label is only reachable from inside the loop, i.e. after
ioctx->rw_ctxs has already been assigned, so at this point
"ioctx->rw_ctxs != NULL" is always true and the NULL check is redundant.

> +		kfree(ioctx->rw_ctxs);
> +	return ret;
> +}