On 3/11/2019 4:11 PM, Christoph Hellwig wrote:
> On Sun, Mar 10, 2019 at 01:12:59PM +0200, Max Gurtovoy wrote:
>>> Nothing uses ib_scatterlist either, as you just introduce it. I'd
>>> rather move to a common and more useful structure directly.
>>
>> We've started exploring the sg_table option and found that there is no
>> dma_nents arg there.
>
> The dma_nents member is called nents, the original non-coalesced one
> is called orig_nents.
After looking at the SCSI implementation, we came up with the attached
API proposal (untested).

In this API we add dma_nents to sg_table to save the return value of
dma_map_sg() (which can coalesce entries, so the mapped count may be
smaller than the count passed in), and ib_map_mr_sg() then takes the
sg_table and uses the appropriate values from it.
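One clarification on the diff below: it assumes ib_map_mr_sg() itself is
changed to take the sg_table (that IB core hunk is not included here).
The same idea, sketched as an untested helper on top of today's
ib_map_mr_sg() and with a made-up name just for illustration:

static inline int ib_map_mr_sgt(struct ib_mr *mr, struct sg_table *sgt,
				unsigned int *sg_offset,
				unsigned int page_size)
{
	/*
	 * sgt->dma_nents holds the value returned by dma_map_sg(), i.e.
	 * the number of coalesced entries that were actually DMA mapped.
	 */
	return ib_map_mr_sg(mr, sgt->sgl, sgt->dma_nents, sg_offset,
			    page_size);
}

Either way, the point is that callers stop passing the dma_map_sg()
return value around as a separate argument.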
Thoughts?
From 5473d15584ae840960d29d022818bd76edea37ac Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@xxxxxxxxxxxx>
Date: Wed, 13 Mar 2019 16:48:58 +0000
Subject: [PATCH] nvme-rdma: Use sg_table
---
drivers/nvme/host/rdma.c | 30 ++++++++++++++++--------------
include/linux/scatterlist.h | 1 +
2 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 52abc3a6de12..ff7b7ea38e1a 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -66,7 +66,6 @@ struct nvme_rdma_request {
refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
- int nents;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
@@ -1167,7 +1166,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
- req->nents, rq_data_dir(rq) ==
+ req->sg_table.nents, rq_data_dir(rq) ==
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
nvme_cleanup_cmd(rq);
@@ -1186,13 +1185,13 @@ static int nvme_rdma_set_sg_null(struct nvme_command *c)
}
static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
- struct nvme_rdma_request *req, struct nvme_command *c,
- int count)
+ struct nvme_rdma_request *req, struct nvme_command *c)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
struct scatterlist *sgl = req->sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
+ int count = req->sg_table.dma_nents;
int i;
for (i = 0; i < count; i++, sgl++, sge++) {
@@ -1223,10 +1222,10 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
}
static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
- struct nvme_rdma_request *req, struct nvme_command *c,
- int count)
+ struct nvme_rdma_request *req, struct nvme_command *c)
{
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+ int count = req->sg_table.dma_nents;
int nr;
req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
@@ -1237,7 +1236,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
* Align the MR to a 4K page size to match the ctrl page size and
* the block virtual boundary.
*/
- nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
+ nr = ib_map_mr_sg(req->mr, &req->sg_table, NULL, SZ_4K);
if (unlikely(nr < count)) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
req->mr = NULL;
@@ -1274,7 +1273,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
- int count, ret;
+ int count, nents, ret;
req->num_sge = 1;
refcount_set(&req->ref, 2); /* send and recv completions */
@@ -1290,21 +1289,24 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
if (ret)
return -ENOMEM;
- req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
+ nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
+ BUG_ON(nents > req->sg_table.nents);
+ req->sg_table.nents = nents;
- count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
+ req->sg_table.dma_nents = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents,
rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
- if (unlikely(count <= 0)) {
+ if (unlikely(req->sg_table.dma_nents <= 0)) {
ret = -EIO;
goto out_free_table;
}
+ count = req->sg_table.dma_nents;
if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
blk_rq_payload_bytes(rq) <=
nvme_rdma_inline_data_size(queue)) {
- ret = nvme_rdma_map_sg_inline(queue, req, c, count);
+ ret = nvme_rdma_map_sg_inline(queue, req, c);
goto out;
}
@@ -1314,7 +1316,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
}
}
- ret = nvme_rdma_map_sg_fr(queue, req, c, count);
+ ret = nvme_rdma_map_sg_fr(queue, req, c);
out:
if (unlikely(ret))
goto out_unmap_sg;
@@ -1323,7 +1325,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
- req->nents, rq_data_dir(rq) ==
+ nents, rq_data_dir(rq) ==
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
out_free_table:
sg_free_table_chained(&req->sg_table, true);
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b96f0d0b5b8f..70644a9f7980 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -43,6 +43,7 @@ struct sg_table {
struct scatterlist *sgl; /* the list */
unsigned int nents; /* number of mapped entries */
unsigned int orig_nents; /* original size of list */
+ int dma_nents; /* returned by dma_map_sg */
};
/*
--
2.16.3