[PATCH for-next 2/3] RDMA/erdma: Refactor the storage structure of MTT entries

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently, our MTT only supports inline MTT entries (0-level MTT) and
indirect MTT entries (1-level MTT), which limits the maximum length
of MRs. In order to implement a multi-level MTT, we refactor the
storage structure of MTT entries first.

Signed-off-by: Cheng Xu <chengyou@xxxxxxxxxxxxxxxxx>
---
 drivers/infiniband/hw/erdma/erdma_hw.h    |   4 +-
 drivers/infiniband/hw/erdma/erdma_qp.c    |   2 +-
 drivers/infiniband/hw/erdma/erdma_verbs.c | 214 +++++++++++++---------
 drivers/infiniband/hw/erdma/erdma_verbs.h |  26 ++-
 4 files changed, 152 insertions(+), 94 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index a882b57aa118..80a78569bc2a 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -228,7 +228,7 @@ struct erdma_cmdq_ext_db_req {
 
 /* create_cq cfg1 */
 #define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
-#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15)
+#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15)
 #define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11)
 #define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
 
@@ -258,7 +258,7 @@ struct erdma_cmdq_create_cq_req {
 
 /* regmr cfg2 */
 #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
-#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20)
+#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20)
 #define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0)
 
 struct erdma_cmdq_reg_mr_req {
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 44923c51a01b..6d0330badd68 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -410,7 +410,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
 			/* Copy SGLs to SQE content to accelerate */
 			memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
 					       qp->attrs.sq_size, SQEBB_SHIFT),
-			       mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents));
+			       mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
 			wqe_size = sizeof(struct erdma_reg_mr_sqe) +
 				   MTT_SIZE(mr->mem.mtt_nents);
 		} else {
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index fbbd046b350c..0d272f18256a 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -19,6 +19,23 @@
 #include "erdma_cm.h"
 #include "erdma_verbs.h"
 
+static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
+				      u64 *addr0, u64 *addr1)
+{
+	struct erdma_mtt *mtt = mem->mtt;
+
+	if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+		*addr0 = mtt->buf_dma;
+		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
+				   ERDMA_MR_INDIRECT_MTT);
+	} else {
+		*addr0 = mtt->buf[0];
+		memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
+		*cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
+				   ERDMA_MR_INLINE_MTT);
+	}
+}
+
 static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
 {
 	struct erdma_dev *dev = to_edev(qp->ibqp.device);
@@ -79,18 +96,16 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
 
 		req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
 		req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
-					     user_qp->sq_mem.mtt_nents) |
-				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
-					     user_qp->sq_mem.mtt_type);
+					     user_qp->sq_mem.mtt_nents);
 
 		req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
 		req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
-					     user_qp->rq_mem.mtt_nents) |
-				  FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
-					     user_qp->rq_mem.mtt_type);
+					     user_qp->rq_mem.mtt_nents);
 
-		req.sq_buf_addr = user_qp->sq_mem.mtt_entry[0];
-		req.rq_buf_addr = user_qp->rq_mem.mtt_entry[0];
+		assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
+					  &req.sq_buf_addr, req.sq_mtt_entry);
+		assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
+					  &req.rq_buf_addr, req.rq_mtt_entry);
 
 		req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
 		req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
@@ -117,13 +132,22 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
 
 static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
 {
-	struct erdma_cmdq_reg_mr_req req;
 	struct erdma_pd *pd = to_epd(mr->ibmr.pd);
-	u64 *phy_addr;
-	int i;
+	struct erdma_cmdq_reg_mr_req req;
+	u32 mtt_level;
 
 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
 
+	if (mr->type == ERDMA_MR_TYPE_FRMR ||
+	    mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+		req.phy_addr[0] = mr->mem.mtt->buf_dma;
+		mtt_level = ERDMA_MR_INDIRECT_MTT;
+	} else {
+		memcpy(req.phy_addr, mr->mem.mtt->buf,
+		       MTT_SIZE(mr->mem.page_cnt));
+		mtt_level = ERDMA_MR_INLINE_MTT;
+	}
+
 	req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
 		   FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
 		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
@@ -132,7 +156,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
 		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
 	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
 			      ilog2(mr->mem.page_size)) |
-		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
+		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
 
 	if (mr->type == ERDMA_MR_TYPE_DMA)
@@ -143,16 +167,6 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
 		req.size = mr->mem.len;
 	}
 
-	if (mr->type == ERDMA_MR_TYPE_FRMR ||
-	    mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
-		phy_addr = req.phy_addr;
-		*phy_addr = mr->mem.mtt_entry[0];
-	} else {
-		phy_addr = req.phy_addr;
-		for (i = 0; i < mr->mem.mtt_nents; i++)
-			*phy_addr++ = mr->mem.mtt_entry[i];
-	}
-
 post_cmd:
 	return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
 }
@@ -179,7 +193,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
 		req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
 
 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
-			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
+			    FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
 				       ERDMA_MR_INLINE_MTT);
 
 		req.first_page_offset = 0;
@@ -191,16 +205,20 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
 			FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
 				   ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
 		if (mem->mtt_nents == 1) {
-			req.qbuf_addr_l = lower_32_bits(*(u64 *)mem->mtt_buf);
-			req.qbuf_addr_h = upper_32_bits(*(u64 *)mem->mtt_buf);
+			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
+			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
+			req.cfg1 |=
+				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+					   ERDMA_MR_INLINE_MTT);
 		} else {
-			req.qbuf_addr_l = lower_32_bits(mem->mtt_entry[0]);
-			req.qbuf_addr_h = upper_32_bits(mem->mtt_entry[0]);
+			req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
+			req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
+			req.cfg1 |=
+				FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+					   ERDMA_MR_INDIRECT_MTT);
 		}
 		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
 				       mem->mtt_nents);
-		req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
-				       mem->mtt_type);
 
 		req.first_page_offset = mem->page_offset;
 		req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
@@ -508,12 +526,77 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
 	return -ENOMEM;
 }
 
+static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+	struct erdma_mtt *mtt = mem->mtt;
+	struct ib_block_iter biter;
+	u32 idx = 0;
+
+	while (mtt->low_level)
+		mtt = mtt->low_level;
+
+	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
+		mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
+}
+
+static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
+					       size_t size)
+{
+	struct erdma_mtt *mtt;
+	int ret = -ENOMEM;
+
+	mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+	if (!mtt)
+		return ERR_PTR(-ENOMEM);
+
+	mtt->size = size;
+	mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
+	if (!mtt->buf)
+		goto err_free_mtt;
+
+	mtt->continuous = true;
+	mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
+				      DMA_TO_DEVICE);
+	if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
+		goto err_free_mtt_buf;
+
+	return mtt;
+
+err_free_mtt_buf:
+	kfree(mtt->buf);
+
+err_free_mtt:
+	kfree(mtt);
+
+	return ERR_PTR(ret);
+}
+
+static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
+					  bool force_continuous)
+{
+	ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
+		  force_continuous);
+
+	if (force_continuous)
+		return erdma_create_cont_mtt(dev, size);
+
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
+{
+	if (mtt->continuous) {
+		dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
+				 DMA_TO_DEVICE);
+		kfree(mtt->buf);
+		kfree(mtt);
+	}
+}
+
 static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
 			   u64 start, u64 len, int access, u64 virt,
 			   unsigned long req_page_size, u8 force_indirect_mtt)
 {
-	struct ib_block_iter biter;
-	uint64_t *phy_addr = NULL;
 	int ret = 0;
 
 	mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
@@ -529,38 +612,13 @@ static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
 	mem->page_offset = start & (mem->page_size - 1);
 	mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
 	mem->page_cnt = mem->mtt_nents;
-
-	if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
-	    force_indirect_mtt) {
-		mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
-		mem->mtt_buf =
-			alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
-		if (!mem->mtt_buf) {
-			ret = -ENOMEM;
-			goto error_ret;
-		}
-		phy_addr = mem->mtt_buf;
-	} else {
-		mem->mtt_type = ERDMA_MR_INLINE_MTT;
-		phy_addr = mem->mtt_entry;
-	}
-
-	rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
-		*phy_addr = rdma_block_iter_dma_address(&biter);
-		phy_addr++;
+	mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), true);
+	if (IS_ERR(mem->mtt)) {
+		ret = PTR_ERR(mem->mtt);
+		goto error_ret;
 	}
 
-	if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
-		mem->mtt_entry[0] =
-			dma_map_single(&dev->pdev->dev, mem->mtt_buf,
-				       MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
-		if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
-			free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
-			mem->mtt_buf = NULL;
-			ret = -ENOMEM;
-			goto error_ret;
-		}
-	}
+	erdma_fill_bottom_mtt(dev, mem);
 
 	return 0;
 
@@ -575,11 +633,8 @@ static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
 
 static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
 {
-	if (mem->mtt_buf) {
-		dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
-				 MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
-		free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
-	}
+	if (mem->mtt)
+		erdma_destroy_mtt(dev, mem->mtt);
 
 	if (mem->umem) {
 		ib_umem_release(mem->umem);
@@ -875,33 +930,20 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 
 	mr->mem.page_size = PAGE_SIZE; /* update it later. */
 	mr->mem.page_cnt = max_num_sg;
-	mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
-	mr->mem.mtt_buf =
-		alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
-	if (!mr->mem.mtt_buf) {
-		ret = -ENOMEM;
+	mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
+	if (IS_ERR(mr->mem.mtt)) {
+		ret = PTR_ERR(mr->mem.mtt);
 		goto out_remove_stag;
 	}
 
-	mr->mem.mtt_entry[0] =
-		dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
-			       MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
-	if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
-		ret = -ENOMEM;
-		goto out_free_mtt;
-	}
-
 	ret = regmr_cmd(dev, mr);
 	if (ret)
-		goto out_dma_unmap;
+		goto out_destroy_mtt;
 
 	return &mr->ibmr;
 
-out_dma_unmap:
-	dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
-			 MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
-out_free_mtt:
-	free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
+out_destroy_mtt:
+	erdma_destroy_mtt(dev, mr->mem.mtt);
 
 out_remove_stag:
 	erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
@@ -920,7 +962,7 @@ static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
 	if (mr->mem.mtt_nents >= mr->mem.page_cnt)
 		return -1;
 
-	*((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
+	mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
 	mr->mem.mtt_nents++;
 
 	return 0;
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index abaf031fe0d2..5f639f27a8a9 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -65,7 +65,7 @@ struct erdma_pd {
  * MemoryRegion definition.
  */
 #define ERDMA_MAX_INLINE_MTT_ENTRIES 4
-#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt entry takes 8 Bytes. */
+#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt entry takes 8 Bytes. */
 #define ERDMA_MR_MAX_MTT_CNT 524288
 #define ERDMA_MTT_ENTRY_SIZE 8
 
@@ -90,10 +90,28 @@ static inline u8 to_erdma_access_flags(int access)
 	       (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
 }
 
+/* Hierarchical storage structure for MTT entries */
+struct erdma_mtt {
+	u64 *buf;
+	size_t size;
+
+	bool continuous;
+	union {
+		dma_addr_t buf_dma;
+		struct {
+			struct scatterlist *sglist;
+			u32 nsg;
+			u32 level;
+		};
+	};
+
+	struct erdma_mtt *low_level;
+};
+
 struct erdma_mem {
 	struct ib_umem *umem;
-	void *mtt_buf;
-	u32 mtt_type;
+	struct erdma_mtt *mtt;
+
 	u32 page_size;
 	u32 page_offset;
 	u32 page_cnt;
@@ -101,8 +119,6 @@ struct erdma_mem {
 
 	u64 va;
 	u64 len;
-
-	u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES];
 };
 
 struct erdma_mr {
-- 
2.31.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux