[PATCH V2 1/3] RDMA/hns: Support WQE/CQE/PBL page size configurable feature in hip08

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch updates to support WQE, CQE and PBL page size configurable
feature, which includes base address page size and buffer page size.

Signed-off-by: Shaobo Xu <xushaobo2@xxxxxxxxxx>
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@xxxxxxxxxx>
Signed-off-by: Lijun Ou <oulijun@xxxxxxxxxx>
---
 drivers/infiniband/hw/hns/hns_roce_alloc.c  | 29 +++++----
 drivers/infiniband/hw/hns/hns_roce_cq.c     | 21 ++++++-
 drivers/infiniband/hw/hns/hns_roce_device.h | 10 ++--
 drivers/infiniband/hw/hns/hns_roce_mr.c     | 93 ++++++++++++++++++++---------
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 46 ++++++++++----
 5 files changed, 142 insertions(+), 57 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 8c9a33f..3e4c525 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -167,12 +167,12 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
 	if (buf->nbufs == 1) {
 		dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map);
 	} else {
-		if (bits_per_long == 64)
+		if (bits_per_long == 64 && buf->page_shift == PAGE_SHIFT)
 			vunmap(buf->direct.buf);
 
 		for (i = 0; i < buf->nbufs; ++i)
 			if (buf->page_list[i].buf)
-				dma_free_coherent(dev, PAGE_SIZE,
+				dma_free_coherent(dev, 1 << buf->page_shift,
 						  buf->page_list[i].buf,
 						  buf->page_list[i].map);
 		kfree(buf->page_list);
@@ -181,20 +181,27 @@ void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
 EXPORT_SYMBOL_GPL(hns_roce_buf_free);
 
 int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
-		       struct hns_roce_buf *buf)
+		       struct hns_roce_buf *buf, u32 page_shift)
 {
 	int i = 0;
 	dma_addr_t t;
 	struct page **pages;
 	struct device *dev = hr_dev->dev;
 	u32 bits_per_long = BITS_PER_LONG;
+	u32 page_size = 1 << page_shift;
+	u32 order;
 
 	/* SQ/RQ buf lease than one page, SQ + RQ = 8K */
 	if (size <= max_direct) {
 		buf->nbufs = 1;
 		/* Npages calculated by page_size */
-		buf->npages = 1 << get_order(size);
-		buf->page_shift = PAGE_SHIFT;
+		order = get_order(size);
+		if (order <= page_shift - PAGE_SHIFT)
+			order = 0;
+		else
+			order -= page_shift - PAGE_SHIFT;
+		buf->npages = 1 << order;
+		buf->page_shift = page_shift;
 		/* MTT PA must be recorded in 4k alignment, t is 4k aligned */
 		buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL);
 		if (!buf->direct.buf)
@@ -209,9 +216,9 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 
 		memset(buf->direct.buf, 0, size);
 	} else {
-		buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+		buf->nbufs = (size + page_size - 1) / page_size;
 		buf->npages = buf->nbufs;
-		buf->page_shift = PAGE_SHIFT;
+		buf->page_shift = page_shift;
 		buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
 					 GFP_KERNEL);
 
@@ -220,16 +227,16 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf = dma_alloc_coherent(dev,
-								  PAGE_SIZE, &t,
+								  page_size, &t,
 								  GFP_KERNEL);
 
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
 			buf->page_list[i].map = t;
-			memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+			memset(buf->page_list[i].buf, 0, page_size);
 		}
-		if (bits_per_long == 64) {
+		if (bits_per_long == 64 && page_shift == PAGE_SHIFT) {
 			pages = kmalloc_array(buf->nbufs, sizeof(*pages),
 					      GFP_KERNEL);
 			if (!pages)
@@ -243,6 +250,8 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
 			kfree(pages);
 			if (!buf->direct.buf)
 				goto err_free;
+		} else {
+			buf->direct.buf = NULL;
 		}
 	}
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 88cdf6f..f558f95 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -220,6 +220,8 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
 				   struct ib_umem **umem, u64 buf_addr, int cqe)
 {
 	int ret;
+	u32 page_shift;
+	u32 npages;
 
 	*umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz,
 			    IB_ACCESS_LOCAL_WRITE, 1);
@@ -230,8 +232,19 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
 		buf->hr_mtt.mtt_type = MTT_TYPE_CQE;
 	else
 		buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
-	ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
-				(*umem)->page_shift, &buf->hr_mtt);
+
+	if (hr_dev->caps.cqe_buf_pg_sz) {
+		npages = (ib_umem_page_count(*umem) +
+			(1 << hr_dev->caps.cqe_buf_pg_sz) - 1) /
+			(1 << hr_dev->caps.cqe_buf_pg_sz);
+		page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
+		ret = hns_roce_mtt_init(hr_dev, npages, page_shift,
+					&buf->hr_mtt);
+	} else {
+		ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem),
+				(*umem)->page_shift,
+				&buf->hr_mtt);
+	}
 	if (ret)
 		goto err_buf;
 
@@ -253,9 +266,11 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev,
 				    struct hns_roce_cq_buf *buf, u32 nent)
 {
 	int ret;
+	u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz;
 
 	ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz,
-				 PAGE_SIZE * 2, &buf->hr_buf);
+				 (1 << page_shift) * 2, &buf->hr_buf,
+				 page_shift);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index b314ac0..9353400 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -711,12 +711,14 @@ static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
 static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset)
 {
 	u32 bits_per_long_val = BITS_PER_LONG;
+	u32 page_size = 1 << buf->page_shift;
 
-	if (bits_per_long_val == 64 || buf->nbufs == 1)
+	if ((bits_per_long_val == 64 && buf->page_shift == PAGE_SHIFT) ||
+	    buf->nbufs == 1)
 		return (char *)(buf->direct.buf) + offset;
 	else
-		return (char *)(buf->page_list[offset >> PAGE_SHIFT].buf) +
-		       (offset & (PAGE_SIZE - 1));
+		return (char *)(buf->page_list[offset >> buf->page_shift].buf) +
+		       (offset & (page_size - 1));
 }
 
 int hns_roce_init_uar_table(struct hns_roce_dev *dev);
@@ -787,7 +789,7 @@ int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
 void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
 		       struct hns_roce_buf *buf);
 int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
-		       struct hns_roce_buf *buf);
+		       struct hns_roce_buf *buf, u32 page_shift);
 
 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
 			       struct hns_roce_mtt *mtt, struct ib_umem *umem);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 452136d..c47a5ee 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -708,11 +708,17 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 	dma_addr_t dma_handle;
 	__le64 *mtts;
 	u32 s = start_index * sizeof(u64);
+	u32 bt_page_size;
 	u32 i;
 
+	if (mtt->mtt_type == MTT_TYPE_WQE)
+		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
+	else
+		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+
 	/* All MTTs must fit in the same page */
-	if (start_index / (PAGE_SIZE / sizeof(u64)) !=
-		(start_index + npages - 1) / (PAGE_SIZE / sizeof(u64)))
+	if (start_index / (bt_page_size / sizeof(u64)) !=
+		(start_index + npages - 1) / (bt_page_size / sizeof(u64)))
 		return -EINVAL;
 
 	if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
@@ -746,12 +752,18 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
 {
 	int chunk;
 	int ret;
+	u32 bt_page_size;
 
 	if (mtt->order < 0)
 		return -EINVAL;
 
+	if (mtt->mtt_type == MTT_TYPE_WQE)
+		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
+	else
+		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+
 	while (npages > 0) {
-		chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
+		chunk = min_t(int, bt_page_size / sizeof(u64), npages);
 
 		ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk,
 					       page_list);
@@ -869,25 +881,44 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
 int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
 			       struct hns_roce_mtt *mtt, struct ib_umem *umem)
 {
+	struct device *dev = hr_dev->dev;
 	struct scatterlist *sg;
+	unsigned int order;
 	int i, k, entry;
+	int npage = 0;
 	int ret = 0;
+	int len;
+	u64 page_addr;
 	u64 *pages;
+	u32 bt_page_size;
 	u32 n;
-	int len;
 
-	pages = (u64 *) __get_free_page(GFP_KERNEL);
+	order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
+		hr_dev->caps.cqe_ba_pg_sz;
+	bt_page_size = 1 << (order + PAGE_SHIFT);
+
+	pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
 	if (!pages)
 		return -ENOMEM;
 
 	i = n = 0;
 
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		len = sg_dma_len(sg) >> mtt->page_shift;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
 		for (k = 0; k < len; ++k) {
-			pages[i++] = sg_dma_address(sg) +
-				(k << umem->page_shift);
-			if (i == PAGE_SIZE / sizeof(u64)) {
+			page_addr =
+				sg_dma_address(sg) + (k << umem->page_shift);
+			if (!(npage % (1 << (mtt->page_shift - PAGE_SHIFT)))) {
+				if (page_addr & ((1 << mtt->page_shift) - 1)) {
+					dev_err(dev, "page_addr 0x%llx is not page_shift %d alignment!\n",
+						page_addr, mtt->page_shift);
+					ret = -EINVAL;
+					goto out;
+				}
+				pages[i++] = page_addr;
+			}
+			npage++;
+			if (i == bt_page_size / sizeof(u64)) {
 				ret = hns_roce_write_mtt(hr_dev, mtt, n, i,
 							 pages);
 				if (ret)
@@ -911,29 +942,37 @@ static int hns_roce_ib_umem_write_mr(struct hns_roce_dev *hr_dev,
 				     struct ib_umem *umem)
 {
 	struct scatterlist *sg;
-	int i = 0, j = 0;
+	int i = 0, j = 0, k;
 	int entry;
+	int len;
+	u64 page_addr;
+	u32 pbl_bt_sz;
 
 	if (hr_dev->caps.pbl_hop_num == HNS_ROCE_HOP_NUM_0)
 		return 0;
 
+	pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT);
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-		if (!hr_dev->caps.pbl_hop_num) {
-			mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12;
-			i++;
-		} else if (hr_dev->caps.pbl_hop_num == 1) {
-			mr->pbl_buf[i] = sg_dma_address(sg);
-			i++;
-		} else {
-			if (hr_dev->caps.pbl_hop_num == 2)
-				mr->pbl_bt_l1[i][j] = sg_dma_address(sg);
-			else if (hr_dev->caps.pbl_hop_num == 3)
-				mr->pbl_bt_l2[i][j] = sg_dma_address(sg);
-
-			j++;
-			if (j >= (PAGE_SIZE / 8)) {
-				i++;
-				j = 0;
+		len = sg_dma_len(sg) >> PAGE_SHIFT;
+		for (k = 0; k < len; ++k) {
+			page_addr = sg_dma_address(sg) +
+				    (k << umem->page_shift);
+
+			if (!hr_dev->caps.pbl_hop_num) {
+				mr->pbl_buf[i++] = page_addr >> 12;
+			} else if (hr_dev->caps.pbl_hop_num == 1) {
+				mr->pbl_buf[i++] = page_addr;
+			} else {
+				if (hr_dev->caps.pbl_hop_num == 2)
+					mr->pbl_bt_l1[i][j] = page_addr;
+				else if (hr_dev->caps.pbl_hop_num == 3)
+					mr->pbl_bt_l2[i][j] = page_addr;
+
+				j++;
+				if (j >= (pbl_bt_sz / 8)) {
+					i++;
+					j = 0;
+				}
 			}
 		}
 	}
@@ -986,7 +1025,7 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	} else {
 		int pbl_size = 1;
 
-		bt_size = (1 << PAGE_SHIFT) / 8;
+		bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
 		for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
 			pbl_size *= bt_size;
 		if (n > pbl_size) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e6d1115..b1c9a37 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -322,6 +322,7 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 {
 	u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz);
 	u8 max_sq_stride = ilog2(roundup_sq_stride);
+	u32 page_size;
 	u32 max_cnt;
 
 	/* Sanity check SQ size before proceeding */
@@ -363,28 +364,29 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev,
 		hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
 					     hr_qp->sq.wqe_shift), PAGE_SIZE);
 	} else {
+		page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 		hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt <<
-					     hr_qp->rq.wqe_shift), PAGE_SIZE) +
+					     hr_qp->rq.wqe_shift), page_size) +
 				   HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
-					     hr_qp->sge.sge_shift), PAGE_SIZE) +
+					     hr_qp->sge.sge_shift), page_size) +
 				   HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt <<
-					     hr_qp->sq.wqe_shift), PAGE_SIZE);
+					     hr_qp->sq.wqe_shift), page_size);
 
 		hr_qp->sq.offset = 0;
 		if (hr_qp->sge.sge_cnt) {
 			hr_qp->sge.offset = HNS_ROCE_ALOGN_UP(
 							(hr_qp->sq.wqe_cnt <<
 							hr_qp->sq.wqe_shift),
-							PAGE_SIZE);
+							page_size);
 			hr_qp->rq.offset = hr_qp->sge.offset +
 					HNS_ROCE_ALOGN_UP((hr_qp->sge.sge_cnt <<
 						hr_qp->sge.sge_shift),
-						PAGE_SIZE);
+						page_size);
 		} else {
 			hr_qp->rq.offset = HNS_ROCE_ALOGN_UP(
 							(hr_qp->sq.wqe_cnt <<
 							hr_qp->sq.wqe_shift),
-							PAGE_SIZE);
+							page_size);
 		}
 	}
 
@@ -396,6 +398,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 				       struct hns_roce_qp *hr_qp)
 {
 	struct device *dev = hr_dev->dev;
+	u32 page_size;
 	u32 max_cnt;
 	int size;
 
@@ -435,19 +438,20 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 	}
 
 	/* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
+	page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
 	hr_qp->sq.offset = 0;
 	size = HNS_ROCE_ALOGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
-				 PAGE_SIZE);
+				 page_size);
 
 	if (hr_dev->caps.max_sq_sg > 2 && hr_qp->sge.sge_cnt) {
 		hr_qp->sge.offset = size;
 		size += HNS_ROCE_ALOGN_UP(hr_qp->sge.sge_cnt <<
-					  hr_qp->sge.sge_shift, PAGE_SIZE);
+					  hr_qp->sge.sge_shift, page_size);
 	}
 
 	hr_qp->rq.offset = size;
 	size += HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
-				  PAGE_SIZE);
+				  page_size);
 	hr_qp->buff_size = size;
 
 	/* Get wr and sge number which send */
@@ -470,6 +474,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	struct hns_roce_ib_create_qp ucmd;
 	unsigned long qpn = 0;
 	int ret = 0;
+	u32 page_shift;
+	u32 npages;
 
 	mutex_init(&hr_qp->mutex);
 	spin_lock_init(&hr_qp->sq.lock);
@@ -513,8 +519,20 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		}
 
 		hr_qp->mtt.mtt_type = MTT_TYPE_WQE;
-		ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem),
-					hr_qp->umem->page_shift, &hr_qp->mtt);
+		if (hr_dev->caps.mtt_buf_pg_sz) {
+			npages = (ib_umem_page_count(hr_qp->umem) +
+				  (1 << hr_dev->caps.mtt_buf_pg_sz) - 1) /
+				  (1 << hr_dev->caps.mtt_buf_pg_sz);
+			page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+			ret = hns_roce_mtt_init(hr_dev, npages,
+				    page_shift,
+				    &hr_qp->mtt);
+		} else {
+			ret = hns_roce_mtt_init(hr_dev,
+				    ib_umem_page_count(hr_qp->umem),
+				    hr_qp->umem->page_shift,
+				    &hr_qp->mtt);
+		}
 		if (ret) {
 			dev_err(dev, "hns_roce_mtt_init error for create qp\n");
 			goto err_buf;
@@ -555,8 +573,10 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 				     DB_REG_OFFSET * hr_dev->priv_uar.index;
 
 		/* Allocate QP buf */
-		if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, PAGE_SIZE * 2,
-				       &hr_qp->hr_buf)) {
+		page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+		if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
+				       (1 << page_shift) * 2,
+				       &hr_qp->hr_buf, page_shift)) {
 			dev_err(dev, "hns_roce_buf_alloc error!\n");
 			ret = -ENOMEM;
 			goto err_out;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux