[for-next PATCH 1/4] RDMA/bnxt_re: Increase memory registration size of a single MR

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Somnath Kotur <somnath.kotur@xxxxxxxxxxxx>

Currently bnxt_re driver is not utilizing the full limits of our h/w
to register single MR. Increase the maximum  MR size to 256G.
The patch includes the following changes.

 - Use 'vmalloc' instead of 'kmalloc' while trying to allocate a
   temporary place holder for all the pointers to user memory pages.
 - Determine how many logical PDEs are required to store the pointers
   to user memory pages and allocate contigous pages of sufficient
   size to store all of them.

Signed-off-by: Somnath Kotur <somnath.kotur@xxxxxxxxxxxx>
Signed-off-by: Selvin Xavier <selvin.xavier@xxxxxxxxxxxx>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h   |  3 ++
 drivers/infiniband/hw/bnxt_re/ib_verbs.c  | 24 ++++++--------
 drivers/infiniband/hw/bnxt_re/qplib_res.c | 54 ++++++++++++++++++++++++++-----
 drivers/infiniband/hw/bnxt_re/qplib_res.h |  4 ++-
 drivers/infiniband/hw/bnxt_re/qplib_sp.c  |  8 ++---
 5 files changed, 65 insertions(+), 28 deletions(-)

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 31baa893..d2fa2a6b 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,6 +39,9 @@
 
 #ifndef __BNXT_RE_H__
 #define __BNXT_RE_H__
+
+#include <linux/vmalloc.h>
+
 #define ROCE_DRV_MODULE_NAME		"bnxt_re"
 
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 54fdd4c..9ff6810 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -3604,32 +3604,26 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 	}
 	mr->qplib_mr.total_size = length;
 
-	pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
-	if (!pbl_tbl) {
-		rc = -ENOMEM;
-		goto free_umem;
-	}
-
 	page_shift = umem->page_shift;
 
 	if (!bnxt_re_page_size_ok(page_shift)) {
 		dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
 		rc = -EFAULT;
-		goto fail;
+		goto free_umem;
 	}
 
-	if (!umem->hugetlb && length > BNXT_RE_MAX_MR_SIZE_LOW) {
-		dev_err(rdev_to_dev(rdev), "Requested MR Sz:%llu Max sup:%llu",
-			length,	(u64)BNXT_RE_MAX_MR_SIZE_LOW);
-		rc = -EINVAL;
-		goto fail;
-	}
 	if (umem->hugetlb && length > BNXT_RE_PAGE_SIZE_2M) {
 		page_shift = BNXT_RE_PAGE_SHIFT_2M;
 		dev_warn(rdev_to_dev(rdev), "umem hugetlb set page_size %x",
 			 1 << page_shift);
 	}
 
+	pbl_tbl = vmalloc(umem_pgs * sizeof(u64 *));
+	if (!pbl_tbl) {
+		rc = -EINVAL;
+		goto free_umem;
+	}
+
 	/* Map umem buf ptrs to the PBL */
 	umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift);
 	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl,
@@ -3639,7 +3633,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 		goto fail;
 	}
 
-	kfree(pbl_tbl);
+	vfree(pbl_tbl);
 
 	mr->ib_mr.lkey = mr->qplib_mr.lkey;
 	mr->ib_mr.rkey = mr->qplib_mr.lkey;
@@ -3647,7 +3641,7 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
 
 	return &mr->ib_mr;
 fail:
-	kfree(pbl_tbl);
+	vfree(pbl_tbl);
 free_umem:
 	ib_umem_release(umem);
 free_mrw:
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 59eeac5..13fa2d4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -164,7 +164,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
 			      u32 *elements, u32 element_size, u32 aux,
 			      u32 pg_size, enum bnxt_qplib_hwq_type hwq_type)
 {
-	u32 pages, slots, size, aux_pages = 0, aux_size = 0;
+	u32 pages, slots, size, aux_pages = 0, aux_size = 0, num_pdes, alloc_sz;
 	dma_addr_t *src_phys_ptr, **dst_virt_ptr;
 	int i, rc;
 
@@ -192,18 +192,41 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
 	}
 
 	/* Alloc the 1st memory block; can be a PDL/PTL/PBL */
-	if (sghead && (pages == MAX_PBL_LVL_0_PGS))
+	if (sghead && pages == MAX_PBL_LVL_0_PGS) {
 		rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead,
 				 pages, pg_size);
-	else
-		rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, 1, pg_size);
+	} else {
+		/*
+		 * Find out how many PDEs it takes to store all the PBLs(ptrs to
+		 * actual user pages).
+		 * We still need to allocate 1 contigous page to store all these
+		 * effectively logical PDEs though for HW access
+		 */
+		if (hwq_type == HWQ_TYPE_MR) {
+			num_pdes = pages >> MAX_PDL_LVL_SHIFT;
+			if (num_pdes) {
+				alloc_sz = num_pdes * pg_size;
+				dev_dbg(&pdev->dev, "num_pdes = %d alloc_sz 0x%x\n",
+					num_pdes, alloc_sz);
+				rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0],
+						 NULL, 1, alloc_sz);
+
+			} else {
+				rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0],
+						 NULL, 1, pg_size);
+			}
+		} else {
+			rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL,
+					 1, pg_size);
+		}
+	}
 	if (rc)
 		goto fail;
 
 	hwq->level = PBL_LVL_0;
 
 	if (pages > MAX_PBL_LVL_0_PGS) {
-		if (pages > MAX_PBL_LVL_1_PGS) {
+		if (pages > MAX_PBL_LVL_1_PGS && hwq_type != HWQ_TYPE_MR) {
 			/* 2 levels of indirection */
 			rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL,
 					 MAX_PBL_LVL_1_PGS_FOR_LVL_2, pg_size);
@@ -255,9 +278,24 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
 			dst_virt_ptr =
 				(dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
 			src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
-			for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) {
-				dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
-					src_phys_ptr[i] | flag;
+
+			if (hwq_type == HWQ_TYPE_MR) {
+				/*
+				 * For MR it is expected that we supply only
+				 * 1 contigous page i.e only 1 entry in the PDL
+				 * that will contain all the PBLs for the user
+				 * supplied memory region
+				 */
+				for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count;
+				     i++) {
+					dst_virt_ptr[0][i] = src_phys_ptr[i] |
+								flag;
+				}
+			} else {
+				for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count;
+				     i++)
+					dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+						src_phys_ptr[i] | flag;
 			}
 			if (hwq_type == HWQ_TYPE_QUEUE) {
 				/* Find the last pg of the size */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 2e5c052..a783bfa 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -55,12 +55,14 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
 enum bnxt_qplib_hwq_type {
 	HWQ_TYPE_CTX,
 	HWQ_TYPE_QUEUE,
-	HWQ_TYPE_L2_CMPL
+	HWQ_TYPE_L2_CMPL,
+	HWQ_TYPE_MR
 };
 
 #define MAX_PBL_LVL_0_PGS		1
 #define MAX_PBL_LVL_1_PGS		512
 #define MAX_PBL_LVL_1_PGS_SHIFT		9
+#define MAX_PDL_LVL_SHIFT		9
 #define MAX_PBL_LVL_1_PGS_FOR_LVL_2	256
 #define MAX_PBL_LVL_2_PGS		(256 * 512)
 
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 5216b5f..566b70a 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -668,11 +668,11 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 		pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
 		if (!pages)
 			pages++;
-
-		if (pages > MAX_PBL_LVL_1_PGS) {
+		/* Limit max MR size to 256 GB eventhough HW supports more */
+		if (pages > MAX_PBL_LVL_2_PGS) {
 			dev_err(&res->pdev->dev,
 				"SP: Reg MR pages requested (0x%x) exceeded max (0x%x)\n",
-				pages, MAX_PBL_LVL_1_PGS);
+				pages, MAX_PBL_LVL_2_PGS);
 			return -ENOMEM;
 		}
 		/* Free the hwq if it already exist, must be a rereg */
@@ -684,7 +684,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
 		rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
 					       &mr->hwq.max_elements,
 					       PAGE_SIZE, 0, PAGE_SIZE,
-					       HWQ_TYPE_CTX);
+					       HWQ_TYPE_MR);
 		if (rc) {
 			dev_err(&res->pdev->dev,
 				"SP: Reg MR memory allocation failed\n");
-- 
2.5.5




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux