[PATCH v2 rdma-next 1/5] RDMA/umem: Add API to find best driver supported page size in an MR

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This helper iterates through the SG list to find the best page size to use
from a bitmap of HW supported page sizes. Drivers that support multiple
page sizes, but not mixed sizes in an MR, can use this API.

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
Cc: Gal Pressman <galpress@xxxxxxxxxx>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
---
 drivers/infiniband/core/umem.c | 57 ++++++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_umem.h         | 19 ++++++++++++++
 include/rdma/ib_verbs.h        | 24 ++++++++++++++++++
 3 files changed, 100 insertions(+)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 7e912a9..8624ba1 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -127,6 +127,63 @@ static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
 }
 
 /**
+ * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
+ *
+ * @umem: umem struct whose DMA-mapped SG list is walked
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ * @flags: see enum ib_umem_find_best_pgsz_flags
+ * @uvirt_addr: user-space virtual MR base address (only used if the
+ * IB_UMEM_VA_BASED_OFFSET flag is set)
+ *
+ * This helper is intended for HW that supports multiple page
+ * sizes but can use only a single page size within an MR.
+ *
+ * Returns the selected page size, which is a single bit of
+ * @pgsz_bitmap, or 0 (after a WARN) if @pgsz_bitmap contains no
+ * page size of PAGE_SIZE or smaller.
+ */
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+				     unsigned long pgsz_bitmap,
+				     unsigned int flags,
+				     unsigned long uvirt_addr)
+{
+	struct scatterlist *sg;
+	dma_addr_t mask = 0;
+	unsigned int best_pg_bit;
+	int i;
+
+	if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
+		return 0;
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
+		dma_addr_t dma_addr_start, dma_addr_end;
+
+		dma_addr_start = sg_dma_address(sg);
+		dma_addr_end = sg_dma_address(sg) + sg_dma_len(sg);
+		if (!i)
+			/* First SGE: may start mid-page, so the start addr
+			 * alignment is ignored. With VA_BASED_OFFSET, its end
+			 * in virtual space limits the page size as well.
+			 * NOTE(review): a lone SGE lands here, not in 'last'.
+			 */
+			mask |= (flags & IB_UMEM_VA_BASED_OFFSET ?
+					((uvirt_addr + sg_dma_len(sg)) | dma_addr_end) :
+					dma_addr_end);
+		else if (i == (umem->nmap - 1))
+			/* Last SGE: it may end part-way into a page, so the
+			 * alignment of its end address is ignored.
+			 */
+			mask |= dma_addr_start;
+		else
+			mask |= (dma_addr_start | dma_addr_end);
+	}
+	best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
+
+	return BIT_ULL(best_pg_bit);
+}
+EXPORT_SYMBOL(ib_umem_find_best_pgsz);
+
+/**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
  * If access flags indicate ODP memory, avoid pinning. Instead, stores
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index b13a2e9..3fb403a 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -56,6 +56,14 @@ struct ib_umem {
 	unsigned int    sg_nents;
 };
 
+enum ib_umem_find_best_pgsz_flags {
+	/* Flag for HW that uses the least significant bits of the VA
+	 * (e.g. 12 bits for 4K pages, 21 bits for 2M pages) as the start
+	 * offset into the DMA page list.
+	 */
+	IB_UMEM_VA_BASED_OFFSET = (1 << 0),
+};
+
 /* Returns the offset of the umem start relative to the first page. */
 static inline int ib_umem_offset(struct ib_umem *umem)
 {
@@ -87,6 +95,10 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 int ib_umem_page_count(struct ib_umem *umem);
 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		      size_t length);
+unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+				     unsigned long pgsz_bitmap,
+				     unsigned int flags,
+				     unsigned long uvirt_addr);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -104,6 +116,13 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs
 		      		    size_t length) {
 	return -EINVAL;
 }
+static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
+						   unsigned long pgsz_bitmap,
+						   unsigned int flags,
+						   unsigned long uvirt_addr) {
+	return 0; /* same "no usable page size" value as the real helper */
+}
+
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 43a75ab..720ce23 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3214,6 +3214,30 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
 	return rdma_protocol_iwarp(dev, port_num);
 }
 
+/**
+ * rdma_find_pg_bit - Find page bit given address and HW supported page sizes
+ * @addr: address whose lowest set bit bounds the usable page size
+ * @pgsz_bitmap: bitmap of HW supported page sizes
+ * Return: bit number, within @pgsz_bitmap, of the page size to use
+ */
+static inline unsigned int rdma_find_pg_bit(unsigned long addr,
+					    unsigned long pgsz_bitmap)
+{
+	unsigned long align;
+	unsigned long pgsz;
+
+	align = addr & -addr;	/* lowest set bit of addr, 0 if addr is 0 */
+	/* Keep the supported sizes not above align (all of them if align
+	 * is 0) and take the largest. If none fit, fall back to the
+	 * smallest; presumably callers pass a non-zero pgsz_bitmap.
+	 */
+	pgsz = pgsz_bitmap & ~(-align << 1);
+	if (!pgsz)
+		return __ffs(pgsz_bitmap);
+
+	return __fls(pgsz);
+}
+
 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
 			 int state);
 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
-- 
1.8.3.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux