[PATCH RFC 2/4] RDMA/umem: Add API to find best driver supported page size in an MR

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This helper iterates through the SG list to find the best page size to use
from a bitmap of HW supported page sizes. Drivers that support multiple
page sizes, but not mixed pages in an MR can call this API.

Suggested-by: Jason Gunthorpe <jgg@xxxxxxxx>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
Signed-off-by: Shiraz Saleem <shiraz.saleem@xxxxxxxxx>
---
 drivers/infiniband/core/umem.c | 95 ++++++++++++++++++++++++++++++++++++++++++
 include/rdma/ib_umem.h         |  7 ++++
 2 files changed, 102 insertions(+)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 486d6d7..04071b5 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -66,6 +66,101 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 }
 
 /**
+ * ib_umem_find_pg_bit - Find the page bit to use for phyaddr
+ *
+ * @phyaddr: Physical address after DMA translation
+ * @supported_pgsz: bitmask of HW supported page sizes
+ */
+static int ib_umem_find_pg_bit(unsigned long phyaddr,
+			       unsigned long supported_pgsz)
+{
+	unsigned long num_zeroes;
+	int pg_bit;
+
+	/* Trailing zero bits in the address */
+	num_zeroes = __ffs(phyaddr);
+
+	/* Find page bit such that phyaddr is aligned to the highest supported
+	 * HW page size
+	 */
+	pg_bit = fls64(supported_pgsz & (BIT_ULL(num_zeroes + 1) - 1)) - 1;
+
+	return pg_bit;
+}
+
+/**
+ * ib_umem_find_single_pg_size - Find best HW page size to use for this MR
+ * @umem: umem struct
+ * @supported_pgsz: bitmask of HW supported page sizes
+ *
+ * This helper is intended for HW that support multiple page
+ * sizes but can do only a single page size in an MR.
+ */
+unsigned long ib_umem_find_single_pg_size(struct ib_umem *umem,
+					  unsigned long supported_pgsz)
+{
+	struct scatterlist *sg;
+	unsigned long dma_addr_start, dma_addr_end;
+	unsigned long uvirt_offset, phy_offset;
+	unsigned long pg_mask, bitmap;
+	int pg_bit_start, pg_bit_end, pg_bit_sg_chunk;
+	int lowest_pg_bit, best_pg_bit;
+	int i;
+
+	if (!supported_pgsz)
+		return 0;
+
+	lowest_pg_bit = __ffs(supported_pgsz);
+	best_pg_bit = fls64(supported_pgsz) - 1;
+
+	for_each_sg(umem->sg_head.sgl, sg, umem->sg_head.orig_nents, i) {
+		dma_addr_start = sg_dma_address(sg);
+		dma_addr_end = sg_dma_address(sg) + sg_dma_len(sg);
+		pg_bit_start = ib_umem_find_pg_bit(dma_addr_start, supported_pgsz);
+		pg_bit_end = ib_umem_find_pg_bit(dma_addr_end, supported_pgsz);
+
+		if (!i) {
+			pg_bit_sg_chunk = max_t(int, pg_bit_start, pg_bit_end);
+			bitmap = supported_pgsz;
+			/* The start offset of the MR into a first _large_ page
+			 * should line up exactly for the user-space virtual buf
+			 * and physical buffer, in order to upgrade the page bit
+			 */
+			while (pg_bit_sg_chunk > PAGE_SHIFT) {
+				pg_mask = ~((1 << pg_bit_sg_chunk) - 1);
+				uvirt_offset = umem->address & ~pg_mask;
+				phy_offset = (dma_addr_start + ib_umem_offset(umem)) &
+					      ~pg_mask;
+				if (uvirt_offset == phy_offset)
+					break;
+
+				/* Retry with next supported page size */
+				clear_bit(pg_bit_sg_chunk, &bitmap);
+				pg_bit_sg_chunk = fls64(bitmap) - 1;
+			}
+		} else if (i == (umem->sg_head.orig_nents - 1)) {
+			/* last SG chunk: Does not matter if MR ends at an
+			 * unaligned offset.
+			 */
+			pg_bit_sg_chunk = pg_bit_start;
+		} else {
+			pg_bit_sg_chunk = min_t(int, pg_bit_start, pg_bit_end);
+		}
+
+		best_pg_bit = min_t(int, best_pg_bit, pg_bit_sg_chunk);
+		if (best_pg_bit == lowest_pg_bit)
+			break;
+	}
+
+	/* best page bit cannot be less than the lowest supported HW size */
+	if (best_pg_bit < lowest_pg_bit)
+		return BIT_ULL(lowest_pg_bit);
+
+	return BIT_ULL(best_pg_bit);
+}
+EXPORT_SYMBOL(ib_umem_find_single_pg_size);
+
+/**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
  * If access flags indicate ODP memory, avoid pinning. Instead, stores
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 5d3755e..24ba6c6 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -86,6 +86,8 @@ void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
 int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
 		      size_t length);
+unsigned long ib_umem_find_single_pg_size(struct ib_umem *umem,
+					  unsigned long supported_pgsz);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -102,6 +104,11 @@ static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offs
 		      		    size_t length) {
 	return -EINVAL;
 }
+static inline int ib_umem_find_single_pg_size(struct ib_umem *umem,
+					      unsigned long supported_pgsz) {
+	return -EINVAL;
+}
+
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */
-- 
2.8.3




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux