ib_umem_num_pages() should only be used by things working with the SGL in
CPU pages directly.

Drivers building DMA lists should use the new ib_umem_num_dma_blocks()
which returns the number of blocks rdma_umem_for_each_dma_block() will
return.

Making this general for DMA drivers requires a different implementation.
Computing the DMA block count based on umem->address only works if the
requested page size is < PAGE_SIZE and/or the IOVA == umem->address.

Instead the number of DMA blocks should be computed in the IOVA address
space, not umem->address. Thus the IOVA has to be stored inside the umem
so it can be used for these calculations.

For now set it to umem->address by default and fix it up if
ib_umem_find_best_pgsz() was called. This allows drivers to be converted
to ib_umem_num_dma_blocks() safely.

Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
 drivers/infiniband/core/umem.c               |  7 ++++++-
 drivers/infiniband/hw/cxgb4/mem.c            |  2 +-
 drivers/infiniband/hw/mlx5/mem.c             |  4 ++--
 drivers/infiniband/hw/mthca/mthca_provider.c |  2 +-
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c |  2 +-
 include/rdma/ib_umem.h                       | 15 ++++++++++++---
 6 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fb7630e7aac3a7..b57dbb14de8378 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -161,7 +161,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
 	if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
 		return 0;
 
-	va = virt;
+	umem->iova = va = virt;
 	/* The best result is the smallest page size that results in the minimum
 	 * number of required pages. Compute the largest page size that could
 	 * work based on VA address bits that don't change.
@@ -240,6 +240,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	umem->ibdev = device;
 	umem->length = size;
 	umem->address = addr;
+	/*
+	 * Drivers should call ib_umem_find_best_pgsz() to set the iova
+	 * correctly.
+	 */
+	umem->iova = addr;
 	umem->writable = ib_access_writable(access);
 	umem->owning_mm = mm = current->mm;
 	mmgrab(mm);
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 82afdb1987eff6..22c8f5745047db 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -548,7 +548,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 	shift = PAGE_SHIFT;
 
-	n = ib_umem_num_pages(mhp->umem);
+	n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift);
 	err = alloc_pbl(mhp, n);
 	if (err)
 		goto err_umem_release;
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index c19ec9fd8a63c3..13de3d2edd34e3 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -169,8 +169,8 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
 			  int page_shift, __be64 *pas, int access_flags)
 {
 	return __mlx5_ib_populate_pas(dev, umem, page_shift, 0,
-				      ib_umem_num_pages(umem), pas,
-				      access_flags);
+				      ib_umem_num_dma_blocks(umem, PAGE_SIZE),
+				      pas, access_flags);
 }
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 317e67ad915fe8..b785fb9a2634ff 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -877,7 +877,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		goto err;
 	}
 
-	n = ib_umem_num_pages(mr->umem);
+	n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE);
 
 	mr->mtt = mthca_alloc_mtt(dev, n);
 	if (IS_ERR(mr->mtt)) {
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
index 91f0957e611543..e80848bfb3bdbf 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 		return ERR_CAST(umem);
 	}
 
-	npages = ib_umem_num_pages(umem);
+	npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
 		dev_warn(&dev->pdev->dev,
 			 "overflow %d pages in mem region\n", npages);
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index b880512ba95f16..0f1ab3d8f77dea 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -17,6 +17,7 @@ struct ib_umem_odp;
 struct ib_umem {
 	struct ib_device *ibdev;
 	struct mm_struct *owning_mm;
+	u64 iova;
 	size_t length;
 	unsigned long address;
 	u32 writable : 1;
@@ -33,11 +34,17 @@ static inline int ib_umem_offset(struct ib_umem *umem)
 	return umem->address & ~PAGE_MASK;
 }
 
+static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem,
+					    unsigned long pgsz)
+{
+	return (ALIGN(umem->iova + umem->length, pgsz) -
+		ALIGN_DOWN(umem->iova, pgsz)) /
+	       pgsz;
+}
+
 static inline size_t ib_umem_num_pages(struct ib_umem *umem)
 {
-	return (ALIGN(umem->address + umem->length, PAGE_SIZE) -
-		ALIGN_DOWN(umem->address, PAGE_SIZE)) >>
-	       PAGE_SHIFT;
+	return ib_umem_num_dma_blocks(umem, PAGE_SIZE);
 }
 
 static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
@@ -55,6 +62,8 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter,
  * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The
  * returned DMA blocks will be aligned to pgsz and span the range:
  * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz)
+ *
+ * Performs exactly ib_umem_num_dma_blocks() iterations.
 */
 #define rdma_umem_for_each_dma_block(umem, biter, pgsz)                        \
 	for (__rdma_umem_block_iter_start(biter, umem, pgsz);                  \
-- 
2.28.0
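
As an aside for anyone converting a driver: the pairing described by the new
comment ("Performs exactly ib_umem_num_dma_blocks() iterations") is meant to
be used roughly like the sketch below. This is illustrative only and not
taken from any in-tree driver; my_build_page_list(), nblocks and page_list
are made-up names. The assumed pattern is simply that the count sizes the
page list and the iterator fills it, both computed for the same pgsz.

#include <linux/kernel.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

/*
 * Illustrative sketch (not part of this patch): size a driver page list
 * with ib_umem_num_dma_blocks() and fill it with
 * rdma_umem_for_each_dma_block() at the same page size.
 */
static int my_build_page_list(struct ib_umem *umem, unsigned long pgsz,
			      u64 *page_list, size_t nblocks)
{
	struct ib_block_iter biter;
	size_t i = 0;

	/* Caller sized page_list from the same helper and pgsz */
	if (nblocks != ib_umem_num_dma_blocks(umem, pgsz))
		return -EINVAL;

	rdma_umem_for_each_dma_block(umem, &biter, pgsz) {
		/* Each returned block is pgsz-aligned in DMA/IOVA space */
		page_list[i++] = rdma_block_iter_dma_address(&biter);
	}

	return 0;
}

Computing the count in the IOVA space is what makes this pairing hold for
pgsz > PAGE_SIZE. Plugging made-up numbers into the new formula: with
pgsz = 2MB, length = 2MB, iova = 0x400000 but address = 0x201000, the
iova-based count is one block, while the old address-based computation
would have returned two.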