Recognize the new access flag and call the core function to import
dma-buf based memory regions.

Since the invalidation callback is called from the dma-buf driver
instead of the mmu_interval_notifier, the part of the ODP pagefault
handler that checks for ongoing invalidation is modified to handle the
difference.

Signed-off-by: Jianxin Xiong <jianxin.xiong@xxxxxxxxx>
Reviewed-by: Sean Hefty <sean.hefty@xxxxxxxxx>
Acked-by: Michael J. Ruhl <michael.j.ruhl@xxxxxxxxx>
---
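For reference (not meant for the commit message): with the two wrappers
added below, the fault path in pagefault_real_mr() keeps the same
sequence-check shape for both backing types. A condensed sketch of that
pattern follows; error handling and the actual MTT programming are
omitted, and the trailing unlock and the (np < 0) early return are
assumed here rather than shown in the hunks below.

	unsigned long current_seq;
	int np;

	/* snapshot the invalidation sequence count; the wrapper picks the
	 * dma-buf callback's count or the mmu_interval_notifier's */
	current_seq = notifier_read_begin(odp);

	np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
				       current_seq);
	if (np < 0)
		return np;

	mutex_lock(&odp->umem_mutex);
	if (!notifier_read_retry(odp, current_seq)) {
		/* no invalidation raced with the mapping: the pages are
		 * still valid and the device MTTs can be updated here */
	}
	mutex_unlock(&odp->umem_mutex);
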
 drivers/infiniband/hw/mlx5/mr.c  | 50 +++++++++++++++++++++++++++++++++++++---
 drivers/infiniband/hw/mlx5/odp.c | 22 ++++++++++++++++--
 2 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3c91e32..d58be20 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -912,6 +912,44 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
 	return 0;
 }
 
+static int mr_umem_dmabuf_get(struct mlx5_ib_dev *dev, u64 start, u64 length,
+			      int dmabuf_fd, int access_flags,
+			      struct ib_umem **umem, int *npages,
+			      int *page_shift, int *ncont, int *order)
+{
+	struct ib_umem *u;
+	struct ib_umem_odp *odp;
+
+	*umem = NULL;
+
+	u = ib_umem_dmabuf_get(&dev->ib_dev, start, length, dmabuf_fd,
+			       access_flags, &mlx5_mn_ops);
+	if (IS_ERR(u)) {
+		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
+		return PTR_ERR(u);
+	}
+
+	odp = to_ib_umem_odp(u);
+	*page_shift = odp->page_shift;
+	*ncont = ib_umem_odp_num_pages(odp);
+	*npages = *ncont << (*page_shift - PAGE_SHIFT);
+	if (order)
+		*order = ilog2(roundup_pow_of_two(*ncont));
+
+	if (!*npages) {
+		mlx5_ib_warn(dev, "avoid zero region\n");
+		ib_umem_release(u);
+		return -EINVAL;
+	}
+
+	*umem = u;
+
+	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
+		    *npages, *ncont, *order, *page_shift);
+
+	return 0;
+}
+
 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct mlx5_ib_umr_context *context =
@@ -1382,9 +1420,15 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd,
 		return &mr->ibmr;
 	}
 
-	err = mr_umem_get(dev, attr->start, attr->length,
-			  attr->access_flags, &umem,
-			  &npages, &page_shift, &ncont, &order);
+	if (attr->access_flags & IB_ACCESS_DMABUF)
+		err = mr_umem_dmabuf_get(dev, attr->start, attr->length,
+					 attr->fd, attr->access_flags,
+					 &umem, &npages, &page_shift, &ncont,
+					 &order);
+	else
+		err = mr_umem_get(dev, attr->start, attr->length,
+				  attr->access_flags, &umem,
+				  &npages, &page_shift, &ncont, &order);
 	if (err < 0)
 		return ERR_PTR(err);
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index cfd7efa..f2ca3f8 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -665,6 +665,24 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
 	dma_fence_odp_mr(mr);
 }
 
+static inline unsigned long notifier_read_begin(struct ib_umem_odp *odp)
+{
+	if (odp->umem.is_dmabuf)
+		return ib_umem_dmabuf_notifier_read_begin(odp);
+	else
+		return mmu_interval_read_begin(&odp->notifier);
+}
+
+static inline int notifier_read_retry(struct ib_umem_odp *odp,
+				      unsigned long current_seq)
+{
+	if (odp->umem.is_dmabuf) {
+		return ib_umem_dmabuf_notifier_read_retry(odp, current_seq);
+	} else {
+		return mmu_interval_read_retry(&odp->notifier, current_seq);
+	}
+}
+
 #define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
 static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 			     u64 user_va, size_t bcnt, u32 *bytes_mapped,
@@ -683,7 +701,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	if (odp->umem.writable && !downgrade)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-	current_seq = mmu_interval_read_begin(&odp->notifier);
+	current_seq = notifier_read_begin(odp);
 
 	np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask,
 				       current_seq);
@@ -691,7 +709,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
 	return np;
 
 	mutex_lock(&odp->umem_mutex);
-	if (!mmu_interval_read_retry(&odp->notifier, current_seq)) {
+	if (!notifier_read_retry(odp, current_seq)) {
 		/*
 		 * No need to check whether the MTTs really belong to
 		 * this MR, since ib_umem_odp_map_dma_pages already
-- 
1.8.3.1