From: Yishai Hadas <yishaih@xxxxxxxxxx> Extend advice MR to support a non-faulting mode. This improves performance by eliminating page faults and bringing in only the existing CPU pages. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx> --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- drivers/infiniband/hw/mlx5/odp.c | 7 ++++++- include/uapi/rdma/ib_user_ioctl_verbs.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index eac869340158..1a82a57fc415 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1312,7 +1312,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct uverbs_attr_bundle *attrs) { if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && - advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) return -EOPNOTSUPP; return mlx5_ib_advise_mr_prefetch(pd, advice, flags, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 5bd5e19d76a2..28b7227d31bf 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -665,6 +665,7 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) +#define MLX5_PF_FLAGS_SNAPSHOT BIT(2) static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, u32 flags) @@ -673,6 +674,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; u64 access_mask; u64 start_idx; + bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT); page_shift = odp->page_shift; start_idx = (user_va - ib_umem_start(odp)) >> page_shift; @@ -681,7 +683,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (odp->umem.writable && !downgrade) access_mask |= 
ODP_WRITE_ALLOWED_BIT; - np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, true); + np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault); if (np < 0) return np; @@ -1851,6 +1853,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) + pf_flags |= MLX5_PF_FLAGS_SNAPSHOT; + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index cfea82acfe57..22483799cd07 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -208,6 +208,7 @@ enum ib_uverbs_read_counters_flags { enum ib_uverbs_advise_mr_advice { IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH, IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT, }; enum ib_uverbs_advise_mr_flag { -- 2.26.2