From: Michael Guralnik <michaelgur@xxxxxxxxxx> Restrict the check for the number of pages handled during an ODP page fault to direct mkeys. Perform the check right after handling the page fault and don't propagate the number of handled pages to callers. Indirect mkeys and their associated direct mkeys can have different start addresses. As a result, the calculation of the number of pages to handle for an indirect mkey may not match the actual page fault handling done on the direct mkey. For example: A 4K sized page fault on a KSM mkey that has a start address that is not aligned to a page will result a calculation that assumes the number of pages required to handle are 2. While the underlying MTT might be aligned will require fetching only a single page. Thus, do the calculation and compare number of pages handled only per direct mkey. Fixes: db570d7deafb ("IB/mlx5: Add ODP support to MW") Signed-off-by: Michael Guralnik <michaelgur@xxxxxxxxxx> Reviewed-by: Artemy Kovalyov <artemyko@xxxxxxxxxx> Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxx> --- drivers/infiniband/hw/mlx5/odp.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e2468a602e3df..d33ecc37eafed 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -961,8 +961,7 @@ static struct mlx5_ib_mkey *find_odp_mkey(struct mlx5_ib_dev *dev, u32 key) /* * Handle a single data segment in a page-fault WQE or RDMA region. * - * Returns number of OS pages retrieved on success. The caller may continue to - * the next data segment. + * Returns zero on success. The caller may continue to the next data segment. * Can return the following error codes: * -EAGAIN to designate a temporary error. The caller will abort handling the * page fault and resolve it. @@ -975,7 +974,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; + int ret, i, outlen, cur_outlen = 0, depth = 0, pages_in_range; struct pf_frame *head = NULL, *frame; struct mlx5_ib_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -1010,13 +1009,20 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + pages_in_range = (ALIGN(io_virt + bcnt, PAGE_SIZE) - + (io_virt & PAGE_MASK)) >> + PAGE_SHIFT; ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false); if (ret < 0) goto end; mlx5_update_odp_stats_with_handled(mr, faults, ret); - npages += ret; + if (ret < pages_in_range) { + ret = -EFAULT; + goto end; + } + ret = 0; break; @@ -1107,7 +1113,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, kfree(out); *bytes_committed = 0; - return ret ? ret : npages; + return ret; } /* @@ -1126,8 +1132,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, * the committed bytes). * @receive_queue: receive WQE end of sg list * - * Returns the number of pages loaded if positive, zero for an empty WQE, or a - * negative error code. + * Returns zero for success or a negative error code. */ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, @@ -1135,7 +1140,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, void *wqe_end, u32 *bytes_mapped, u32 *total_wqe_bytes, bool receive_queue) { - int ret = 0, npages = 0; + int ret = 0; u64 io_virt; __be32 key; u32 byte_count; @@ -1192,10 +1197,9 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, bytes_mapped); if (ret < 0) break; - npages += ret; } - return ret < 0 ? ret : npages; + return ret; } /* @@ -1431,12 +1435,6 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, free_page((unsigned long)wqe_start); } -static int pages_in_range(u64 address, u32 length) -{ - return (ALIGN(address + length, PAGE_SIZE) - - (address & PAGE_MASK)) >> PAGE_SHIFT; -} - static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { @@ -1475,7 +1473,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; - } else if (ret < 0 || pages_in_range(address, length) > ret) { + } else if (ret < 0) { mlx5_ib_page_fault_resume(dev, pfault, 1); if (ret != -ENOENT) mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%llx, type: 0x%x\n", -- 2.47.1