[PATCH rdma-next 09/10] IB/mlx5: Extract page fault code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Artemy Kovalyov <artemyko@xxxxxxxxxxxx>

To make the page fault handling code more flexible,
split the pagefault_single_data_segment() function.
Keep MR resolution in pagefault_single_data_segment() and
move the actual updates into pagefault_mr().

Signed-off-by: Artemy Kovalyov <artemyko@xxxxxxxxxxxx>
Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
---
 drivers/infiniband/hw/mlx5/odp.c | 203 ++++++++++++++++++++-------------------
 1 file changed, 104 insertions(+), 99 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index eddabd6e6596..842e1dbb50b8 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -511,81 +511,38 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
 	wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
 }
 
-/*
- * Handle a single data segment in a page-fault WQE or RDMA region.
- *
- * Returns number of OS pages retrieved on success. The caller may continue to
- * the next data segment.
- * Can return the following error codes:
- * -EAGAIN to designate a temporary error. The caller will abort handling the
- *  page fault and resolve it.
- * -EFAULT when there's an error mapping the requested pages. The caller will
- *  abort the page fault handling.
- */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
-					 u32 key, u64 io_virt, size_t bcnt,
-					 u32 *bytes_committed,
-					 u32 *bytes_mapped)
+static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+			u64 io_virt, size_t bcnt, u32 *bytes_mapped)
 {
-	int srcu_key;
-	unsigned int current_seq = 0;
-	u64 start_idx, page_mask;
-	int npages = 0, ret = 0;
-	struct mlx5_ib_mr *mr;
 	u64 access_mask = ODP_READ_ALLOWED_BIT;
+	int npages = 0, page_shift, np;
+	u64 start_idx, page_mask;
 	struct ib_umem_odp *odp;
-	int implicit = 0;
+	int current_seq;
 	size_t size;
-	int page_shift;
-
-	srcu_key = srcu_read_lock(&dev->mr_srcu);
-	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
-	/*
-	 * If we didn't find the MR, it means the MR was closed while we were
-	 * handling the ODP event. In this case we return -EFAULT so that the
-	 * QP will be closed.
-	 */
-	if (!mr || !mr->ibmr.pd) {
-		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-			    key);
-		ret = -EFAULT;
-		goto srcu_unlock;
-	}
-	if (!mr->umem->odp_data) {
-		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-			    key);
-		if (bytes_mapped)
-			*bytes_mapped +=
-				(bcnt - *bytes_committed);
-		goto srcu_unlock;
-	}
-
-	/*
-	 * Avoid branches - this code will perform correctly
-	 * in all iterations (in iteration 2 and above,
-	 * bytes_committed == 0).
-	 */
-	io_virt += *bytes_committed;
-	bcnt -= *bytes_committed;
+	int ret;
 
 	if (!mr->umem->odp_data->page_list) {
 		odp = implicit_mr_get_data(mr, io_virt, bcnt);
 
-		if (IS_ERR(odp)) {
-			ret = PTR_ERR(odp);
-			goto srcu_unlock;
-		}
+		if (IS_ERR(odp))
+			return PTR_ERR(odp);
 		mr = odp->private;
-		implicit = 1;
 
 	} else {
 		odp = mr->umem->odp_data;
 	}
 
+next_mr:
+	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
+
 	page_shift = mr->umem->page_shift;
 	page_mask = ~(BIT(page_shift) - 1);
+	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
+
+	if (mr->umem->writable)
+		access_mask |= ODP_WRITE_ALLOWED_BIT;
 
-next_mr:
 	current_seq = READ_ONCE(odp->notifiers_seq);
 	/*
 	 * Ensure the sequence number is valid for some time before we call
@@ -593,51 +550,43 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 	 */
 	smp_rmb();
 
-	size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
-	start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
-
-	if (mr->umem->writable)
-		access_mask |= ODP_WRITE_ALLOWED_BIT;
-
 	ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
 					access_mask, current_seq);
 
 	if (ret < 0)
-		goto srcu_unlock;
+		goto out;
 
-	if (ret > 0) {
-		int np = ret;
-
-		mutex_lock(&odp->umem_mutex);
-		if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
-			/*
-			 * No need to check whether the MTTs really belong to
-			 * this MR, since ib_umem_odp_map_dma_pages already
-			 * checks this.
-			 */
-			ret = mlx5_ib_update_xlt(mr, start_idx, np,
-						 page_shift,
-						 MLX5_IB_UPD_XLT_ATOMIC);
-		} else {
-			ret = -EAGAIN;
-		}
-		mutex_unlock(&odp->umem_mutex);
-		if (ret < 0) {
-			if (ret != -EAGAIN)
-				mlx5_ib_err(dev, "Failed to update mkey page tables\n");
-			goto srcu_unlock;
-		}
-		if (bytes_mapped) {
-			u32 new_mappings = (np << page_shift) -
-				(io_virt - round_down(io_virt,
-						      1 << page_shift));
-			*bytes_mapped += min_t(u32, new_mappings, size);
-		}
+	np = ret;
 
-		npages += np << (page_shift - PAGE_SHIFT);
+	mutex_lock(&odp->umem_mutex);
+	if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+		/*
+		 * No need to check whether the MTTs really belong to
+		 * this MR, since ib_umem_odp_map_dma_pages already
+		 * checks this.
+		 */
+		ret = mlx5_ib_update_xlt(mr, start_idx, np,
+					 page_shift, MLX5_IB_UPD_XLT_ATOMIC);
+	} else {
+		ret = -EAGAIN;
 	}
+	mutex_unlock(&odp->umem_mutex);
 
+	if (ret < 0) {
+		if (ret != -EAGAIN)
+			mlx5_ib_err(dev, "Failed to update mkey page tables\n");
+		goto out;
+	}
+
+	if (bytes_mapped) {
+		u32 new_mappings = (np << page_shift) -
+			(io_virt - round_down(io_virt, 1 << page_shift));
+		*bytes_mapped += min_t(u32, new_mappings, size);
+	}
+
+	npages += np << (page_shift - PAGE_SHIFT);
 	bcnt -= size;
+
 	if (unlikely(bcnt)) {
 		struct ib_umem_odp *next;
 
@@ -646,17 +595,18 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 		if (unlikely(!next || next->umem->address != io_virt)) {
 			mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
 				    io_virt, next);
-			ret = -EAGAIN;
-			goto srcu_unlock_no_wait;
+			return -EAGAIN;
 		}
 		odp = next;
 		mr = odp->private;
 		goto next_mr;
 	}
 
-srcu_unlock:
+	return npages;
+
+out:
 	if (ret == -EAGAIN) {
-		if (implicit || !odp->dying) {
+		if (mr->parent || !odp->dying) {
 			unsigned long timeout =
 				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
@@ -672,7 +622,62 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 		}
 	}
 
-srcu_unlock_no_wait:
+	return ret;
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE or RDMA region.
+ *
+ * Returns the number of OS pages retrieved on success (0 for a non-ODP MR,
+ * which is skipped). The caller may continue to the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ *  page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ *  abort the page fault handling.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
+					 u32 key, u64 io_virt, size_t bcnt,
+					 u32 *bytes_committed,
+					 u32 *bytes_mapped)
+{
+	int npages = 0, srcu_key, ret = 0;
+	struct mlx5_ib_mr *mr;
+
+	srcu_key = srcu_read_lock(&dev->mr_srcu);
+	mr = mlx5_ib_odp_find_mr_lkey(dev, key);
+	/*
+	 * If we didn't find the MR, it means the MR was closed while we were
+	 * handling the ODP event. In this case we return -EFAULT so that the
+	 * QP will be closed.
+	 */
+	if (!mr || !mr->ibmr.pd) {
+		mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+			    key);
+		ret = -EFAULT;
+		goto srcu_unlock;
+	}
+	if (!mr->umem->odp_data) {
+		mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			    key);
+		if (bytes_mapped)
+			*bytes_mapped +=
+				(bcnt - *bytes_committed);
+		goto srcu_unlock;
+	}
+
+	/*
+	 * Avoid branches - this code will perform correctly
+	 * in all iterations (in iteration 2 and above,
+	 * bytes_committed == 0).
+	 */
+	io_virt += *bytes_committed;
+	bcnt -= *bytes_committed;
+
+	/* On failure npages is a negative errno; ret == 0 lets it through. */
+	npages = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+
+srcu_unlock:
 	srcu_read_unlock(&dev->mr_srcu, srcu_key);
 	*bytes_committed = 0;
 	return ret ? ret : npages;
-- 
2.12.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux