Patch "RDMA/mlx5: Fix the recovery flow of the UMR QP" has been added to the 6.1-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    RDMA/mlx5: Fix the recovery flow of the UMR QP

to the 6.1-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     rdma-mlx5-fix-the-recovery-flow-of-the-umr-qp.patch
and it can be found in the queue-6.1 subdirectory.

If you, or anyone else, feel it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit 15ed43c7d41f9929ea55919272003c7ba5aec402
Author: Yishai Hadas <yishaih@xxxxxxxxxx>
Date:   Sun Jan 19 14:36:13 2025 +0200

    RDMA/mlx5: Fix the recovery flow of the UMR QP
    
    [ Upstream commit d97505baea64d93538b16baf14ce7b8c1fbad746 ]
    
    This patch addresses an issue in the recovery flow of the UMR QP,
    ensuring tasks do not get stuck, as highlighted by the call trace [1].
    
    During recovery, before transitioning the QP to the RESET state, the
    software must wait for all outstanding WRs to complete.
    
    Failing to do so can cause the firmware to skip sending some flushed
    CQEs with errors and simply discard them upon the RESET, as per the IB
    specification.
    
    This race condition can result in lost CQEs and tasks becoming stuck.
    
    To resolve this, the patch sends a final WR which serves only as a
    barrier before moving the QP state to RESET.
    
    Once a CQE is received for that final WR, it guarantees that no
    outstanding WRs remain, making it safe to transition the QP to RESET and
    subsequently back to RTS, restoring proper functionality.
    
    Note:
    For the barrier WR, we simply reuse the failed and ready WR.
    Since the QP is in an error state, it will only receive
    IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier we don't
    care about its status.
    
    [1]
    INFO: task rdma_resource_l:1922 blocked for more than 120 seconds.
    Tainted: G        W          6.12.0-rc7+ #1626
    "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
    task:rdma_resource_l state:D stack:0  pid:1922 tgid:1922  ppid:1369
         flags:0x00004004
    Call Trace:
    <TASK>
    __schedule+0x420/0xd30
    schedule+0x47/0x130
    schedule_timeout+0x280/0x300
    ? mark_held_locks+0x48/0x80
    ? lockdep_hardirqs_on_prepare+0xe5/0x1a0
    wait_for_completion+0x75/0x130
    mlx5r_umr_post_send_wait+0x3c2/0x5b0 [mlx5_ib]
    ? __pfx_mlx5r_umr_done+0x10/0x10 [mlx5_ib]
    mlx5r_umr_revoke_mr+0x93/0xc0 [mlx5_ib]
    __mlx5_ib_dereg_mr+0x299/0x520 [mlx5_ib]
    ? _raw_spin_unlock_irq+0x24/0x40
    ? wait_for_completion+0xfe/0x130
    ? rdma_restrack_put+0x63/0xe0 [ib_core]
    ib_dereg_mr_user+0x5f/0x120 [ib_core]
    ? lock_release+0xc6/0x280
    destroy_hw_idr_uobject+0x1d/0x60 [ib_uverbs]
    uverbs_destroy_uobject+0x58/0x1d0 [ib_uverbs]
    uobj_destroy+0x3f/0x70 [ib_uverbs]
    ib_uverbs_cmd_verbs+0x3e4/0xbb0 [ib_uverbs]
    ? __pfx_uverbs_destroy_def_handler+0x10/0x10 [ib_uverbs]
    ? __lock_acquire+0x64e/0x2080
    ? mark_held_locks+0x48/0x80
    ? find_held_lock+0x2d/0xa0
    ? lock_acquire+0xc1/0x2f0
    ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
    ? __fget_files+0xc3/0x1b0
    ib_uverbs_ioctl+0xe7/0x170 [ib_uverbs]
    ? ib_uverbs_ioctl+0xcb/0x170 [ib_uverbs]
    __x64_sys_ioctl+0x1b0/0xa70
    do_syscall_64+0x6b/0x140
    entry_SYSCALL_64_after_hwframe+0x76/0x7e
    RIP: 0033:0x7f99c918b17b
    RSP: 002b:00007ffc766d0468 EFLAGS: 00000246 ORIG_RAX:
         0000000000000010
    RAX: ffffffffffffffda RBX: 00007ffc766d0578 RCX:
         00007f99c918b17b
    RDX: 00007ffc766d0560 RSI: 00000000c0181b01 RDI:
         0000000000000003
    RBP: 00007ffc766d0540 R08: 00007f99c8f99010 R09:
         000000000000bd7e
    R10: 00007f99c94c1c70 R11: 0000000000000246 R12:
         00007ffc766d0530
    R13: 000000000000001c R14: 0000000040246a80 R15:
         0000000000000000
    </TASK>
    
    Fixes: 158e71bb69e3 ("RDMA/mlx5: Add a umr recovery flow")
    Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx>
    Reviewed-by: Michael Guralnik <michaelgur@xxxxxxxxxx>
    Link: https://patch.msgid.link/27b51b92ec42dfb09d8096fcbd51878f397ce6ec.1737290141.git.leon@xxxxxxxxxx
    Signed-off-by: Leon Romanovsky <leon@xxxxxxxxxx>
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index fa000182d0b41..1a39e86178ece 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -199,30 +199,6 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
 	ib_dealloc_pd(dev->umrc.pd);
 }
 
-static int mlx5r_umr_recover(struct mlx5_ib_dev *dev)
-{
-	struct umr_common *umrc = &dev->umrc;
-	struct ib_qp_attr attr;
-	int err;
-
-	attr.qp_state = IB_QPS_RESET;
-	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
-	if (err) {
-		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
-		goto err;
-	}
-
-	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
-	if (err)
-		goto err;
-
-	umrc->state = MLX5_UMR_STATE_ACTIVE;
-	return 0;
-
-err:
-	umrc->state = MLX5_UMR_STATE_ERR;
-	return err;
-}
 
 static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
 			       struct mlx5r_umr_wqe *wqe, bool with_data)
@@ -270,6 +246,61 @@ static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
 	return err;
 }
 
+static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey,
+			     struct mlx5r_umr_context *umr_context,
+			     struct mlx5r_umr_wqe *wqe, bool with_data)
+{
+	struct umr_common *umrc = &dev->umrc;
+	struct ib_qp_attr attr;
+	int err;
+
+	mutex_lock(&umrc->lock);
+	/* Preventing any further WRs to be sent now */
+	if (umrc->state != MLX5_UMR_STATE_RECOVER) {
+		mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n",
+			     umrc->state);
+		umrc->state = MLX5_UMR_STATE_RECOVER;
+	}
+	mutex_unlock(&umrc->lock);
+
+	/* Sending a final/barrier WR (the failed one) and wait for its completion.
+	 * This will ensure that all the previous WRs got a completion before
+	 * we set the QP state to RESET.
+	 */
+	err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe,
+				  with_data);
+	if (err) {
+		mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err);
+		goto err;
+	}
+
+	/* Since the QP is in an error state, it will only receive
+	 * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier
+	 * we don't care about its status.
+	 */
+	wait_for_completion(&umr_context->done);
+
+	attr.qp_state = IB_QPS_RESET;
+	err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE);
+	if (err) {
+		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err);
+		goto err;
+	}
+
+	err = mlx5r_umr_qp_rst2rts(dev, umrc->qp);
+	if (err) {
+		mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err);
+		goto err;
+	}
+
+	umrc->state = MLX5_UMR_STATE_ACTIVE;
+	return 0;
+
+err:
+	umrc->state = MLX5_UMR_STATE_ERR;
+	return err;
+}
+
 static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct mlx5_ib_umr_context *context =
@@ -334,9 +365,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
 		mlx5_ib_warn(dev,
 			"reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
 			umr_context.status, mkey);
-		mutex_lock(&umrc->lock);
-		err = mlx5r_umr_recover(dev);
-		mutex_unlock(&umrc->lock);
+		err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data);
 		if (err)
 			mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n",
 				     err);




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux