Re: [PATCH v2 02/10] drm/amdgpu: fix sriov host flr handler

Christian König <christian.koenig@xxxxxxx> · Wed, 29 May 2024 08:41:02 +0200

Am 28.05.24 um 19:23 schrieb Yunxiang Li:
We send back the ready to reset message before we stop anything. This is
wrong. Move it to when we are actually ready for the FLR to happen.

In the current state since we take tens of seconds to stop everything,
it is very likely that host would give up waiting and reset the GPU
before we send ready, so it would be the same as before. But this gets
rid of the hack with reset_domain locking and also let us know how slow
the reset actually is on the host. The pre-reset speed can thus be
improved later.

Signed-off-by: Yunxiang Li <Yunxiang.Li@xxxxxxx>

It looks like a nice cleanup to me, but that is absolutely not my field 
of expertise.

But feel free to add an Acked-by: Christian König <christian.koenig@xxxxxxx>

Regards,
Christian.

---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 14 ++++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |  2 ++
  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      | 37 ++++++++--------------
  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c      | 37 ++++++++--------------
  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c      |  6 ----
  6 files changed, 46 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bf1a6593dc5e..eb77b4ec3cb4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5069,6 +5069,8 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
  	struct amdgpu_hive_info *hive = NULL;
  
  	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
+		amdgpu_virt_ready_to_reset(adev);
+		amdgpu_virt_wait_reset(adev);
  		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
  		r = amdgpu_virt_request_full_gpu(adev, true);
  	} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3cf8416f8cb0..44450507c140 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -152,6 +152,20 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
  		DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
  }
  
+/**
+ * amdgpu_virt_ready_to_reset() - send ready to reset to host
+ * @adev:	amdgpu device.
+ * Send ready to reset message to GPU hypervisor to signal we have stopped GPU
+ * activity and is ready for host FLR
+ */
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
+{
+	struct amdgpu_virt *virt = &adev->virt;
+
+	if (virt->ops && virt->ops->reset_gpu)
+		virt->ops->ready_to_reset(adev);
+}
+
  /**
   * amdgpu_virt_wait_reset() - wait for reset gpu completed
   * @adev:	amdgpu device.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 642f1fd287d8..66de5380d9a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -88,6 +88,7 @@ struct amdgpu_virt_ops {
  	int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
  	int (*req_init_data)(struct amdgpu_device *adev);
  	int (*reset_gpu)(struct amdgpu_device *adev);
+	void (*ready_to_reset)(struct amdgpu_device *adev);
  	int (*wait_reset)(struct amdgpu_device *adev);
  	void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
  			  u32 data1, u32 data2, u32 data3);
@@ -345,6 +346,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
  int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init);
  int amdgpu_virt_reset_gpu(struct amdgpu_device *adev);
  void amdgpu_virt_request_init_data(struct amdgpu_device *adev);
+void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev);
  int amdgpu_virt_wait_reset(struct amdgpu_device *adev);
  int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev);
  void amdgpu_virt_free_mm_table(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index f4c47492e0cd..3fdd1fc84723 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -249,38 +249,28 @@ static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
  	return 0;
  }
  
-static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+static void xgpu_ai_ready_to_reset(struct amdgpu_device *adev)
  {
-	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
-	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
-	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-
-	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
-	 * otherwise the mailbox msg will be ruined/reseted by
-	 * the VF FLR.
-	 */
-	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
-		return;
-
-	down_write(&adev->reset_domain->sem);
-
-	amdgpu_virt_fini_data_exchange(adev);
-
  	xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
  
+static int xgpu_ai_wait_reset(struct amdgpu_device *adev)
+{
+	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
  	do {
  		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
-			goto flr_done;
-
+			return 0;
  		msleep(10);
  		timeout -= 10;
  	} while (timeout > 1);
-
  	dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n");
+	return -ETIME;
+}
  
-flr_done:
-	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
-	up_write(&adev->reset_domain->sem);
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
  
  	/* Trigger recovery for world switch failure if no TDR */
  	if (amdgpu_device_should_recover_gpu(adev)
@@ -417,7 +407,8 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
  	.req_full_gpu	= xgpu_ai_request_full_gpu_access,
  	.rel_full_gpu	= xgpu_ai_release_full_gpu_access,
  	.reset_gpu = xgpu_ai_request_reset,
-	.wait_reset = NULL,
+	.ready_to_reset = xgpu_ai_ready_to_reset,
+	.wait_reset = xgpu_ai_wait_reset,
  	.trans_msg = xgpu_ai_mailbox_trans_msg,
  	.req_init_data  = xgpu_ai_request_init_data,
  	.ras_poison_handler = xgpu_ai_ras_poison_handler,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 37b49a5ed2a1..cd6ec1afff2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -282,38 +282,28 @@ static int xgpu_nv_set_mailbox_ack_irq(struct amdgpu_device *adev,
  	return 0;
  }
  
-static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+static void xgpu_nv_ready_to_reset(struct amdgpu_device *adev)
  {
-	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
-	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
-	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
-
-	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
-	 * otherwise the mailbox msg will be ruined/reseted by
-	 * the VF FLR.
-	 */
-	if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0)
-		return;
-
-	down_write(&adev->reset_domain->sem);
-
-	amdgpu_virt_fini_data_exchange(adev);
-
  	xgpu_nv_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+}
  
+static int xgpu_nv_wait_reset(struct amdgpu_device *adev)
+{
+	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
  	do {
  		if (xgpu_nv_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
-			goto flr_done;
-
+			return 0;
  		msleep(10);
  		timeout -= 10;
  	} while (timeout > 1);
-
  	dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n");
+	return -ETIME;
+}
  
-flr_done:
-	atomic_set(&adev->reset_domain->in_gpu_reset, 0);
-	up_write(&adev->reset_domain->sem);
+static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
+{
+	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
  
  	/* Trigger recovery for world switch failure if no TDR */
  	if (amdgpu_device_should_recover_gpu(adev)
@@ -455,7 +445,8 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
  	.rel_full_gpu	= xgpu_nv_release_full_gpu_access,
  	.req_init_data  = xgpu_nv_request_init_data,
  	.reset_gpu = xgpu_nv_request_reset,
-	.wait_reset = NULL,
+	.ready_to_reset = xgpu_nv_ready_to_reset,
+	.wait_reset = xgpu_nv_wait_reset,
  	.trans_msg = xgpu_nv_mailbox_trans_msg,
  	.ras_poison_handler = xgpu_nv_ras_poison_handler,
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index 78cd07744ebe..e1d63bed84bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -515,12 +515,6 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
  	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
  	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
  
-	/* wait until RCV_MSG become 3 */
-	if (xgpu_vi_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
-		pr_err("failed to receive FLR_CMPL\n");
-		return;
-	}
-
  	/* Trigger recovery due to world switch failure */
  	if (amdgpu_device_should_recover_gpu(adev)) {
  		struct amdgpu_reset_context reset_context;