Re: [PATCH v2] drm/amdgpu: dequeue mes scheduler during fini


 



On 13.10.22 at 12:39, YuBiao Wang wrote:
Resend to fix a coding style issue.

[Why]
If MES is not dequeued during fini, it is left in an uncleaned state
across a reload. MES then cannot receive some commands, which leads to
reload failure.

[How]
Perform the MES dequeue via MMIO after all unmap jobs have been
completed by MES and before KIQ fini.
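
For reference, the resulting teardown order in gfx_v11_0_hw_fini() looks
roughly like this (a condensed sketch of the hunks below, not a complete
function):

	/* unmap jobs for the KCQs are submitted through MES here */
	if (amdgpu_gfx_disable_kcq(adev))
		DRM_ERROR("KCQ disable failed\n");

	/* then dequeue the MES scheduler pipe itself via MMIO */
	if (adev->mes.ring.sched.ready && adev->mes.kiq_dequeue_sched)
		adev->mes.kiq_dequeue_sched(adev);

	/* and only afterwards tear down the KIQ used by MES */
	amdgpu_mes_kiq_hw_fini(adev);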

Signed-off-by: YuBiao Wang <YuBiao.Wang@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  3 ++
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  |  3 ++
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 47 +++++++++++++++++++++++--
  3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index ad980f4b66e1..ea8efa52503b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -130,6 +130,9 @@ struct amdgpu_mes {
  	int                             (*kiq_hw_init)(struct amdgpu_device *adev);
  	int                             (*kiq_hw_fini)(struct amdgpu_device *adev);
+	/* dequeue sched pipe via kiq */
+	void                            (*kiq_dequeue_sched)(struct amdgpu_device *adev);
+
  	/* ip specific functions */
  	const struct amdgpu_mes_funcs   *funcs;
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 257b2e4de600..7c75758f58e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -4406,6 +4406,9 @@ static int gfx_v11_0_hw_fini(void *handle)
  		if (amdgpu_gfx_disable_kcq(adev))
  			DRM_ERROR("KCQ disable failed\n");
+		if (adev->mes.ring.sched.ready && adev->mes.kiq_dequeue_sched)
+			adev->mes.kiq_dequeue_sched(adev);
+
  		amdgpu_mes_kiq_hw_fini(adev);
  	}
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index b48684db2832..837ff485dab6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -44,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
  static int mes_v11_0_hw_fini(void *handle);
  static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
  static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
+static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev);
  #define MES_EOP_SIZE			2048

@@ -1078,6 +1079,7 @@ static int mes_v11_0_sw_init(void *handle)
  	adev->mes.funcs = &mes_v11_0_funcs;
  	adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init;
  	adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini;
+	adev->mes.kiq_dequeue_sched = &mes_v11_0_kiq_dequeue_sched;
  	r = amdgpu_mes_init(adev);
  	if (r)
@@ -1151,6 +1153,42 @@ static int mes_v11_0_sw_fini(void *handle)
  	return 0;
  }
+static void mes_v11_0_kiq_dequeue_sched(struct amdgpu_device *adev)
+{
+	uint32_t data;
+	int i;
+
+	mutex_lock(&adev->srbm_mutex);
+	soc21_grbm_select(adev, 3, AMDGPU_MES_SCHED_PIPE, 0, 0);
+
+	/* disable the queue if it's active */
+	if (RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1) {
+		WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
+		}
+	}
+	data = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL);
+	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+				DOORBELL_EN, 0);
+	data = REG_SET_FIELD(data, CP_HQD_PQ_DOORBELL_CONTROL,
+				DOORBELL_HIT, 1);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL, 0);
+
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_LO, 0);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_WPTR_HI, 0);
+	WREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR, 0);
+
+	soc21_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+
+	adev->mes.ring.sched.ready = false;
+}
+
  static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring)
  {
  	uint32_t tmp;
@@ -1257,9 +1295,12 @@ static int mes_v11_0_hw_init(void *handle)
static int mes_v11_0_hw_fini(void *handle)
  {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	adev->mes.ring.sched.ready = false;
+	/*
+	 * Do not disable mes.ring.sched.ready here, since mes
+	 * is still used to unmap other rings.
+	 * We'll set mes.ring.sched.ready to false when mes ring
+	 * is dequeued.
+	 */

Maybe drop that comment. The driver should *NEVER* set ring.sched.ready to false, but only to true.

The background is that this flag indicates whether a ring was correctly initialized from the SW side. It's *not* related to the hardware state, but is only set to true after the hardware has been tested.

We have gotten this wrong so many times and I had to write patches to remove it.

The only exception to this so far is the KIQ ring, because we don't use it for CS there. MES might get the same exception, but not as long as we don't need it somewhere.
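
If MES does need a gate here at some point, a sketch of the cleaner alternative would be a dedicated hardware-state flag instead of touching ring.sched.ready (the mes.sched_hw_active field below is hypothetical and not part of this patch):

	/* hypothetical member in struct amdgpu_mes */
	bool	sched_hw_active;

	/* in mes_v11_0_kiq_dequeue_sched(), after the MMIO dequeue sequence */
	soc21_grbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
	adev->mes.sched_hw_active = false;	/* leave ring.sched.ready alone */

	/* and in gfx_v11_0_hw_fini() */
	if (adev->mes.sched_hw_active && adev->mes.kiq_dequeue_sched)
		adev->mes.kiq_dequeue_sched(adev);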

Regards,
Christian.

  	return 0;
  }



