Re: [PATCH v2 2/7] drm/amdgpu: Add ring reset callback for JPEG4_0_3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 10.02.25 um 15:25 schrieb Sathishkumar S:
Add ring reset function callback for JPEG4_0_3 to
recover from job timeouts without a full gpu reset.

V2:
  - sched->ready flag shouldn't be modified by back-ends (Christian)
  - use drm_sched_wqueue_stop()/drm_sched_wqueue_start() instead (Alex)

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 69 +++++++++++++++++++++---
  1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index be0b3b4c8690..b6168f27addd 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -204,14 +204,10 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
  	if (r)
  		return r;
- /* TODO: Add queue reset mask when FW fully supports it */
-	adev->jpeg.supported_reset =
-		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+	adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
  	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
-	if (r)
-		return r;
- return 0;
+	return r;
  }
/**
@@ -231,6 +227,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
  		return r;
amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
  	r = amdgpu_jpeg_sw_fini(adev);
return r;
@@ -1099,6 +1096,65 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
  	return 0;
  }
+static int jpeg_v4_0_3_wait_for_idle_on_inst(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *r;
+	int ret, j;
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+
+		if (ring->pipe == r->pipe)
+			continue;
+
+		/* pause work submission on this core */
+		drm_sched_wqueue_stop(&r->sched);

Again complete NAK for that!

A HW backend should *never* mess with the scheduler front end whatsoever.

What exactly is the requirement here? Do we need to make sure that the JPEG engine isn't touched by the scheduler at all?

Regards,
Christian.

+	}
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+		if (ring->pipe == r->pipe)
+			continue;
+		/* wait for idle on all cores except on the hung core */
+		ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+						regUVD_JRBC0_UVD_JRBC_STATUS,
+						jpeg_v4_0_3_core_reg_offset(j),
+						UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+						UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *r;
+	int ret, j;
+
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
+	ret = jpeg_v4_0_3_wait_for_idle_on_inst(ring);
+	if (ret)
+		return ret;
+
+	jpeg_v4_0_3_stop_inst(ring->adev, ring->me);
+	jpeg_v4_0_3_start_inst(ring->adev, ring->me);
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+		jpeg_v4_0_3_start_jrbc(r);
+		ret = amdgpu_ring_test_helper(r);
+		if (ret)
+			return ret;
+		else if (ring->pipe != r->pipe)
+			drm_sched_wqueue_start(&r->sched);
+	}
+	return 0;
+}
+
  static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
  	.name = "jpeg_v4_0_3",
  	.early_init = jpeg_v4_0_3_early_init,
@@ -1145,6 +1201,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
  	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
  	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
  	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+	.reset = jpeg_v4_0_3_ring_reset,
  };
static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux