Add ring reset function callback for JPEG4_0_3 to recover from job
timeouts without a full gpu reset.

V2:
 - sched->ready flag shouldn't be modified by back-ends (Christian)
 - use drm_sched_wqueue_stop()/drm_sched_wqueue_start() instead (Alex)
 - restart the paused schedulers on all error paths so a failed reset
   cannot leave job submission permanently blocked

Signed-off-by: Sathishkumar S <sathishkumar.sundararaju@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 90 +++++++++++++++++++++++++++---
 1 file changed, 84 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index be0b3b4c8690..b6168f27addd 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -204,14 +204,10 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block)
 	if (r)
 		return r;
 
-	/* TODO: Add queue reset mask when FW fully supports it */
-	adev->jpeg.supported_reset =
-		amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]);
+	adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE;
 	r = amdgpu_jpeg_sysfs_reset_mask_init(adev);
-	if (r)
-		return r;
 
-	return 0;
+	return r;
 }
 
 /**
@@ -231,6 +227,7 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block)
 			return r;
 
 	amdgpu_jpeg_sysfs_reset_mask_fini(adev);
+
 	r = amdgpu_jpeg_sw_fini(adev);
 
 	return r;
@@ -1099,6 +1096,87 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * jpeg_v4_0_3_wait_for_idle_on_inst - drain the sibling cores of an instance
+ * @ring: the hung ring whose instance is about to be reset
+ *
+ * Pause job submission on every core of the instance except the hung one,
+ * then wait for those cores to finish their in-flight jobs.  On failure the
+ * paused schedulers are deliberately left stopped; the caller is responsible
+ * for restarting them (jpeg_v4_0_3_ring_reset() does so on all paths).
+ *
+ * Returns 0 on success, negative error code if a core did not go idle.
+ */
+static int jpeg_v4_0_3_wait_for_idle_on_inst(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *r;
+	int ret, j;
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+		if (ring->pipe == r->pipe)
+			continue;
+		/* pause work submission on this core */
+		drm_sched_wqueue_stop(&r->sched);
+	}
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+		if (ring->pipe == r->pipe)
+			continue;
+		/* wait for idle on all cores except the hung core */
+		ret = SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, ring->me),
+						regUVD_JRBC0_UVD_JRBC_STATUS,
+						jpeg_v4_0_3_core_reg_offset(j),
+						UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK,
+						UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v4_0_3_ring_reset - recover a hung JPEG ring via a per-instance reset
+ * @ring: the hung ring
+ * @vmid: vmid of the guilty job (unused by this back-end)
+ *
+ * Quiesce the sibling cores, restart the whole instance and re-test every
+ * ring on it.  The paused schedulers are restarted on every return path so
+ * that a failed reset cannot leave submission permanently blocked; the error
+ * is propagated to the caller, which falls back to a full gpu reset.
+ *
+ * Returns 0 on success, negative error code on failure.
+ */
+static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring *r;
+	int ret, j;
+
+	/* per-queue reset is not supported under SR-IOV */
+	if (amdgpu_sriov_vf(adev))
+		return -EINVAL;
+
+	ret = jpeg_v4_0_3_wait_for_idle_on_inst(ring);
+	if (ret)
+		goto out_restart;
+
+	jpeg_v4_0_3_stop_inst(adev, ring->me);
+	jpeg_v4_0_3_start_inst(adev, ring->me);
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+		jpeg_v4_0_3_start_jrbc(r);
+		ret = amdgpu_ring_test_helper(r);
+		if (ret)
+			break;
+	}
+
+out_restart:
+	/* resume work submission on the paused cores even if the reset failed */
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		r = &adev->jpeg.inst[ring->me].ring_dec[j];
+		if (ring->pipe != r->pipe)
+			drm_sched_wqueue_start(&r->sched);
+	}
+
+	return ret;
+}
+
 static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
 	.name = "jpeg_v4_0_3",
 	.early_init = jpeg_v4_0_3_early_init,
@@ -1145,6 +1223,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
 	.emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg,
 	.emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
+	.reset = jpeg_v4_0_3_ring_reset,
 };
 
 static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev)
-- 
2.25.1