Check GPU status first: if MC/VMC/DISPLAY hangs, directly trigger a full reset. If an engine hangs, trigger an engine soft reset; if the soft reset fails, fall back to a full reset. Change-Id: I6f946db3624cd950e11e669f5dc80be58dad4711 Signed-off-by: Chunming Zhou <David1.Zhou at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 42 +++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 97957ab..4521725 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1976,7 +1976,8 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_block_status[i].valid) continue; - if (adev->ip_blocks[i].funcs->pre_soft_reset) + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->pre_soft_reset) r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); if (r) return r; @@ -1985,6 +1986,38 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) return 0; } +static bool amdgpu_need_full_reset(struct amdgpu_device *adev) +{ + if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || + adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) + return true; + return false; +} + +static int amdgpu_soft_reset(struct amdgpu_device *adev) +{ + int i, r = 0; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_block_status[i].valid) + continue; + if (adev->ip_block_status[i].hang && + adev->ip_blocks[i].funcs->soft_reset) + r = 
adev->ip_blocks[i].funcs->soft_reset(adev); + if (r) + return r; + } + + return 0; +} + /** * amdgpu_gpu_reset - reset the asic * @@ -2024,6 +2057,12 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) if (amdgpu_device_has_dal_support(adev)) state = drm_atomic_helper_suspend(adev->ddev); + if (!amdgpu_need_full_reset(adev)) { + amdgpu_pre_soft_reset(adev); + r = amdgpu_soft_reset(adev); + if (!amdgpu_check_soft_reset(adev)) + goto out; + } /* save scratch */ amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_suspend(adev); @@ -2045,6 +2084,7 @@ retry: } /* restore scratch */ amdgpu_atombios_scratch_regs_restore(adev); +out: if (!r) { r = amdgpu_ib_ring_tests(adev); if (r) { -- 1.9.1