Am 19.07.2016 um 10:13 schrieb Chunming Zhou: > Check gpu status first, if MC/VMC/DISPLAY hang, directly trigger full reset. > If engine hangs, then trigger engine soft reset, if soft reset fails, will > fallback to full reset. > > Change-Id: I6f946db3624cd950e11e669f5dc80be58dad4711 > Signed-off-by: Chunming Zhou <David1.Zhou at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 42 +++++++++++++++++++++++++++++- > 1 file changed, 41 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 97957ab..4521725 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -1976,7 +1976,8 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) > for (i = 0; i < adev->num_ip_blocks; i++) { > if (!adev->ip_block_status[i].valid) > continue; > - if (adev->ip_blocks[i].funcs->pre_soft_reset) > + if (adev->ip_block_status[i].hang && > + adev->ip_blocks[i].funcs->pre_soft_reset) > r = adev->ip_blocks[i].funcs->pre_soft_reset(adev); > if (r) > return r; > @@ -1985,6 +1986,38 @@ int amdgpu_pre_soft_reset(struct amdgpu_device *adev) > return 0; > } > > +static bool amdgpu_need_full_reset(struct amdgpu_device *adev) > +{ > + if (adev->ip_block_status[AMD_IP_BLOCK_TYPE_GMC].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_IH].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SMC].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_GFX].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_SDMA].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_UVD].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_VCE].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_ACP].hang || > + adev->ip_block_status[AMD_IP_BLOCK_TYPE_DCE].hang) > + return true; > + return false; > +} > + > +static int amdgpu_soft_reset(struct amdgpu_device *adev) > +{ > + int i, r = 0; > + > + for (i = 0; i < adev->num_ip_blocks; i++) { > + if 
(!adev->ip_block_status[i].valid) > + continue; > + if (adev->ip_block_status[i].hang && > + adev->ip_blocks[i].funcs->soft_reset) > + r = adev->ip_blocks[i].funcs->soft_reset(adev); > + if (r) > + return r; Same as with patch #3, please move the return code checking into the if. > + } > + > + return 0; > +} > + > /** > * amdgpu_gpu_reset - reset the asic > * > @@ -2024,6 +2057,12 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) > if (amdgpu_device_has_dal_support(adev)) > state = drm_atomic_helper_suspend(adev->ddev); > > + if (!amdgpu_need_full_reset(adev)) { > + amdgpu_pre_soft_reset(adev); > + r = amdgpu_soft_reset(adev); > + if (!amdgpu_check_soft_reset(adev)) > + goto out; This should probably be "if (r || !amdgpu_check_soft_reset(adev))...". Christian. > + } > /* save scratch */ > amdgpu_atombios_scratch_regs_save(adev); > r = amdgpu_suspend(adev); > @@ -2045,6 +2084,7 @@ retry: > } > /* restore scratch */ > amdgpu_atombios_scratch_regs_restore(adev); > +out: > if (!r) { > r = amdgpu_ib_ring_tests(adev); > if (r) {