PMFW is responsible for RAS error reset in some conditions, driver can skip the operation. v2: add check for ras->in_recovery, it's set earlier than amdgpu_in_reset. Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 95c7ba889e2d..806c6d4deb63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1178,11 +1178,19 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block block) { struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; if (!block_obj || !block_obj->hw_ops) return 0; - if (!amdgpu_ras_is_supported(adev, block)) + /* skip ras error reset in gpu reset */ + if (amdgpu_in_reset(adev) && atomic_read(&ras->in_recovery) && + mca_funcs && mca_funcs->mca_set_debug_mode) + return 0; + + if (!amdgpu_ras_is_supported(adev, block) || + !amdgpu_ras_get_mca_debug_mode(adev)) return 0; if (block_obj->hw_ops->reset_ras_error_count) @@ -1195,6 +1203,8 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, enum amdgpu_ras_block block) { struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", @@ -1202,7 +1212,13 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, return 0; } - if (!amdgpu_ras_is_supported(adev, block)) + /* skip ras error reset in gpu reset */ + if (amdgpu_in_reset(adev) && atomic_read(&ras->in_recovery) && + mca_funcs && mca_funcs->mca_set_debug_mode) + return 0; + + if (!amdgpu_ras_is_supported(adev, block) || + !amdgpu_ras_get_mca_debug_mode(adev)) return 0; if (block_obj->hw_ops->reset_ras_error_count) -- 2.35.1