Handle the XGMI hive case: when the device belongs to an XGMI hive, also skip the RAS error count reset if the hive's ras_recovery flag is set, in addition to the existing per-device in-reset and in-recovery checks. Suggested-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 753260745554..0093c28f4343 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1226,6 +1226,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", @@ -1237,8 +1239,15 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, !amdgpu_ras_get_mca_debug_mode(adev)) return -EOPNOTSUPP; + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + /* skip ras error reset in gpu reset */ - if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery)) && + if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || + hive_ras_recovery) && mca_funcs && mca_funcs->mca_set_debug_mode) return -EOPNOTSUPP; -- 2.35.1