This athub fatal error can be recovered by baco without system-level reboot, so add a mode to use baco for the recovery. Not affect the default psp reset situations for now. Change-Id: Ib17f2a39254ff6b0473a785752adfdfea79d0e0d Signed-off-by: Le Ma <le.ma@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 71abfe9..53e9590 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4021,12 +4021,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_device *tmp_adev = NULL; int i, r = 0; bool in_ras_intr = amdgpu_ras_intr_triggered(); + bool use_baco = + (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ? + true : false; /* * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { + if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); @@ -4037,7 +4040,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, need_full_reset = job_signaled = false; INIT_LIST_HEAD(&device_list); - dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset"); + dev_info(adev->dev, "GPU %s begin!\n", + (in_ras_intr && !use_baco) ? "jobs stop":"reset"); cancel_delayed_work_sync(&adev->delayed_init_work); @@ -4104,7 +4108,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_unregister_gpu_instance(tmp_adev); /* disable ras on ALL IPs */ - if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev)) + if (!(in_ras_intr && !use_baco) && + amdgpu_device_ip_need_full_reset(tmp_adev)) amdgpu_ras_suspend(tmp_adev); for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -4115,13 +4120,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_sched_stop(&ring->sched, job ? &job->base : NULL); - if (in_ras_intr) + if (in_ras_intr && !use_baco) amdgpu_job_stop_all_jobs_on_sched(&ring->sched); } } - if (in_ras_intr) + if (in_ras_intr && !use_baco) goto skip_sched_resume; /* @@ -4214,7 +4219,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, skip_sched_resume: list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { /*unlock kfd: SRIOV would do it separately */ - if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev)) + if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev)) amdgpu_amdkfd_post_reset(tmp_adev); amdgpu_device_unlock_adev(tmp_adev); } -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx