[Why]
The previous tdr design treats the first job that hits job_timeout as the
bad job. But sometimes a later bad compute job can block a good gfx job
and cause an unexpected gfx job timeout, because the gfx and compute
rings share internal GC hardware.

[How]
This patch implements an advanced tdr mode:

1. Add a synchronized resubmit-and-wait step to find the real bad job.
   If a job's hw fence times out, decrease the old job's karma, treat
   the newly found job as the guilty one, and do a hw reset to recover
   the hw. After that, continue the resubmit step to resubmit the
   remaining jobs.
2. For a whole GPU reset (vram lost), resubmit jobs in the old style.

Signed-off-by: Jack Zhang <Jack.Zhang1@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 57 ++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c    | 33 +++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e247c3a2ec08..fa53c6c00ee9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4639,7 +4639,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	int i, r = 0;
 	bool need_emergency_restart = false;
 	bool audio_suspended = false;
-
+	int tmp_vram_lost_counter;
 	/*
 	 * Special case: RAS triggered and full reset isn't supported
 	 */
@@ -4788,6 +4788,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		}
 	}
 
+	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
 	/* Actual ASIC resets if needed.*/
 	/* TODO Implement XGMI hive reset logic for SRIOV */
 	if (amdgpu_sriov_vf(adev)) {
@@ -4807,17 +4808,67 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
+			int ret = 0;
+			struct drm_sched_job *s_job = NULL;
 
 			if (!ring || !ring->sched.thread)
 				continue;
 
 			/* No point to resubmit jobs if we didn't HW reset*/
-			if (!tmp_adev->asic_reset_res && !job_signaled)
+			if (!tmp_adev->asic_reset_res && !job_signaled) {
 				drm_sched_resubmit_jobs(&ring->sched);
 
-			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+				if (amdgpu_gpu_recovery == 2 &&
+				    !list_empty(&ring->sched.ring_mirror_list) &&
+				    !(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter))) {
+
+					s_job = list_first_entry_or_null(&ring->sched.ring_mirror_list, struct drm_sched_job, node);
+					ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
+					if (ret == 0) { /* timeout */
+						/* reset karma to the right job */
+						if (job && job != s_job)
+							amdgpu_sched_decrease_karma(&job->base);
+						drm_sched_increase_karma(s_job);
+
+						/* do hw reset */
+						if (amdgpu_sriov_vf(adev)) {
+							amdgpu_virt_fini_data_exchange(adev);
+							r = amdgpu_device_reset_sriov(adev, false);
+							if (r)
+								adev->asic_reset_res = r;
+						} else {
+							r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset, false);
+							if (r && r == -EAGAIN)
+								goto retry;
+
+							/* add reset counter so that the following resubmitted job could flush vmid */
+							atomic_inc(&tmp_adev->gpu_reset_counter);
+
+							/* resubmit again the left jobs */
+							drm_sched_resubmit_jobs(&ring->sched);
+						}
+					}
+				}
+			}
+
+			if (amdgpu_gpu_recovery != 2)
+				drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+		}
+
+		if (amdgpu_gpu_recovery == 2) {
+			/* delay sched start until all jobs are submitted for advanced tdr mode */
+			for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+				struct amdgpu_ring *ring = tmp_adev->rings[i];
+				int ret = 0;
+
+				if (!ring || !ring->sched.thread)
+					continue;
+
+				drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+			}
 		}
 
 		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
 			drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 865f924772b0..9c3f4edb7532 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -509,7 +509,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
  * DOC: gpu_recovery (int)
  * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
  */
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 759b34799221..3457792b5d20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -281,6 +281,39 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
 	}
 }
 
+void amdgpu_sched_decrease_karma(struct drm_sched_job *bad)
+{
+	int i;
+	struct drm_sched_entity *tmp;
+	struct drm_sched_entity *entity;
+	struct drm_gpu_scheduler *sched = bad->sched;
+
+	/* Don't decrease @bad's karma if it's from the KERNEL RQ,
+	 * because a GPU hang can corrupt kernel jobs (like VM update jobs),
+	 * but kernel jobs are always considered good.
+	 */
+	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
+		atomic_dec(&bad->karma);
+		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
+		     i++) {
+			struct drm_sched_rq *rq = &sched->sched_rq[i];
+
+			spin_lock(&rq->lock);
+			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
+				if (bad->s_fence->scheduled.context ==
+				    entity->fence_context) {
+					if (entity->guilty)
+						atomic_set(entity->guilty, 0);
+					break;
+				}
+			}
+			spin_unlock(&rq->lock);
+			if (&entity->list != &rq->entities)
+				break;
+		}
+	}
+}
+
 const struct drm_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
-- 
2.25.1
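
As a usage note, the advanced tdr mode is selected through the existing
gpu_recovery module parameter added above, i.e. amdgpu.gpu_recovery=2 on
the kernel command line or at module load time.

The short program below is a minimal user-space sketch of the
blame-shifting idea described in [How]: walk the pending jobs in
submission order, wait on each job's fence with the ring timeout, and
shift the karma from the first-timeout job to the first job that really
fails to signal. Every name in it (mock_job, mock_fence_wait_timeout,
mock_hw_reset, find_real_bad_job) is a made-up stand-in for
illustration only, not driver or DRM scheduler API.

#include <stdbool.h>
#include <stdio.h>

struct mock_job {
	const char *name;
	bool hangs;	/* pretend this job's hw fence never signals */
	int karma;
};

/* stand-in for a fence wait with timeout: 0 means timed out */
static int mock_fence_wait_timeout(const struct mock_job *job)
{
	return job->hangs ? 0 : 1;
}

static void mock_hw_reset(void)
{
	printf("  hw reset + bump gpu_reset_counter\n");
}

static void find_real_bad_job(struct mock_job *jobs, int count,
			      struct mock_job *first_timeout_job)
{
	for (int i = 0; i < count; i++) {
		struct mock_job *job = &jobs[i];

		printf("resubmit %s and wait on its fence\n", job->name);
		if (mock_fence_wait_timeout(job) == 0) {
			/* move the blame from the first-timeout job to the real one */
			if (first_timeout_job && first_timeout_job != job)
				first_timeout_job->karma--;
			job->karma++;
			printf("  %s timed out -> guilty (karma %d)\n",
			       job->name, job->karma);
			mock_hw_reset();
			/* keep going: resubmit the remaining jobs */
		}
	}
}

int main(void)
{
	/* the gfx job timed out first, but the compute job is the real hang */
	struct mock_job jobs[] = {
		{ .name = "gfx job",     .hangs = false, .karma = 1 },
		{ .name = "compute job", .hangs = true,  .karma = 0 },
	};

	find_real_bad_job(jobs, 2, &jobs[0]);
	return 0;
}

Built with a plain C compiler, it prints the gfx job signaling normally
and the compute job timing out and taking the blame, which mirrors the
per-ring wait step added to amdgpu_device_gpu_recover() above.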