merge the setting guilty on context into this function to avoid implement extra routine. Change-Id: I7a0063464fdc85d5ac9080046380e745565ff540 Signed-off-by: Monk Liu <Monk.Liu at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 13 ++++++++++++- drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index dae1e5b..a07544d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2868,7 +2868,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) amd_sched_job_kickout(&job->base); /* only do job_reset on the hang ring if @job not NULL */ - amd_sched_hw_job_reset(&ring->sched); + amd_sched_hw_job_reset(&ring->sched, NULL); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); @@ -2989,7 +2989,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); - amd_sched_hw_job_reset(&ring->sched); + amd_sched_hw_job_reset(&ring->sched, NULL); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ amdgpu_fence_driver_force_completion(ring); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index e4d3b4e..322b6c1 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -442,7 +442,14 @@ static void amd_sched_job_timedout(struct work_struct *work) job->sched->ops->timedout_job(job); } -void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) +static void amd_sched_set_guilty(struct amd_sched_job *s_job) +{ + if (atomic_inc_return(&s_job->karma) > s_job->sched->hang_limit) + if (s_job->s_entity->guilty) + atomic_set(s_job->s_entity->guilty, 1); +} + +void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *bad) { struct amd_sched_job *s_job; @@ -455,6 +462,10 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched) s_job->s_fence->parent = NULL; atomic_dec(&sched->hw_rq_count); } + + if (bad && bad == s_job) { + amd_sched_set_guilty(s_job); + } } spin_unlock(&sched->job_list_lock); } diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h index 2d59fc5..fff7cc7 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h @@ -172,7 +172,7 @@ int amd_sched_job_init(struct amd_sched_job *job, struct amd_gpu_scheduler *sched, struct amd_sched_entity *entity, void *owner); -void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched); +void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched, struct amd_sched_job *job); void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); bool amd_sched_dependency_optimized(struct dma_fence* fence, struct amd_sched_entity *entity); -- 2.7.4