The old way is trying to recover hw jobs directly, which will conflict
with scheduler thread.

Change-Id: I9e45abd43ae280a675b0b0d88a820106dea2716c
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
---
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 48 +++++++++------------------
 1 file changed, 16 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 36f5805..9f4fa6e 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -324,10 +324,12 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity)
  *
  * Returns true if we could submit the job.
  */
-static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
+static bool amd_sched_entity_in_or_recover(struct amd_sched_job *sched_job,
+					   bool recover)
 {
 	struct amd_gpu_scheduler *sched = sched_job->sched;
-	struct amd_sched_entity *entity = sched_job->s_entity;
+	struct amd_sched_entity *entity = recover ? &sched->recover_entity :
+						    sched_job->s_entity;
 	bool added, first = false;
 
 	spin_lock(&entity->queue_lock);
@@ -348,6 +350,15 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 	return added;
 }
 
+static void amd_sched_entity_push_job_recover(struct amd_sched_job *sched_job)
+{
+	struct amd_sched_entity *entity = sched_job->s_entity;
+
+	trace_amd_sched_job(sched_job);
+	wait_event(entity->sched->job_scheduled,
+		   amd_sched_entity_in_or_recover(sched_job, true));
+}
+
 /* job_finish is called after hw fence signaled, and
  * the job had already been deleted from ring_mirror_list
  */
@@ -426,39 +437,12 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
 {
 	struct amd_sched_job *s_job, *tmp;
-	int r;
 
 	spin_lock(&sched->job_list_lock);
-	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
-					 struct amd_sched_job, node);
-	if (s_job)
-		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
-
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
-		struct amd_sched_fence *s_fence = s_job->s_fence;
-		struct fence *fence, *dependency;
-
+		list_del_init(&s_job->node);
 		spin_unlock(&sched->job_list_lock);
-		while ((dependency = sched->ops->dependency(s_job))) {
-			fence_wait(dependency, false);
-			fence_put(dependency);
-		}
-		fence = sched->ops->run_job(s_job);
-		atomic_inc(&sched->hw_rq_count);
-		if (fence) {
-			s_fence->parent = fence_get(fence);
-			r = fence_add_callback(fence, &s_fence->cb,
-					       amd_sched_process_job);
-			if (r == -ENOENT)
-				amd_sched_process_job(fence, &s_fence->cb);
-			else if (r)
-				DRM_ERROR("fence add callback failed (%d)\n",
-					  r);
-			fence_put(fence);
-		} else {
-			DRM_ERROR("Failed to run job!\n");
-			amd_sched_process_job(NULL, &s_fence->cb);
-		}
+		amd_sched_entity_push_job_recover(s_job);
 		spin_lock(&sched->job_list_lock);
 	}
 	spin_unlock(&sched->job_list_lock);
@@ -479,7 +463,7 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 	fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
 			   amd_sched_job_finish_cb);
 	wait_event(entity->sched->job_scheduled,
-		   amd_sched_entity_in(sched_job));
+		   amd_sched_entity_in_or_recover(sched_job, false));
 }
 
 /* init a sched_job with basic field */
-- 
1.9.1