Expedite job deletion from the ring mirror list to the HW fence signal
callback instead of doing it from finish_work. Together with waiting for
all such fences to signal in drm_sched_stop, this guarantees that an
already signaled job will not be processed twice.

Remove the sched finish fence callback and just submit finish_work
directly from the HW fence callback.

v2: Fix comments.
v3: Attach hw fence cb to sched_job
v5: Rebase

Suggested-by: Christian Koenig <Christian.Koenig@xxxxxxx>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
---
 drivers/gpu/drm/scheduler/sched_main.c | 57 +++++++++++++++++----------------
 include/drm/gpu_scheduler.h            |  6 ++--
 2 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index b5c5bee..5f5b187 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -284,8 +284,6 @@ static void drm_sched_job_finish(struct work_struct *work)
 	cancel_delayed_work_sync(&sched->work_tdr);
 
 	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
 	/* queue TDR for next job */
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
@@ -293,22 +291,11 @@ static void drm_sched_job_finish(struct work_struct *work)
 	sched->ops->free_job(s_job);
 }
 
-static void drm_sched_job_finish_cb(struct dma_fence *f,
-				    struct dma_fence_cb *cb)
-{
-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-						 finish_cb);
-	schedule_work(&job->finish_work);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
 	unsigned long flags;
 
-	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
-			       drm_sched_job_finish_cb);
-
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
@@ -396,7 +383,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
-					      &s_job->s_fence->cb)) {
+					      &s_job->cb)) {
 			dma_fence_put(s_job->s_fence->parent);
 			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
@@ -420,7 +407,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 
 		if (s_job->s_fence->parent) {
 			r = dma_fence_add_callback(s_job->s_fence->parent,
-						   &s_job->s_fence->cb,
+						   &s_job->cb,
 						   drm_sched_process_job);
 			if (r)
 				DRM_ERROR("fence restore callback failed (%d)\n",
@@ -449,31 +436,34 @@ EXPORT_SYMBOL(drm_sched_stop);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
 	struct drm_sched_job *s_job, *tmp;
-	unsigned long flags;
 	int r;
 
 	if (!full_recovery)
 		goto unpark;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/*
+	 * Locking the list is not required here as the sched thread is parked
+	 * so no new jobs are being pushed into HW, and in drm_sched_stop we
+	 * flushed all the jobs that were still in the mirror list but had
+	 * already signaled and removed themselves from the list. Also,
+	 * concurrent GPU recoveries can't run in parallel.
+	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
-		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence = s_job->s_fence->parent;
 
 		if (fence) {
-			r = dma_fence_add_callback(fence, &s_fence->cb,
+			r = dma_fence_add_callback(fence, &s_job->cb,
 						   drm_sched_process_job);
 			if (r == -ENOENT)
-				drm_sched_process_job(fence, &s_fence->cb);
+				drm_sched_process_job(fence, &s_job->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
 		} else
-			drm_sched_process_job(NULL, &s_fence->cb);
+			drm_sched_process_job(NULL, &s_job->cb);
 	}
 
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 unpark:
 	kthread_unpark(sched->thread);
@@ -622,18 +612,27 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched)
  */
 static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 {
-	struct drm_sched_fence *s_fence =
-		container_of(cb, struct drm_sched_fence, cb);
+	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
+	struct drm_sched_fence *s_fence = s_job->s_fence;
 	struct drm_gpu_scheduler *sched = s_fence->sched;
+	unsigned long flags;
+
+	cancel_delayed_work(&sched->work_tdr);
 
-	dma_fence_get(&s_fence->finished);
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
+
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	/* remove job from ring_mirror_list */
+	list_del_init(&s_job->node);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
 	drm_sched_fence_finished(s_fence);
 
 	trace_drm_sched_process_job(s_fence);
-	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
+
+	schedule_work(&s_job->finish_work);
 }
 
 /**
@@ -696,16 +695,16 @@ static int drm_sched_main(void *param)
 
 		if (fence) {
 			s_fence->parent = dma_fence_get(fence);
-			r = dma_fence_add_callback(fence, &s_fence->cb,
+			r = dma_fence_add_callback(fence, &sched_job->cb,
 						   drm_sched_process_job);
 			if (r == -ENOENT)
-				drm_sched_process_job(fence, &s_fence->cb);
+				drm_sched_process_job(fence, &sched_job->cb);
 			else if (r)
 				DRM_ERROR("fence add callback failed (%d)\n",
 					  r);
 			dma_fence_put(fence);
 		} else
-			drm_sched_process_job(NULL, &s_fence->cb);
+			drm_sched_process_job(NULL, &sched_job->cb);
 
 		wake_up(&sched->job_scheduled);
 	}
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 5ab2d97..6621f74 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -138,10 +138,6 @@ struct drm_sched_fence {
 	struct dma_fence		finished;
 
 	/**
-	 * @cb: the callback for the parent fence below.
-	 */
-	struct dma_fence_cb		cb;
-	/**
 	 * @parent: the fence returned by &drm_sched_backend_ops.run_job
 	 * when scheduling the job on hardware. We signal the
 	 * &drm_sched_fence.finished fence once parent is signalled.
@@ -182,6 +178,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
  *               be scheduled further.
  * @s_priority: the priority of the job.
  * @entity: the entity to which this job belongs.
+ * @cb: the callback for the parent fence in s_fence.
  *
  * A job is created by the driver using drm_sched_job_init(), and
  * should call drm_sched_entity_push_job() once it wants the scheduler
@@ -199,6 +196,7 @@ struct drm_sched_job {
 	atomic_t			karma;
 	enum drm_sched_priority		s_priority;
 	struct drm_sched_entity		*entity;
+	struct dma_fence_cb		cb;
 };
 
 static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
-- 
2.7.4
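
For readability, this is roughly how drm_sched_process_job() reads with the patch
applied, reconstructed from the hunks above with explanatory comments added;
whitespace and surrounding context are approximate, not a verbatim copy of the file:

static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;
	unsigned long flags;

	/* The HW fence signaled, so the TDR timer can be stopped here. */
	cancel_delayed_work(&sched->work_tdr);

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(&sched->num_jobs);

	/*
	 * The job is removed from ring_mirror_list right in the HW fence
	 * callback rather than being deferred to finish_work, so a job that
	 * already signaled cannot be picked up and processed a second time.
	 */
	spin_lock_irqsave(&sched->job_list_lock, flags);
	list_del_init(&s_job->node);
	spin_unlock_irqrestore(&sched->job_list_lock, flags);

	drm_sched_fence_finished(s_fence);

	trace_drm_sched_process_job(s_fence);
	wake_up_interruptible(&sched->wake_up_worker);

	/* Freeing the job still happens in finish_work, queued directly here. */
	schedule_work(&s_job->finish_work);
}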