Expedite job deletion from ring mirror list to the HW fence signal callback instead from finish_work, together with waiting for all such fences to signal in drm_sched_stop we garantee that already signaled job will not be processed twice. Remove the sched finish fence callback and just submit finish_work directly from the HW fence callback. Suggested-by: Christian Koenig <Christian.Koenig@xxxxxxx> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> --- drivers/gpu/drm/scheduler/sched_fence.c | 4 +++- drivers/gpu/drm/scheduler/sched_main.c | 39 ++++++++++++++++----------------- include/drm/gpu_scheduler.h | 10 +++++++-- 3 files changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c index d8d2dff..e62c239 100644 --- a/drivers/gpu/drm/scheduler/sched_fence.c +++ b/drivers/gpu/drm/scheduler/sched_fence.c @@ -151,7 +151,8 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f) EXPORT_SYMBOL(to_drm_sched_fence); struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity, - void *owner) + void *owner, + struct drm_sched_job *s_job) { struct drm_sched_fence *fence = NULL; unsigned seq; @@ -163,6 +164,7 @@ struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *entity, fence->owner = owner; fence->sched = entity->rq->sched; spin_lock_init(&fence->lock); + fence->s_job = s_job; seq = atomic_inc_return(&entity->fence_seq); dma_fence_init(&fence->scheduled, &drm_sched_fence_ops_scheduled, diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 8fb7f86..2860037 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -284,31 +284,17 @@ static void drm_sched_job_finish(struct work_struct *work) cancel_delayed_work_sync(&sched->work_tdr); spin_lock_irqsave(&sched->job_list_lock, flags); - /* remove job from ring_mirror_list */ - list_del_init(&s_job->node); - /* queue TDR for next job */ drm_sched_start_timeout(sched); spin_unlock_irqrestore(&sched->job_list_lock, flags); sched->ops->free_job(s_job); } -static void drm_sched_job_finish_cb(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_job *job = container_of(cb, struct drm_sched_job, - finish_cb); - schedule_work(&job->finish_work); -} - static void drm_sched_job_begin(struct drm_sched_job *s_job) { struct drm_gpu_scheduler *sched = s_job->sched; unsigned long flags; - dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb, - drm_sched_job_finish_cb); - spin_lock_irqsave(&sched->job_list_lock, flags); list_add_tail(&s_job->node, &sched->ring_mirror_list); drm_sched_start_timeout(sched); @@ -418,13 +404,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only) { struct drm_sched_job *s_job, *tmp; bool found_guilty = false; - unsigned long flags; int r; if (unpark_only) goto unpark; - spin_lock_irqsave(&sched->job_list_lock, flags); + /* + * Locking the list is not required here as the sched thread is parked + * so no new jobs are being pushed in to HW and in drm_sched_stop we + * flushed any in flight jobs who didn't signal yet. Also concurrent + * GPU recovers can't run in parallel. + */ list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { struct drm_sched_fence *s_fence = s_job->s_fence; struct dma_fence *fence; @@ -453,7 +443,6 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only) } drm_sched_start_timeout(sched); - spin_unlock_irqrestore(&sched->job_list_lock, flags); unpark: kthread_unpark(sched->thread); @@ -505,7 +494,7 @@ int drm_sched_job_init(struct drm_sched_job *job, job->sched = sched; job->entity = entity; job->s_priority = entity->rq - sched->sched_rq; - job->s_fence = drm_sched_fence_create(entity, owner); + job->s_fence = drm_sched_fence_create(entity, owner, job); if (!job->s_fence) return -ENOMEM; job->id = atomic64_inc_return(&sched->job_id_count); @@ -593,15 +582,25 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb) struct drm_sched_fence *s_fence = container_of(cb, struct drm_sched_fence, cb); struct drm_gpu_scheduler *sched = s_fence->sched; + struct drm_sched_job *s_job = s_fence->s_job; + unsigned long flags; + + cancel_delayed_work(&sched->work_tdr); - dma_fence_get(&s_fence->finished); atomic_dec(&sched->hw_rq_count); atomic_dec(&sched->num_jobs); + + spin_lock_irqsave(&sched->job_list_lock, flags); + /* remove job from ring_mirror_list */ + list_del_init(&s_job->node); + spin_unlock_irqrestore(&sched->job_list_lock, flags); + drm_sched_fence_finished(s_fence); trace_drm_sched_process_job(s_fence); - dma_fence_put(&s_fence->finished); wake_up_interruptible(&sched->wake_up_worker); + + schedule_work(&s_job->finish_work); } /** diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index c94b592..23855c6 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -115,6 +115,8 @@ struct drm_sched_rq { struct drm_sched_entity *current_entity; }; +struct drm_sched_job; + /** * struct drm_sched_fence - fences corresponding to the scheduling of a job. */ @@ -160,6 +162,9 @@ struct drm_sched_fence { * @owner: job owner for debugging */ void *owner; + + /* Back pointer to owning job */ + struct drm_sched_job *s_job; }; struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f); @@ -330,8 +335,9 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, enum drm_sched_priority priority); bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); -struct drm_sched_fence *drm_sched_fence_create( - struct drm_sched_entity *s_entity, void *owner); +struct drm_sched_fence *drm_sched_fence_create(struct drm_sched_entity *s_entity, + void *owner, + struct drm_sched_job *s_job); void drm_sched_fence_scheduled(struct drm_sched_fence *fence); void drm_sched_fence_finished(struct drm_sched_fence *fence); -- 2.7.4 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx