From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> There is no reason to queue just a single job if scheduler can take more and re-queue the worker to queue more. We can simply feed the hardware with as much as it can take in one go and hopefully win some latency. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> Cc: Christian König <christian.koenig@xxxxxxx> Cc: Danilo Krummrich <dakr@xxxxxxxxxx> Cc: Matthew Brost <matthew.brost@xxxxxxxxx> Cc: Philipp Stanner <pstanner@xxxxxxxxxx> --- drivers/gpu/drm/scheduler/sched_main.c | 112 +++++++++++-------------- drivers/gpu/drm/scheduler/sched_rq.c | 19 ++--- include/drm/gpu_scheduler.h | 3 - 3 files changed, 58 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4ba9ed27a8a6..6f4ea8a2ca17 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -98,33 +98,6 @@ static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) return credits; } -/** - * drm_sched_can_queue -- Can we queue more to the hardware? - * @sched: scheduler instance - * @entity: the scheduler entity - * - * Return true if we can push at least one more job from @entity, false - * otherwise. - */ -bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity) -{ - struct drm_sched_job *s_job; - - s_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); - if (!s_job) - return false; - - /* If a job exceeds the credit limit, truncate it to the credit limit - * itself to guarantee forward progress. - */ - if (drm_WARN(sched, s_job->credits > sched->credit_limit, - "Jobs may not exceed the credit limit, truncate.\n")) - s_job->credits = sched->credit_limit; - - return drm_sched_available_credits(sched) >= s_job->credits; -} - /** * drm_sched_run_job_queue - enqueue run-job work * @sched: scheduler instance @@ -174,6 +147,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) atomic_sub(s_job->credits, &sched->credit_count); atomic_dec(sched->score); + drm_sched_run_job_queue(sched); trace_drm_sched_process_job(s_fence); @@ -941,7 +915,6 @@ static void drm_sched_free_job_work(struct work_struct *w) sched->ops->free_job(job); drm_sched_run_free_queue(sched); - drm_sched_run_job_queue(sched); } /** @@ -953,54 +926,71 @@ static void drm_sched_run_job_work(struct work_struct *w) { struct drm_gpu_scheduler *sched = container_of(w, struct drm_gpu_scheduler, work_run_job); + u32 job_credits, submitted_credits = 0; struct drm_sched_entity *entity; - struct dma_fence *fence; - struct drm_sched_fence *s_fence; struct drm_sched_job *sched_job; - int r; + struct dma_fence *fence; if (READ_ONCE(sched->pause_submit)) return; - /* Find entity with a ready job */ - entity = drm_sched_rq_select_entity(sched, sched->rq); - if (IS_ERR_OR_NULL(entity)) - return; /* No more work */ + for (;;) { + /* Find entity with a ready job */ + entity = drm_sched_rq_select_entity(sched, sched->rq); + if (!entity) + break; /* No more work */ - sched_job = drm_sched_entity_pop_job(entity); - if (!sched_job) { + /* + * If a job exceeds the credit limit truncate it to guarantee + * forward progress. + */ + sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); + job_credits = sched_job->credits; + if (drm_WARN_ONCE(sched, job_credits > sched->credit_limit, + "Jobs may not exceed the credit limit, truncating.\n")) + job_credits = sched_job->credits = sched->credit_limit; + + if (job_credits > drm_sched_available_credits(sched)) { + complete_all(&entity->entity_idle); + break; + } + + sched_job = drm_sched_entity_pop_job(entity); complete_all(&entity->entity_idle); - drm_sched_run_job_queue(sched); - return; - } + if (!sched_job) { + /* Top entity is not yet runnable after all */ + continue; + } - s_fence = sched_job->s_fence; + drm_sched_job_begin(sched_job); + trace_drm_run_job(sched_job, entity); + submitted_credits += job_credits; + atomic_add(job_credits, &sched->credit_count); - atomic_add(sched_job->credits, &sched->credit_count); - drm_sched_job_begin(sched_job); + fence = sched->ops->run_job(sched_job); + drm_sched_fence_scheduled(sched_job->s_fence, fence); - trace_drm_run_job(sched_job, entity); - fence = sched->ops->run_job(sched_job); - complete_all(&entity->entity_idle); - drm_sched_fence_scheduled(s_fence, fence); + if (!IS_ERR_OR_NULL(fence)) { + int r; - if (!IS_ERR_OR_NULL(fence)) { - /* Drop for original kref_init of the fence */ - dma_fence_put(fence); + /* Drop for original kref_init of the fence */ + dma_fence_put(fence); - r = dma_fence_add_callback(fence, &sched_job->cb, - drm_sched_job_done_cb); - if (r == -ENOENT) - drm_sched_job_done(sched_job, fence->error); - else if (r) - DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); - } else { - drm_sched_job_done(sched_job, IS_ERR(fence) ? - PTR_ERR(fence) : 0); + r = dma_fence_add_callback(fence, &sched_job->cb, + drm_sched_job_done_cb); + if (r == -ENOENT) + drm_sched_job_done(sched_job, fence->error); + else if (r) + DRM_DEV_ERROR(sched->dev, + "fence add callback failed (%d)\n", r); + } else { + drm_sched_job_done(sched_job, IS_ERR(fence) ? + PTR_ERR(fence) : 0); + } } - wake_up(&sched->job_scheduled); - drm_sched_run_job_queue(sched); + if (submitted_credits) + wake_up(&sched->job_scheduled); } /** diff --git a/drivers/gpu/drm/scheduler/sched_rq.c b/drivers/gpu/drm/scheduler/sched_rq.c index 0b7a2b8b48db..1a454384ab25 100644 --- a/drivers/gpu/drm/scheduler/sched_rq.c +++ b/drivers/gpu/drm/scheduler/sched_rq.c @@ -156,9 +156,7 @@ void drm_sched_rq_pop_entity(struct drm_sched_rq *rq, * * Find oldest waiting ready entity. * - * Return an entity if one is found; return an error-pointer (!NULL) if an - * entity was ready, but the scheduler had insufficient credits to accommodate - * its job; return NULL, if no ready entity was found. + * Return an entity if one is found or NULL if no ready entity was found. */ struct drm_sched_entity * drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, @@ -170,16 +168,13 @@ drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, spin_lock(&rq->lock); for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); - if (drm_sched_entity_is_ready(entity)) { - if (!drm_sched_can_queue(sched, entity)) { - entity = ERR_PTR(-ENOSPC); - break; - } - - reinit_completion(&entity->entity_idle); - break; + if (!drm_sched_entity_is_ready(entity)) { + entity = NULL; + continue; } - entity = NULL; + + reinit_completion(&entity->entity_idle); + break; } spin_unlock(&rq->lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 93f6fcfe3ba0..85f3a0d5a7be 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -544,9 +544,6 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, void drm_sched_fini(struct drm_gpu_scheduler *sched); -bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, - struct drm_sched_entity *entity); - int drm_sched_job_init(struct drm_sched_job *job, struct drm_sched_entity *entity, u32 credits, void *owner); -- 2.47.1