On Mon, Dec 30, 2024 at 04:52:48PM +0000, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> > > Round-robin being the non-default policy and unclear how much it is used, I don't know either, but would be nice if we could instead remove it. > we can notice that it can be implemented using the FIFO data structures if > we only invent a fake submit timestamp which is monotonically increasing > inside drm_sched_rq instances. > > So instead of remembering which was the last entity the scheduler worker > picked, we can bump the picked one to the bottom of the tree, achieving > the same round-robin behaviour. > > Advantage is that we can consolidate to a single code path and remove a > bunch of code. Downside is round-robin mode now needs to lock on the job > pop path but that should not be visible. I guess you did test both scheduler strategies with this change? > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> > Cc: Christian König <christian.koenig@xxxxxxx> > Cc: Danilo Krummrich <dakr@xxxxxxxxxx> > Cc: Matthew Brost <matthew.brost@xxxxxxxxx> > Cc: Philipp Stanner <pstanner@xxxxxxxxxx> > --- > drivers/gpu/drm/scheduler/sched_entity.c | 53 ++++++++----- > drivers/gpu/drm/scheduler/sched_main.c | 99 +++--------------------- > include/drm/gpu_scheduler.h | 5 +- > 3 files changed, 48 insertions(+), 109 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c > index 8e910586979e..cb5f596b48b7 100644 > --- a/drivers/gpu/drm/scheduler/sched_entity.c > +++ b/drivers/gpu/drm/scheduler/sched_entity.c > @@ -473,9 +473,20 @@ drm_sched_job_dependency(struct drm_sched_job *job, > return NULL; > } > > +static ktime_t > +drm_sched_rq_get_rr_deadline(struct drm_sched_rq *rq) > +{ > + lockdep_assert_held(&rq->lock); > + > + rq->rr_deadline = ktime_add_ns(rq->rr_deadline, 1); > + > + return rq->rr_deadline; > +} > + > struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity 
*entity) > { > - struct drm_sched_job *sched_job; > + struct drm_sched_job *sched_job, *next_job; > + struct drm_sched_rq *rq; > > sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); > if (!sched_job) > @@ -510,24 +521,28 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) > * Update the entity's location in the min heap according to > * the timestamp of the next job, if any. > */ > - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) { > - struct drm_sched_job *next; > - struct drm_sched_rq *rq; > + next_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); > > - spin_lock(&entity->lock); > - rq = entity->rq; > - spin_lock(&rq->lock); > - next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); > - if (next) { > - drm_sched_rq_update_fifo_locked(entity, rq, > - next->submit_ts); > - } else { > - drm_sched_rq_remove_fifo_locked(entity, rq); > - } > - spin_unlock(&rq->lock); > - spin_unlock(&entity->lock); > + spin_lock(&entity->lock); > + rq = entity->rq; > + spin_lock(&rq->lock); > + > + if (next_job) { > + ktime_t ts; > + > + if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) > + ts = next_job->submit_ts; > + else > + ts = drm_sched_rq_get_rr_deadline(rq); > + > + drm_sched_rq_update_fifo_locked(entity, rq, ts); > + } else { > + drm_sched_rq_remove_fifo_locked(entity, rq); > } > > + spin_unlock(&rq->lock); > + spin_unlock(&entity->lock); > + > /* Jobs and entities might have different lifecycles. Since we're > * removing the job from the entities queue, set the jobs entity pointer > * to NULL to prevent any future access of the entity through this job. 
> @@ -624,9 +639,9 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) > > spin_lock(&rq->lock); > drm_sched_rq_add_entity(rq, entity); > - > - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) > - drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); > + if (drm_sched_policy == DRM_SCHED_POLICY_RR) > + submit_ts = drm_sched_rq_get_rr_deadline(rq); > + drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); > > spin_unlock(&rq->lock); > spin_unlock(&entity->lock); > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > index 9beb4c611988..eb22b1b7de36 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -189,7 +189,6 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, > spin_lock_init(&rq->lock); > INIT_LIST_HEAD(&rq->entities); > rq->rb_tree_root = RB_ROOT_CACHED; > - rq->current_entity = NULL; > rq->sched = sched; > } > > @@ -235,82 +234,13 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, > atomic_dec(rq->sched->score); > list_del_init(&entity->list); > > - if (rq->current_entity == entity) > - rq->current_entity = NULL; > - > - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) > - drm_sched_rq_remove_fifo_locked(entity, rq); > + drm_sched_rq_remove_fifo_locked(entity, rq); > > spin_unlock(&rq->lock); > } > > /** > - * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run > - * > - * @sched: the gpu scheduler > - * @rq: scheduler run queue to check. > - * > - * Try to find the next ready entity. > - * > - * Return an entity if one is found; return an error-pointer (!NULL) if an > - * entity was ready, but the scheduler had insufficient credits to accommodate > - * its job; return NULL, if no ready entity was found. 
> - */ > -static struct drm_sched_entity * > -drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, > - struct drm_sched_rq *rq) > -{ > - struct drm_sched_entity *entity; > - > - spin_lock(&rq->lock); > - > - entity = rq->current_entity; > - if (entity) { > - list_for_each_entry_continue(entity, &rq->entities, list) { > - if (drm_sched_entity_is_ready(entity)) { > - /* If we can't queue yet, preserve the current > - * entity in terms of fairness. > - */ > - if (!drm_sched_can_queue(sched, entity)) { > - spin_unlock(&rq->lock); > - return ERR_PTR(-ENOSPC); > - } > - > - rq->current_entity = entity; > - reinit_completion(&entity->entity_idle); > - spin_unlock(&rq->lock); > - return entity; > - } > - } > - } > - > - list_for_each_entry(entity, &rq->entities, list) { > - if (drm_sched_entity_is_ready(entity)) { > - /* If we can't queue yet, preserve the current entity in > - * terms of fairness. > - */ > - if (!drm_sched_can_queue(sched, entity)) { > - spin_unlock(&rq->lock); > - return ERR_PTR(-ENOSPC); > - } > - > - rq->current_entity = entity; > - reinit_completion(&entity->entity_idle); > - spin_unlock(&rq->lock); > - return entity; > - } > - > - if (entity == rq->current_entity) > - break; > - } > - > - spin_unlock(&rq->lock); > - > - return NULL; > -} > - > -/** > - * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run > + * drm_sched_rq_select_entity - Select an entity which provides a job to run > * > * @sched: the gpu scheduler > * @rq: scheduler run queue to check. > @@ -322,32 +252,29 @@ drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, > * its job; return NULL, if no ready entity was found. 
> */ > static struct drm_sched_entity * > -drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, > - struct drm_sched_rq *rq) > +drm_sched_rq_select_entity(struct drm_gpu_scheduler *sched, > + struct drm_sched_rq *rq) > { > + struct drm_sched_entity *entity = NULL; > struct rb_node *rb; > > spin_lock(&rq->lock); > for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { > - struct drm_sched_entity *entity; > - > entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); > if (drm_sched_entity_is_ready(entity)) { > - /* If we can't queue yet, preserve the current entity in > - * terms of fairness. > - */ > if (!drm_sched_can_queue(sched, entity)) { > - spin_unlock(&rq->lock); > - return ERR_PTR(-ENOSPC); > + entity = ERR_PTR(-ENOSPC); > + break; > } > > reinit_completion(&entity->entity_idle); > break; > } > + entity = NULL; > } > spin_unlock(&rq->lock); > > - return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; > + return entity; > } The whole diff of drm_sched_rq_select_entity_fifo() (except for the name) has nothing to do with the patch, does it? If you want refactor things, please do it in a separate patch. > > /** > @@ -1045,20 +972,18 @@ void drm_sched_wakeup(struct drm_gpu_scheduler *sched) > static struct drm_sched_entity * > drm_sched_select_entity(struct drm_gpu_scheduler *sched) > { > - struct drm_sched_entity *entity; > + struct drm_sched_entity *entity = NULL; > int i; > > /* Start with the highest priority. > */ > for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { > - entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? > - drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : > - drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); > + entity = drm_sched_rq_select_entity(sched, sched->sched_rq[i]); > if (entity) > break; > } > > - return IS_ERR(entity) ? NULL : entity; > + return IS_ERR(entity) ? 
NULL : entity; > } > > /** > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > index 978ca621cc13..db65600732b9 100644 > --- a/include/drm/gpu_scheduler.h > +++ b/include/drm/gpu_scheduler.h > @@ -245,8 +245,7 @@ struct drm_sched_entity { > * struct drm_sched_rq - queue of entities to be scheduled. > * > * @sched: the scheduler to which this rq belongs to. > - * @lock: protects @entities, @rb_tree_root and @current_entity. > - * @current_entity: the entity which is to be scheduled. > + * @lock: protects @entities, @rb_tree_root and @rr_deadline. > * @entities: list of the entities to be scheduled. > * @rb_tree_root: root of time based priority queue of entities for FIFO scheduling > * > @@ -259,7 +258,7 @@ struct drm_sched_rq { > > spinlock_t lock; > /* Following members are protected by the @lock: */ > - struct drm_sched_entity *current_entity; > + ktime_t rr_deadline; > struct list_head entities; > struct rb_root_cached rb_tree_root; > }; > -- > 2.47.1 >