From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> Having removed one re-lock cycle on the entity->lock in a patch titled "drm/sched: Optimise drm_sched_entity_push_job", with only a tiny bit larger refactoring we can do the same optimisation on the rq->lock. (Currently both drm_sched_rq_add_entity() and drm_sched_rq_update_fifo_locked() take and release the same lock.) To achieve this we rename drm_sched_rq_add_entity() to drm_sched_rq_add_entity_locked(), making it expect the rq->lock to be held, and also add the same expectation to drm_sched_rq_update_fifo_locked(). Finally, to align drm_sched_rq_update_fifo_locked(), drm_sched_rq_add_entity_locked() and drm_sched_rq_remove_fifo_locked() function signatures, we add rq as a parameter to the latter. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx> Cc: Christian König <christian.koenig@xxxxxxx> Cc: Alex Deucher <alexander.deucher@xxxxxxx> Cc: Luben Tuikov <ltuikov89@xxxxxxxxx> Cc: Matthew Brost <matthew.brost@xxxxxxxxx> Cc: Philipp Stanner <pstanner@xxxxxxxxxx> --- drivers/gpu/drm/scheduler/sched_entity.c | 8 ++++-- drivers/gpu/drm/scheduler/sched_main.c | 34 +++++++++++------------- include/drm/gpu_scheduler.h | 7 ++--- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index d982cebc6bee..c48f17faef41 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -517,6 +517,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) if (next) { spin_lock(&entity->lock); drm_sched_rq_update_fifo_locked(entity, + entity->rq, next->submit_ts); spin_unlock(&entity->lock); } @@ -618,11 +619,14 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) sched = rq->sched; atomic_inc(sched->score); - drm_sched_rq_add_entity(rq, entity); + + spin_lock(&rq->lock); + drm_sched_rq_add_entity_locked(rq, entity); if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_update_fifo_locked(entity, submit_ts); + drm_sched_rq_update_fifo_locked(entity, rq, submit_ts); + spin_unlock(&rq->lock); spin_unlock(&entity->lock); drm_sched_wakeup(sched, entity); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 18a952f73ecb..c0d3f6ac3ae3 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -153,17 +153,18 @@ static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); } -static inline void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity) +static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq) { - struct drm_sched_rq *rq = entity->rq; - if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); RB_CLEAR_NODE(&entity->rb_tree_node); } } -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts) +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq, + ktime_t ts) { /* * Both locks need to be grabbed, one to protect from entity->rq change @@ -171,17 +172,14 @@ void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts * other to update the rb tree structure. */ lockdep_assert_held(&entity->lock); + lockdep_assert_held(&rq->lock); - spin_lock(&entity->rq->lock); - - drm_sched_rq_remove_fifo_locked(entity); + drm_sched_rq_remove_fifo_locked(entity, rq); entity->oldest_job_waiting = ts; - rb_add_cached(&entity->rb_tree_node, &entity->rq->rb_tree_root, + rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, drm_sched_entity_compare_before); - - spin_unlock(&entity->rq->lock); } /** @@ -203,25 +201,23 @@ static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, } /** - * drm_sched_rq_add_entity - add an entity + * drm_sched_rq_add_entity_locked - add an entity * * @rq: scheduler run queue * @entity: scheduler entity * * Adds a scheduler entity to the run queue. */ -void drm_sched_rq_add_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity) +void drm_sched_rq_add_entity_locked(struct drm_sched_rq *rq, + struct drm_sched_entity *entity) { + lockdep_assert_held(&rq->lock); + if (!list_empty(&entity->list)) return; - spin_lock(&rq->lock); - atomic_inc(rq->sched->score); list_add_tail(&entity->list, &rq->entities); - - spin_unlock(&rq->lock); } /** @@ -235,6 +231,8 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq, void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity) { + lockdep_assert_held(&entity->lock); + if (list_empty(&entity->list)) return; @@ -247,7 +245,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, rq->current_entity = NULL; if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) - drm_sched_rq_remove_fifo_locked(entity); + drm_sched_rq_remove_fifo_locked(entity, rq); spin_unlock(&rq->lock); } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 80198e6cf537..30686961a379 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -591,12 +591,13 @@ bool drm_sched_dependency_optimized(struct dma_fence* fence, struct drm_sched_entity *entity); void drm_sched_fault(struct drm_gpu_scheduler *sched); -void drm_sched_rq_add_entity(struct drm_sched_rq *rq, - struct drm_sched_entity *entity); +void drm_sched_rq_add_entity_locked(struct drm_sched_rq *rq, + struct drm_sched_entity *entity); void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, struct drm_sched_entity *entity); -void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, ktime_t ts); +void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, + struct drm_sched_rq *rq, ktime_t ts); int drm_sched_entity_init(struct drm_sched_entity *entity, enum drm_sched_priority priority, -- 2.46.0