On Fri, 25 Aug 2023 15:45:49 +0200 Christian König <christian.koenig@xxxxxxx> wrote: > Am 25.08.23 um 15:36 schrieb Matthew Brost: > > On Fri, Aug 25, 2023 at 10:02:32AM +0200, Christian König wrote: > >> Am 25.08.23 um 04:58 schrieb Matthew Brost: > >>> On Fri, Aug 25, 2023 at 01:04:10AM +0200, Danilo Krummrich wrote: > >>>> On Thu, Aug 10, 2023 at 07:31:32PM -0700, Matthew Brost wrote: > >>>>> Rather than call free_job and run_job in same work item have a dedicated > >>>>> work item for each. This aligns with the design and intended use of work > >>>>> queues. > >>>>> > >>>>> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> > >>>>> --- > >>>>> drivers/gpu/drm/scheduler/sched_main.c | 137 ++++++++++++++++++------- > >>>>> include/drm/gpu_scheduler.h | 8 +- > >>>>> 2 files changed, 106 insertions(+), 39 deletions(-) > >>>>> > >>>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > >>>>> index cede47afc800..b67469eac179 100644 > >>>>> --- a/drivers/gpu/drm/scheduler/sched_main.c > >>>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >>>>> @@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, > >>>>> * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run > >>>>> * > >>>>> * @rq: scheduler run queue to check. > >>>>> + * @dequeue: dequeue selected entity > >>>>> * > >>>>> * Try to find a ready entity, returns NULL if none found. 
> >>>>> */ > >>>>> static struct drm_sched_entity * > >>>>> -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) > >>>>> +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool dequeue) > >>>>> { > >>>>> struct drm_sched_entity *entity; > >>>>> @@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) > >>>>> if (entity) { > >>>>> list_for_each_entry_continue(entity, &rq->entities, list) { > >>>>> if (drm_sched_entity_is_ready(entity)) { > >>>>> - rq->current_entity = entity; > >>>>> - reinit_completion(&entity->entity_idle); > >>>>> + if (dequeue) { > >>>>> + rq->current_entity = entity; > >>>>> + reinit_completion(&entity->entity_idle); > >>>>> + } > >>>>> spin_unlock(&rq->lock); > >>>>> return entity; > >>>>> } > >>>>> @@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) > >>>>> list_for_each_entry(entity, &rq->entities, list) { > >>>>> if (drm_sched_entity_is_ready(entity)) { > >>>>> - rq->current_entity = entity; > >>>>> - reinit_completion(&entity->entity_idle); > >>>>> + if (dequeue) { > >>>>> + rq->current_entity = entity; > >>>>> + reinit_completion(&entity->entity_idle); > >>>>> + } > >>>>> spin_unlock(&rq->lock); > >>>>> return entity; > >>>>> } > >>>>> @@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq) > >>>>> * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run > >>>>> * > >>>>> * @rq: scheduler run queue to check. > >>>>> + * @dequeue: dequeue selected entity > >>>>> * > >>>>> * Find oldest waiting ready entity, returns NULL if none found. 
> >>>>> */ > >>>>> static struct drm_sched_entity * > >>>>> -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) > >>>>> +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool dequeue) > >>>>> { > >>>>> struct rb_node *rb; > >>>>> @@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) > >>>>> entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); > >>>>> if (drm_sched_entity_is_ready(entity)) { > >>>>> - rq->current_entity = entity; > >>>>> - reinit_completion(&entity->entity_idle); > >>>>> + if (dequeue) { > >>>>> + rq->current_entity = entity; > >>>>> + reinit_completion(&entity->entity_idle); > >>>>> + } > >>>>> break; > >>>>> } > >>>>> } > >>>>> @@ -282,13 +290,54 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq) > >>>>> } > >>>>> /** > >>>>> - * drm_sched_submit_queue - scheduler queue submission > >>>>> + * drm_sched_run_job_queue - queue job submission > >>>>> * @sched: scheduler instance > >>>>> */ > >>>>> -static void drm_sched_submit_queue(struct drm_gpu_scheduler *sched) > >>>>> +static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) > >>>>> { > >>>>> if (!READ_ONCE(sched->pause_submit)) > >>>>> - queue_work(sched->submit_wq, &sched->work_submit); > >>>>> + queue_work(sched->submit_wq, &sched->work_run_job); > >>>>> +} > >>>>> + > >>>>> +static struct drm_sched_entity * > >>>>> +drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool dequeue); > >>>>> + > >>>>> +/** > >>>>> + * drm_sched_run_job_queue_if_ready - queue job submission if ready > >>>>> + * @sched: scheduler instance > >>>>> + */ > >>>>> +static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched) > >>>>> +{ > >>>>> + if (drm_sched_select_entity(sched, false)) > >>>>> + drm_sched_run_job_queue(sched); > >>>>> +} > >>>>> + > >>>>> +/** > >>>>> + * drm_sched_free_job_queue - queue free job > >>>>> + * > >>>>> + * @sched: scheduler instance to queue free job > >>>>> + */ > >>>>> +static void 
drm_sched_free_job_queue(struct drm_gpu_scheduler *sched) > >>>>> +{ > >>>>> + if (!READ_ONCE(sched->pause_submit)) > >>>>> + queue_work(sched->submit_wq, &sched->work_free_job); > >>>>> +} > >>>>> + > >>>>> +/** > >>>>> + * drm_sched_free_job_queue_if_ready - queue free job if ready > >>>>> + * > >>>>> + * @sched: scheduler instance to queue free job > >>>>> + */ > >>>>> +static void drm_sched_free_job_queue_if_ready(struct drm_gpu_scheduler *sched) > >>>>> +{ > >>>>> + struct drm_sched_job *job; > >>>>> + > >>>>> + spin_lock(&sched->job_list_lock); > >>>>> + job = list_first_entry_or_null(&sched->pending_list, > >>>>> + struct drm_sched_job, list); > >>>>> + if (job && dma_fence_is_signaled(&job->s_fence->finished)) > >>>>> + drm_sched_free_job_queue(sched); > >>>>> + spin_unlock(&sched->job_list_lock); > >>>>> } > >>>>> /** > >>>>> @@ -310,7 +359,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result) > >>>>> dma_fence_get(&s_fence->finished); > >>>>> drm_sched_fence_finished(s_fence, result); > >>>>> dma_fence_put(&s_fence->finished); > >>>>> - drm_sched_submit_queue(sched); > >>>>> + drm_sched_free_job_queue(sched); > >>>>> } > >>>>> /** > >>>>> @@ -906,18 +955,19 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched) > >>>>> void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched) > >>>>> { > >>>>> if (drm_sched_can_queue(sched)) > >>>>> - drm_sched_submit_queue(sched); > >>>>> + drm_sched_run_job_queue(sched); > >>>>> } > >>>>> /** > >>>>> * drm_sched_select_entity - Select next entity to process > >>>>> * > >>>>> * @sched: scheduler instance > >>>>> + * @dequeue: dequeue selected entity > >>>>> * > >>>>> * Returns the entity to process or NULL if none are found. 
> >>>>> */ > >>>>> static struct drm_sched_entity * > >>>>> -drm_sched_select_entity(struct drm_gpu_scheduler *sched) > >>>>> +drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool dequeue) > >>>>> { > >>>>> struct drm_sched_entity *entity; > >>>>> int i; > >>>>> @@ -935,8 +985,10 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) > >>>>> /* Kernel run queue has higher priority than normal run queue*/ > >>>>> for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { > >>>>> entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ? > >>>>> - drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) : > >>>>> - drm_sched_rq_select_entity_rr(&sched->sched_rq[i]); > >>>>> + drm_sched_rq_select_entity_fifo(&sched->sched_rq[i], > >>>>> + dequeue) : > >>>>> + drm_sched_rq_select_entity_rr(&sched->sched_rq[i], > >>>>> + dequeue); > >>>>> if (entity) > >>>>> break; > >>>>> } > >>>>> @@ -1024,30 +1076,44 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, > >>>>> EXPORT_SYMBOL(drm_sched_pick_best); > >>>>> /** > >>>>> - * drm_sched_main - main scheduler thread > >>>>> + * drm_sched_free_job_work - worker to call free_job > >>>>> * > >>>>> - * @param: scheduler instance > >>>>> + * @w: free job work > >>>>> */ > >>>>> -static void drm_sched_main(struct work_struct *w) > >>>>> +static void drm_sched_free_job_work(struct work_struct *w) > >>>>> { > >>>>> struct drm_gpu_scheduler *sched = > >>>>> - container_of(w, struct drm_gpu_scheduler, work_submit); > >>>>> - struct drm_sched_entity *entity; > >>>>> + container_of(w, struct drm_gpu_scheduler, work_free_job); > >>>>> struct drm_sched_job *cleanup_job; > >>>>> - int r; > >>>>> if (READ_ONCE(sched->pause_submit)) > >>>>> return; > >>>>> cleanup_job = drm_sched_get_cleanup_job(sched); > >>>> I tried this patch with Nouveau and found a race condition: > >>>> > >>>> In drm_sched_run_job_work() the job is added to the pending_list via > >>>> drm_sched_job_begin(), then the run_job() callback is 
called and the scheduled > >>>> fence is signaled. > >>>> > >>>> However, in parallel drm_sched_get_cleanup_job() might be called from > >>>> drm_sched_free_job_work(), which picks the first job from the pending_list and > >>>> for the next job on the pending_list sets the scheduled fence' timestamp field. > >> Well why can this happen in parallel? Either the work items are scheduled to > >> a single threaded work queue or you have protected the pending list with > >> some locks. > >> > > Xe uses a single-threaded work queue, Nouveau does not (desired > > behavior). I'm a bit worried that leaving this single- vs multi-threaded wq decision to drivers is going to cause unnecessary pain, because what was previously a given in terms of run/cleanup execution order (thanks to the kthread+static-drm_sched_main-workflow approach) is now subject to the wq ordering guarantees, which depend on the wq type picked by the driver. > > > > The list of pending jobs is protected by a lock (safe), the race is: > > > > add job to pending list > > run_job > > signal scheduled fence > > > > dequeue from pending list > > free_job > > update timestamp > > > > Once a job is on the pending list its timestamp can be accessed which > > can blow up if scheduled fence isn't signaled or more specifically unless > > DMA_FENCE_FLAG_TIMESTAMP_BIT is set. Ah, so that's the reason for the TIMESTAMP test added in v3. Sorry for the noise in my v3 review, but I still think it'd be beneficial to have that change moved to its own commit.