On 2023-10-05 00:06, Matthew Brost wrote:
> On Thu, Sep 28, 2023 at 12:14:12PM -0400, Luben Tuikov wrote:
>> On 2023-09-19 01:01, Matthew Brost wrote:
>>> Rather than call free_job and run_job in same work item have a dedicated
>>> work item for each. This aligns with the design and intended use of work
>>> queues.
>>>
>>> v2:
>>>   - Test for DMA_FENCE_FLAG_TIMESTAMP_BIT before setting
>>>     timestamp in free_job() work item (Danilo)
>>> v3:
>>>   - Drop forward dec of drm_sched_select_entity (Boris)
>>>   - Return in drm_sched_run_job_work if entity NULL (Boris)
>>>
>>> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx>
>>> ---
>>>  drivers/gpu/drm/scheduler/sched_main.c | 290 +++++++++++++++----------
>>>  include/drm/gpu_scheduler.h            |   8 +-
>>>  2 files changed, 182 insertions(+), 116 deletions(-)
>>>
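A note for readers following along: the end state of this rework is two
independent work items sharing the scheduler's submit_wq, one per stage of
a job's life cycle. Roughly like this (a sketch only, not the patch itself;
I am assuming the init site is drm_sched_init(), which is not among the
hunks quoted below):

        /* Two dedicated work items replace the single work_submit item
         * (assumed init site, for illustration only):
         */
        INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
        INIT_WORK(&sched->work_free_job, drm_sched_free_job_work);

        /* Each stage is then queued, and re-queued, independently: */
        queue_work(sched->submit_wq, &sched->work_run_job);
        queue_work(sched->submit_wq, &sched->work_free_job);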
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index 588c735f7498..1e21d234fb5c 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -213,11 +213,12 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
>>>   * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run
>>>   *
>>>   * @rq: scheduler run queue to check.
>>> + * @dequeue: dequeue selected entity
>>
>> Change this to "peek" as indicated below.
>>
>>>   *
>>>   * Try to find a ready entity, returns NULL if none found.
>>>   */
>>>  static struct drm_sched_entity *
>>> -drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>> +drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool dequeue)
>>>  {
>>>          struct drm_sched_entity *entity;
>>>
>>> @@ -227,8 +228,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>>          if (entity) {
>>>                  list_for_each_entry_continue(entity, &rq->entities, list) {
>>>                          if (drm_sched_entity_is_ready(entity)) {
>>> -                                rq->current_entity = entity;
>>> -                                reinit_completion(&entity->entity_idle);
>>> +                                if (dequeue) {
>>> +                                        rq->current_entity = entity;
>>> +                                        reinit_completion(&entity->entity_idle);
>>> +                                }
>>
>> Please rename "dequeue" or invert its logic, as from this patch it seems that
>> it is hiding (gating out) current behaviour.
>>
>> Ideally, I'd prefer it be inverted, so that current behaviour, i.e. what people
>> are used to the rq_select_entity_*() to do, is default--preserved.
>>
>> Perhaps use "peek" as the name of this new variable, to indicate that
>> we're not setting it to be the current entity.
>>
>> I prefer "peek" to others, as the former tells me "Hey, I'm only
>> peeking at the rq and not really doing the default behaviour I've been
>> doing which you're used to." So, probably use "peek". ("Peek" also has historical
>> significance...).
>>
>
> Peek it is. Will change.
>
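Just so we are looking at the same end shape, something like this is what I
have in mind (a sketch only, with the agreed rename applied; the elided
parts and exact context are illustrative, not the final patch):

        static struct drm_sched_entity *
        drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq, bool peek)
        {
                ...
                if (drm_sched_entity_is_ready(entity)) {
                        /* Default behaviour is preserved; peeking only
                         * looks, without claiming the entity:
                         */
                        if (!peek) {
                                rq->current_entity = entity;
                                reinit_completion(&entity->entity_idle);
                        }
                        spin_unlock(&rq->lock);
                        return entity;
                }
                ...
        }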
>>>                                  spin_unlock(&rq->lock);
>>>                                  return entity;
>>>                          }
>>> @@ -238,8 +241,10 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>>          list_for_each_entry(entity, &rq->entities, list) {
>>>
>>>                  if (drm_sched_entity_is_ready(entity)) {
>>> -                        rq->current_entity = entity;
>>> -                        reinit_completion(&entity->entity_idle);
>>> +                        if (dequeue) {
>>
>> if (!peek) {
>>
>
> +1
>
>>> +                                rq->current_entity = entity;
>>> +                                reinit_completion(&entity->entity_idle);
>>> +                        }
>>>                          spin_unlock(&rq->lock);
>>>                          return entity;
>>>                  }
>>> @@ -257,11 +262,12 @@ drm_sched_rq_select_entity_rr(struct drm_sched_rq *rq)
>>>   * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run
>>>   *
>>>   * @rq: scheduler run queue to check.
>>> + * @dequeue: dequeue selected entity
>>
>> * @peek: Just find, don't set to current.
>>
>
> +1
>
>>>   *
>>>   * Find oldest waiting ready entity, returns NULL if none found.
>>>   */
>>>  static struct drm_sched_entity *
>>> -drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>> +drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq, bool dequeue)
>>>  {
>>>          struct rb_node *rb;
>>>
>>> @@ -271,8 +277,10 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>
>>>                  entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node);
>>>                  if (drm_sched_entity_is_ready(entity)) {
>>> -                        rq->current_entity = entity;
>>> -                        reinit_completion(&entity->entity_idle);
>>> +                        if (dequeue) {
>>
>> if (!peek) {
>>
>>> +                                rq->current_entity = entity;
>>> +                                reinit_completion(&entity->entity_idle);
>>> +                        }
>>>                          break;
>>>                  }
>>>          }
>>> @@ -282,13 +290,102 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>>  }
>>>
>>>  /**
>>> - * drm_sched_submit_queue - scheduler queue submission
>>> + * drm_sched_run_job_queue - queue job submission
>>> + * @sched: scheduler instance
>>> + */
>>
>> Perhaps it would be clearer to a DOC reader if there were verbs
>> in this function comment? I feel this was mentioned in the review
>> to patch 2...
>>
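For instance, something along these lines would do (a suggestion only,
phrased to match the "enqueue ... work" wording I give further down):

        /**
         * drm_sched_run_job_queue - enqueue run-job work
         * @sched: scheduler instance
         *
         * Enqueue the run-job work item on the submit workqueue, unless
         * submission is paused.
         */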
>>> +static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched)
>>> +{
>>> +        if (!READ_ONCE(sched->pause_submit))
>>> +                queue_work(sched->submit_wq, &sched->work_run_job);
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_can_queue -- Can we queue more to the hardware?
>>> + * @sched: scheduler instance
>>> + *
>>> + * Return true if we can push more jobs to the hw, otherwise false.
>>> + */
>>> +static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
>>> +{
>>> +        return atomic_read(&sched->hw_rq_count) <
>>> +                sched->hw_submission_limit;
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_select_entity - Select next entity to process
>>> + *
>>> + * @sched: scheduler instance
>>> + * @dequeue: dequeue selected entity
>>
>> When I see "dequeue" I'm thinking "list_del()". Let's
>> use "peek" here as mentioned above.
>>
>>> + *
>>> + * Returns the entity to process or NULL if none are found.
>>> + */
>>> +static struct drm_sched_entity *
>>> +drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool dequeue)
>>
>> drm_sched_select_entity(struct drm_gpu_scheduler *sched, bool peek)
>>
>
> +1
>
>>> +{
>>> +        struct drm_sched_entity *entity;
>>> +        int i;
>>> +
>>> +        if (!drm_sched_can_queue(sched))
>>> +                return NULL;
>>> +
>>> +        if (sched->single_entity) {
>>> +                if (!READ_ONCE(sched->single_entity->stopped) &&
>>> +                    drm_sched_entity_is_ready(sched->single_entity))
>>> +                        return sched->single_entity;
>>> +
>>> +                return NULL;
>>> +        }
>>> +
>>> +        /* Kernel run queue has higher priority than normal run queue*/
>>> +        for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>> +                entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ?
>>> +                        drm_sched_rq_select_entity_fifo(&sched->sched_rq[i],
>>> +                                                        dequeue) :
>>> +                        drm_sched_rq_select_entity_rr(&sched->sched_rq[i],
>>> +                                                      dequeue);
>>> +                if (entity)
>>> +                        break;
>>> +        }
>>> +
>>> +        return entity;
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_run_job_queue_if_ready - queue job submission if ready
>>>   * @sched: scheduler instance
>>>   */
>>> -static void drm_sched_submit_queue(struct drm_gpu_scheduler *sched)
>>> +static void drm_sched_run_job_queue_if_ready(struct drm_gpu_scheduler *sched)
>>> +{
>>> +        if (drm_sched_select_entity(sched, false))
>>> +                drm_sched_run_job_queue(sched);
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_free_job_queue - queue free job
>>
>> * drm_sched_free_job_queue - enqueue free-job work
>>
>>> + *
>>> + * @sched: scheduler instance to queue free job
>>
>> * @sched: scheduler instance to queue free job work for
>>
>>
>
> Will change both.
>
>>> + */
>>> +static void drm_sched_free_job_queue(struct drm_gpu_scheduler *sched)
>>>  {
>>>          if (!READ_ONCE(sched->pause_submit))
>>> -                queue_work(sched->submit_wq, &sched->work_submit);
>>> +                queue_work(sched->submit_wq, &sched->work_free_job);
>>> +}
>>> +
>>> +/**
>>> + * drm_sched_free_job_queue_if_ready - queue free job if ready
>>
>> * drm_sched_free_job_queue_if_ready - enqueue free-job work if ready
>>
>
> Will change this too.
>
>>> + *
>>> + * @sched: scheduler instance to queue free job
>>> + */
>>> +static void drm_sched_free_job_queue_if_ready(struct drm_gpu_scheduler *sched)
>>> +{
>>> +        struct drm_sched_job *job;
>>> +
>>> +        spin_lock(&sched->job_list_lock);
>>> +        job = list_first_entry_or_null(&sched->pending_list,
>>> +                                       struct drm_sched_job, list);
>>> +        if (job && dma_fence_is_signaled(&job->s_fence->finished))
>>> +                drm_sched_free_job_queue(sched);
>>> +        spin_unlock(&sched->job_list_lock);
>>>  }
>>>
>>>  /**
>>> @@ -310,7 +407,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
>>>          dma_fence_get(&s_fence->finished);
>>>          drm_sched_fence_finished(s_fence, result);
>>>          dma_fence_put(&s_fence->finished);
>>> -        drm_sched_submit_queue(sched);
>>> +        drm_sched_free_job_queue(sched);
>>>  }
>>>
>>>  /**
>>> @@ -885,18 +982,6 @@ void drm_sched_job_cleanup(struct drm_sched_job *job)
>>>  }
>>>  EXPORT_SYMBOL(drm_sched_job_cleanup);
>>>
>>> -/**
>>> - * drm_sched_can_queue -- Can we queue more to the hardware?
>>> - * @sched: scheduler instance
>>> - *
>>> - * Return true if we can push more jobs to the hw, otherwise false.
>>> - */
>>> -static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
>>> -{
>>> -        return atomic_read(&sched->hw_rq_count) <
>>> -                sched->hw_submission_limit;
>>> -}
>>> -
>>>  /**
>>>   * drm_sched_wakeup_if_can_queue - Wake up the scheduler
>>>   * @sched: scheduler instance
>>> @@ -906,43 +991,7 @@ static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched)
>>>  void drm_sched_wakeup_if_can_queue(struct drm_gpu_scheduler *sched)
>>>  {
>>>          if (drm_sched_can_queue(sched))
>>> -                drm_sched_submit_queue(sched);
>>> -}
>>> -
>>> -/**
>>> - * drm_sched_select_entity - Select next entity to process
>>> - *
>>> - * @sched: scheduler instance
>>> - *
>>> - * Returns the entity to process or NULL if none are found.
>>> - */
>>> -static struct drm_sched_entity *
>>> -drm_sched_select_entity(struct drm_gpu_scheduler *sched)
>>> -{
>>> -        struct drm_sched_entity *entity;
>>> -        int i;
>>> -
>>> -        if (!drm_sched_can_queue(sched))
>>> -                return NULL;
>>> -
>>> -        if (sched->single_entity) {
>>> -                if (!READ_ONCE(sched->single_entity->stopped) &&
>>> -                    drm_sched_entity_is_ready(sched->single_entity))
>>> -                        return sched->single_entity;
>>> -
>>> -                return NULL;
>>> -        }
>>> -
>>> -        /* Kernel run queue has higher priority than normal run queue*/
>>> -        for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
>>> -                entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ?
>>> -                        drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) :
>>> -                        drm_sched_rq_select_entity_rr(&sched->sched_rq[i]);
>>> -                if (entity)
>>> -                        break;
>>> -        }
>>> -
>>> -        return entity;
>>> +                drm_sched_run_job_queue(sched);
>>>  }
>>>
>>>  /**
>>> @@ -974,8 +1023,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
>>>                                                  typeof(*next), list);
>>>
>>>          if (next) {
>>> -                next->s_fence->scheduled.timestamp =
>>> -                        job->s_fence->finished.timestamp;
>>> +                if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
>>> +                             &next->s_fence->scheduled.flags))
>>> +                        next->s_fence->scheduled.timestamp =
>>> +                                job->s_fence->finished.timestamp;
>>>                  /* start TO timer for next job */
>>>                  drm_sched_start_timeout(sched);
>>>          }
>>> @@ -1025,74 +1076,84 @@ drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
>>>  EXPORT_SYMBOL(drm_sched_pick_best);
>>>
>>>  /**
>>> - * drm_sched_main - main scheduler thread
>>> + * drm_sched_free_job_work - worker to call free_job
>>>   *
>>> - * @param: scheduler instance
>>> + * @w: free job work
>>>   */
>>> -static void drm_sched_main(struct work_struct *w)
>>> +static void drm_sched_free_job_work(struct work_struct *w)
>>>  {
>>>          struct drm_gpu_scheduler *sched =
>>> -                container_of(w, struct drm_gpu_scheduler, work_submit);
>>> -        struct drm_sched_entity *entity;
>>> +                container_of(w, struct drm_gpu_scheduler, work_free_job);
>>>          struct drm_sched_job *cleanup_job;
>>> -        int r;
>>>
>>>          if (READ_ONCE(sched->pause_submit))
>>>                  return;
>>>
>>>          cleanup_job = drm_sched_get_cleanup_job(sched);
>>> -        entity = drm_sched_select_entity(sched);
>>> -
>>> -        if (!entity && !cleanup_job)
>>> -                return;        /* No more work */
>>> -
>>> -        if (cleanup_job)
>>> +        if (cleanup_job) {
>>>                  sched->ops->free_job(cleanup_job);
>>>
>>> -        if (entity) {
>>> -                struct dma_fence *fence;
>>> -                struct drm_sched_fence *s_fence;
>>> -                struct drm_sched_job *sched_job;
>>> -
>>> -                sched_job = drm_sched_entity_pop_job(entity);
>>> -                if (!sched_job) {
>>> -                        complete_all(&entity->entity_idle);
>>> -                        if (!cleanup_job)
>>> -                                return;        /* No more work */
>>> -                        goto again;
>>> -                }
>>> +                drm_sched_free_job_queue_if_ready(sched);
>>> +                drm_sched_run_job_queue_if_ready(sched);
>>> +        }
>>> +}
>>> +
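One more aside on the pattern itself, before the run-job worker below: each
worker re-arms itself (and its sibling) instead of looping, which is the
idiomatic way to keep servicing a workqueue without monopolizing it. The
generic shape is something like this (illustrative only; example_work_fn(),
more_work_pending() and wq are placeholders, not scheduler API):

        static void example_work_fn(struct work_struct *w)
        {
                /* ... do one unit of work ... */

                /* Re-arming is cheap: queue_work() returns false and does
                 * nothing if the work item is already queued.
                 */
                if (more_work_pending())
                        queue_work(wq, w);
        }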
>>> +/**
>>> + * drm_sched_run_job_work - worker to call run_job
>>> + *
>>> + * @w: run job work
>>> + */
>>> +static void drm_sched_run_job_work(struct work_struct *w)
>>> +{
>>> +        struct drm_gpu_scheduler *sched =
>>> +                container_of(w, struct drm_gpu_scheduler, work_run_job);
>>> +        struct drm_sched_entity *entity;
>>> +        struct dma_fence *fence;
>>> +        struct drm_sched_fence *s_fence;
>>> +        struct drm_sched_job *sched_job;
>>> +        int r;
>>>
>>> -        s_fence = sched_job->s_fence;
>>> +        if (READ_ONCE(sched->pause_submit))
>>> +                return;
>>>
>>> -        atomic_inc(&sched->hw_rq_count);
>>> -        drm_sched_job_begin(sched_job);
>>> +        entity = drm_sched_select_entity(sched, true);
>>> +        if (!entity)
>>> +                return;
>>>
>>> -        trace_drm_run_job(sched_job, entity);
>>> -        fence = sched->ops->run_job(sched_job);
>>> +        sched_job = drm_sched_entity_pop_job(entity);
>>> +        if (!sched_job) {
>>>                  complete_all(&entity->entity_idle);
>>> -        drm_sched_fence_scheduled(s_fence, fence);
>>> +                return;        /* No more work */
>>> +        }
>>>
>>> -        if (!IS_ERR_OR_NULL(fence)) {
>>> -                /* Drop for original kref_init of the fence */
>>> -                dma_fence_put(fence);
>>> +        s_fence = sched_job->s_fence;
>>>
>>> -                r = dma_fence_add_callback(fence, &sched_job->cb,
>>> -                                           drm_sched_job_done_cb);
>>> -                if (r == -ENOENT)
>>> -                        drm_sched_job_done(sched_job, fence->error);
>>> -                else if (r)
>>> -                        DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
>>> -                                  r);
>>> -        } else {
>>> -                drm_sched_job_done(sched_job, IS_ERR(fence) ?
>>> -                                   PTR_ERR(fence) : 0);
>>> -        }
>>> +        atomic_inc(&sched->hw_rq_count);
>>> +        drm_sched_job_begin(sched_job);
>>> +
>>> +        trace_drm_run_job(sched_job, entity);
>>> +        fence = sched->ops->run_job(sched_job);
>>> +        complete_all(&entity->entity_idle);
>>> +        drm_sched_fence_scheduled(s_fence, fence);
>>>
>>> -        wake_up(&sched->job_scheduled);
>>> +        if (!IS_ERR_OR_NULL(fence)) {
>>> +                /* Drop for original kref_init of the fence */
>>> +                dma_fence_put(fence);
>>> +
>>> +                r = dma_fence_add_callback(fence, &sched_job->cb,
>>> +                                           drm_sched_job_done_cb);
>>> +                if (r == -ENOENT)
>>> +                        drm_sched_job_done(sched_job, fence->error);
>>> +                else if (r)
>>> +                        DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
>>> +                                  r);
>>
>> Please align "r);" to the open brace on the previous line. If you're using Emacs
>> with sane Linux settings, press the "Tab" key anywhere on the line to indent it.
>> (It should run c-indent-line-or-region, usually using leading-tabs-only mode. Pressing
>> it again, over and over, on an already indented line, does nothing. Column indenting--say
>> for columns in 2D/3D/etc., array, usually happens using spaces, which is portable.
>> Also please take an overview with "scripts/checkpatch.pl --strict".)
>>
>
> Will run checkpatch.
>
>> Wrap-around was bumped to 100 in the Linux kernel so you can put the 'r' on
>> the same line without style problems.
>>
>
> Using Vi with wrap around of 80 but know 100 is allowed. Will fix.

Ah, yes--I now see where the real problem is. :-D
-- 
Regards,
Luben