Please cc dri-devel on all scheduler patches. It's core functionality. Alex On Wed, Aug 25, 2021 at 12:14 AM Monk Liu <Monk.Liu@xxxxxxx> wrote: > > the original logic is wrong in that the timeout will not be retriggered > after the previous job has signaled, and that leads to the scenario that all > jobs in the same scheduler share the same timeout timer from the very > beginning job in this scheduler, which is wrong. > > we should modify the timer every time a previous job has signaled. > > v2: > further clean up the logic, and do the TDR timer cancelling if the signaled job > is the last one in its scheduler. > > Signed-off-by: Monk Liu <Monk.Liu@xxxxxxx> > --- > drivers/gpu/drm/scheduler/sched_main.c | 29 ++++++++++++++++++++--------- > 1 file changed, 20 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > index a2a9536..8c102ac 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -305,8 +305,17 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job) > struct drm_gpu_scheduler *sched = s_job->sched; > > spin_lock(&sched->job_list_lock); > - list_add_tail(&s_job->list, &sched->pending_list); > - drm_sched_start_timeout(sched); > + if (list_empty(&sched->pending_list)) { > + list_add_tail(&s_job->list, &sched->pending_list); > + drm_sched_start_timeout(sched); > + } else { > + /* the old jobs in pending list are not finished yet > + * no need to restart TDR timer here, it is already > + * handled by drm_sched_get_cleanup_job > + */ > + list_add_tail(&s_job->list, &sched->pending_list); > + } > + > spin_unlock(&sched->job_list_lock); > } > > @@ -693,17 +702,22 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) > if (job && dma_fence_is_signaled(&job->s_fence->finished)) { > /* remove job from pending_list */ > list_del_init(&job->list); > + > /* make the scheduled timestamp more accurate */ > next = 
list_first_entry_or_null(&sched->pending_list, > typeof(*next), list); > - if (next) > + if (next) { > + /* if we still have job in pending list we need modify the TDR timer */ > + mod_delayed_work(system_wq, &sched->work_tdr, sched->timeout); > next->s_fence->scheduled.timestamp = > job->s_fence->finished.timestamp; > + } else { > + /* cancel the TDR timer if no job in pending list */ > + cancel_delayed_work(&sched->work_tdr); > + } > > } else { > job = NULL; > - /* queue timeout for next job */ > - drm_sched_start_timeout(sched); > } > > spin_unlock(&sched->job_list_lock); > @@ -791,11 +805,8 @@ static int drm_sched_main(void *param) > (entity = drm_sched_select_entity(sched))) || > kthread_should_stop()); > > - if (cleanup_job) { > + if (cleanup_job) > sched->ops->free_job(cleanup_job); > - /* queue timeout for next job */ > - drm_sched_start_timeout(sched); > - } > > if (!entity) > continue; > -- > 2.7.4 >