Re: [PATCH v2] drm/scheduler: remove timeout work_struct from drm_sched_job

Nayan Deshmukh <nayan26deshmukh@xxxxxxxxx> · Wed, 26 Sep 2018 17:55:18 +0200

Hi Christian,

On Wed, Sep 26, 2018, 10:13 AM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote:
Am 26.09.2018 um 09:39 schrieb Lucas Stach:

> Hi Nayan,

>

> Am Mittwoch, den 26.09.2018, 02:09 +0900 schrieb Nayan Deshmukh:

>> having a delayed work item per job is redundant as we only need one

>> per scheduler to track the time out the currently executing job.

>>

>> v2: the first element of the ring mirror list is the currently

>> executing job so we don't need a additional variable for it

>>

>> Signed-off-by: Nayan Deshmukh <nayan26deshmukh@xxxxxxxxx>

>> Suggested-by: Christian König <christian.koenig@xxxxxxx>

>> ---

>>   drivers/gpu/drm/scheduler/sched_main.c | 31 ++++++++++++++++---------------

>>   include/drm/gpu_scheduler.h            |  6 +++---

>>   2 files changed, 19 insertions(+), 18 deletions(-)

>>

>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c

>> index 9ca741f3a0bc..4e8505d51795 100644

>> --- a/drivers/gpu/drm/scheduler/sched_main.c

>> +++ b/drivers/gpu/drm/scheduler/sched_main.c

>> @@ -197,19 +197,15 @@ static void drm_sched_job_finish(struct work_struct *work)

>>       * manages to find this job as the next job in the list, the fence

>>       * signaled check below will prevent the timeout to be restarted.

>>       */

>> -    cancel_delayed_work_sync(&s_job->work_tdr);

>> +    cancel_delayed_work_sync(&sched->work_tdr);

>>   

>>      spin_lock(&sched->job_list_lock);

>> -    /* queue TDR for next job */

>> -    if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&

>> -        !list_is_last(&s_job->node, &sched->ring_mirror_list)) {

>> -            struct drm_sched_job *next = list_next_entry(s_job, node);

>> -

>> -            if (!dma_fence_is_signaled(&next->s_fence->finished))

>> -                    schedule_delayed_work(&next->work_tdr, sched->timeout);

>> -    }

>>      /* remove job from ring_mirror_list */

>>      list_del(&s_job->node);

>> +    /* queue TDR for next job */

>> +    if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&

>> +        !list_empty(&sched->ring_mirror_list))

>> +            schedule_delayed_work(&sched->work_tdr, sched->timeout);

>>      spin_unlock(&sched->job_list_lock);

>>   

>>      dma_fence_put(&s_job->s_fence->finished);

>> @@ -236,16 +232,21 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)

>>      if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&

>>          list_first_entry_or_null(&sched->ring_mirror_list,

>>                                   struct drm_sched_job, node) == s_job)

>> -            schedule_delayed_work(&s_job->work_tdr, sched->timeout);

>> +            schedule_delayed_work(&sched->work_tdr, sched->timeout);

>>      spin_unlock(&sched->job_list_lock);

>>   }

>>   

>>   static void drm_sched_job_timedout(struct work_struct *work)

>>   {

>> -    struct drm_sched_job *job = container_of(work, struct drm_sched_job,

>> -                                             work_tdr.work);

>> +    struct drm_gpu_scheduler *sched;

>> +    struct drm_sched_job *job;

>> +

>> +    sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

>> +    job = list_first_entry_or_null(&sched->ring_mirror_list,

>> +                                   struct drm_sched_job, node);

>>   

>> -    job->sched->ops->timedout_job(job);

>> +    if (job)

>> +            job->sched->ops->timedout_job(job);

> I don't think this is fully robust. Jobs are only removed from the

> ring_mirror_list once the job_finish worker has run. If execution of

> this worker is delayed for any reason (though it's really unlikely for

> a delay as long as the job timeout to happen) you are blaming the wrong

> job here.

>

> So I think what you need to to is find the first job in the ring mirror

> list with an unsignaled finish fence to robustly find the stuck job.

Yeah, that is a known problem I've pointed out as well.

The issue is we have bug reports that this happened before the patch, 

but I'm not 100% sure how.

My suggestion is to move a good part of the logic from 

drm_sched_hw_job_reset() and drm_sched_job_recovery() into 

drm_sched_job_timedout().

E.g. we first call dma_fence_remove_callback() for each job and actually 

check the return value if the fence was already signaled.

If we find a signaled fence we abort and add the callback back to the 

ones where we removed it.

Nayan do you want to take care of this or should I take a look?
I can take care of it. 

Regards,
Nayan

Regards,

Christian.

>

> Regards,

> Lucas

>

>>   }

>>   

>>   /**

>> @@ -315,7 +316,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)

>>      s_job = list_first_entry_or_null(&sched->ring_mirror_list,

>>                                       struct drm_sched_job, node);

>>      if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)

>> -            schedule_delayed_work(&s_job->work_tdr, sched->timeout);

>> +            schedule_delayed_work(&sched->work_tdr, sched->timeout);

>>   

>>      list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {

>>              struct drm_sched_fence *s_fence = s_job->s_fence;

>> @@ -384,7 +385,6 @@ int drm_sched_job_init(struct drm_sched_job *job,

>>   

>>      INIT_WORK(&job->finish_work, drm_sched_job_finish);

>>      INIT_LIST_HEAD(&job->node);

>> -    INIT_DELAYED_WORK(&job->work_tdr, drm_sched_job_timedout);

>>   

>>      return 0;

>>   }

>> @@ -575,6 +575,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,

>>      INIT_LIST_HEAD(&sched->ring_mirror_list);

>>      spin_lock_init(&sched->job_list_lock);

>>      atomic_set(&sched->hw_rq_count, 0);

>> +    INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);

>>      atomic_set(&sched->num_jobs, 0);

>>      atomic64_set(&sched->job_id_count, 0);

>>   

>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h

>> index daec50f887b3..d87b268f1781 100644

>> --- a/include/drm/gpu_scheduler.h

>> +++ b/include/drm/gpu_scheduler.h

>> @@ -175,8 +175,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);

>>    *               finished to remove the job from the

>>    *               @drm_gpu_scheduler.ring_mirror_list.

>>    * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.

>> - * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the timeout

>> - *            interval is over.

>>    * @id: a unique id assigned to each job scheduled on the scheduler.

>>    * @karma: increment on every hang caused by this job. If this exceeds the hang

>>    *         limit of the scheduler then the job is marked guilty and will not

>> @@ -195,7 +193,6 @@ struct drm_sched_job {

>>      struct dma_fence_cb             finish_cb;

>>      struct work_struct              finish_work;

>>      struct list_head                node;

>> -    struct delayed_work             work_tdr;

>>      uint64_t                        id;

>>      atomic_t                        karma;

>>      enum drm_sched_priority         s_priority;

>> @@ -259,6 +256,8 @@ struct drm_sched_backend_ops {

>>    *                 finished.

>>    * @hw_rq_count: the number of jobs currently in the hardware queue.

>>    * @job_id_count: used to assign unique id to the each job.

>> + * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the

>> + *            timeout interval is over.

>>    * @thread: the kthread on which the scheduler which run.

>>    * @ring_mirror_list: the list of jobs which are currently in the job queue.

>>    * @job_list_lock: lock to protect the ring_mirror_list.

>> @@ -278,6 +277,7 @@ struct drm_gpu_scheduler {

>>      wait_queue_head_t               job_scheduled;

>>      atomic_t                        hw_rq_count;

>>      atomic64_t                      job_id_count;

>> +    struct delayed_work             work_tdr;

>>      struct task_struct              *thread;

>>      struct list_head                ring_mirror_list;

>>      spinlock_t                      job_list_lock;

> _______________________________________________

> dri-devel mailing list

> dri-devel@xxxxxxxxxxxxxxxxxxxxx

> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel