On Tue, Jun 29, 2021 at 09:34:56AM +0200, Boris Brezillon wrote:
> Mali Midgard/Bifrost GPUs have 3 hardware queues but only a global GPU
> reset. This leads to extra complexity when we need to synchronize timeout
> works with the reset work. One solution to address that is to have an
> ordered workqueue at the driver level that will be used by the different
> schedulers to queue their timeout work. Thanks to the serialization
> provided by the ordered workqueue we are guaranteed that timeout
> handlers are executed sequentially, and can thus easily reset the GPU
> from the timeout handler without extra synchronization.
>
> v5:
> * Add a new paragraph to the timedout_job() method
>
> v4:
> * Actually use the timeout_wq to queue the timeout work
>
> v3:
> * New patch
>
> Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
> Reviewed-by: Steven Price <steven.price@xxxxxxx>
> Reviewed-by: Lucas Stach <l.stach@xxxxxxxxxxxxxx>
> Cc: Qiang Yu <yuq825@xxxxxxxxx>
> Cc: Emma Anholt <emma@xxxxxxxxxx>
> Cc: Alex Deucher <alexander.deucher@xxxxxxx>
> Cc: "Christian König" <christian.koenig@xxxxxxx>

Acked-by: Daniel Vetter <daniel.vetter@xxxxxxxx>

Also, since I'm occasionally blinded by my own pride: add a Suggested-by
for me? I did spend quite a bit of time pondering how to untangle your
various lockdep splats in the tdr handler :-)

Cheers, Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  3 ++-
>  drivers/gpu/drm/lima/lima_sched.c         |  3 ++-
>  drivers/gpu/drm/panfrost/panfrost_job.c   |  3 ++-
>  drivers/gpu/drm/scheduler/sched_main.c    | 14 +++++++++-----
>  drivers/gpu/drm/v3d/v3d_sched.c           | 10 +++++-----
>  include/drm/gpu_scheduler.h               | 23 ++++++++++++++++++++++-
>  7 files changed, 43 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 47ea46859618..532636ea20bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -488,7 +488,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>
>  	r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>  			   num_hw_submission, amdgpu_job_hang_limit,
> -			   timeout, sched_score, ring->name);
> +			   timeout, NULL, sched_score, ring->name);
>  	if (r) {
>  		DRM_ERROR("Failed to create scheduler on ring %s.\n",
>  			  ring->name);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index 19826e504efc..feb6da1b6ceb 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -190,7 +190,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>
>  	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>  			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> -			     msecs_to_jiffies(500), NULL, dev_name(gpu->dev));
> +			     msecs_to_jiffies(500), NULL, NULL,
> +			     dev_name(gpu->dev));
>  	if (ret)
>  		return ret;
>
> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> index ecf3267334ff..dba8329937a3 100644
> --- a/drivers/gpu/drm/lima/lima_sched.c
> +++ b/drivers/gpu/drm/lima/lima_sched.c
> @@ -508,7 +508,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>  	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>
>  	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> -			      lima_job_hang_limit, msecs_to_jiffies(timeout),
> +			      lima_job_hang_limit,
> +			      msecs_to_jiffies(timeout), NULL,
>  			      NULL, name);
>  }
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 682f2161b999..8ff79fd49577 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -626,7 +626,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>
>  		ret = drm_sched_init(&js->queue[j].sched,
>  				     &panfrost_sched_ops,
> -				     1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
> +				     1, 0,
> +				     msecs_to_jiffies(JOB_TIMEOUT_MS), NULL,
>  				     NULL, "pan_js");
>  		if (ret) {
>  			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index c0a2f8f8d472..3e180f0d4305 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -232,7 +232,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
>  {
>  	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>  	    !list_empty(&sched->pending_list))
> -		schedule_delayed_work(&sched->work_tdr, sched->timeout);
> +		queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
>  }
>
>  /**
> @@ -244,7 +244,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
>   */
>  void drm_sched_fault(struct drm_gpu_scheduler *sched)
>  {
> -	mod_delayed_work(system_wq, &sched->work_tdr, 0);
> +	mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
>  }
>  EXPORT_SYMBOL(drm_sched_fault);
>
> @@ -270,7 +270,7 @@ unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
>  	 * Modify the timeout to an arbitrarily large value. This also prevents
>  	 * the timeout to be restarted when new submissions arrive
>  	 */
> -	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
> +	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
>  			&& time_after(sched_timeout, now))
>  		return sched_timeout - now;
>  	else
> @@ -294,7 +294,7 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
>  	if (list_empty(&sched->pending_list))
>  		cancel_delayed_work(&sched->work_tdr);
>  	else
> -		mod_delayed_work(system_wq, &sched->work_tdr, remaining);
> +		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
>
>  	spin_unlock(&sched->job_list_lock);
>  }
> @@ -837,6 +837,8 @@ static int drm_sched_main(void *param)
>   * @hw_submission: number of hw submissions that can be in flight
>   * @hang_limit: number of times to allow a job to hang before dropping it
>   * @timeout: timeout value in jiffies for the scheduler
> + * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
> + * used
>   * @score: optional score atomic shared with other schedulers
>   * @name: name used for debugging
>   *
> @@ -844,7 +846,8 @@ static int drm_sched_main(void *param)
>   */
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		   const struct drm_sched_backend_ops *ops,
> -		   unsigned hw_submission, unsigned hang_limit, long timeout,
> +		   unsigned hw_submission, unsigned hang_limit,
> +		   long timeout, struct workqueue_struct *timeout_wq,
>  		   atomic_t *score, const char *name)
>  {
>  	int i, ret;
> @@ -852,6 +855,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>  	sched->hw_submission_limit = hw_submission;
>  	sched->name = name;
>  	sched->timeout = timeout;
> +	sched->timeout_wq = timeout_wq ? : system_wq;
>  	sched->hang_limit = hang_limit;
>  	sched->score = score ? score : &sched->_score;
>  	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
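
One aside on the new parameter, since the conversions above only ever pass
NULL here: from a driver that actually wants the serialization, the setup
would look roughly like this (untested sketch, all mydrv_* names are made
up for illustration):

	/* One ordered workqueue per device: at most one timeout handler
	 * can run at any given time, across all hardware queues. */
	mydev->reset_wq = alloc_ordered_workqueue("mydrv-reset", 0);
	if (!mydev->reset_wq)
		return -ENOMEM;

	for (i = 0; i < MYDRV_NUM_QUEUES; i++) {
		ret = drm_sched_init(&mydev->queue[i].sched,
				     &mydrv_sched_ops,
				     hw_submission, hang_limit,
				     msecs_to_jiffies(timeout_ms),
				     mydev->reset_wq, NULL, "mydrv_js");
		if (ret)
			goto err_fini;
	}

Passing NULL instead keeps the current behaviour of queuing the timeout
work on system_wq.
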
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 8992480c88fa..a39bdd5cfc4f 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -402,7 +402,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
>  			     &v3d_bin_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_bin");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
> @@ -412,7 +412,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
>  			     &v3d_render_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_render");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
> @@ -424,7 +424,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
>  			     &v3d_tfu_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_tfu");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
> @@ -437,7 +437,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
>  			     &v3d_csd_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_csd");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
> @@ -449,7 +449,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
>  			     &v3d_cache_clean_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_cache_clean");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 65700511e074..9c41904ffd83 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -253,6 +253,24 @@ struct drm_sched_backend_ops {
>  	 * 5. Restart the scheduler using drm_sched_start(). At that point, new
>  	 *    jobs can be queued, and the scheduler thread is unblocked
>  	 *
> +	 * Note that some GPUs have distinct hardware queues but need to reset
> +	 * the GPU globally, which requires extra synchronization between the
> +	 * timeout handler of the different &drm_gpu_scheduler. One way to
> +	 * achieve this synchronization is to create an ordered workqueue
> +	 * (using alloc_ordered_workqueue()) at the driver level, and pass this
> +	 * queue to drm_sched_init(), to guarantee that timeout handlers are
> +	 * executed sequentially. The above workflow needs to be slightly
> +	 * adjusted in that case:
> +	 *
> +	 * 1. Stop all schedulers impacted by the reset using drm_sched_stop()
> +	 * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
> +	 *    the reset (optional)
> +	 * 3. Issue a GPU reset on all faulty queues (driver-specific)
> +	 * 4. Re-submit jobs on all schedulers impacted by the reset using
> +	 *    drm_sched_resubmit_jobs()
> +	 * 5. Restart all schedulers that were stopped in step #1 using
> +	 *    drm_sched_start()
> +	 *
>  	 * Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal,
>  	 * and the underlying driver has started or completed recovery.
>  	 *
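
The adjusted workflow documented above maps directly onto a timedout_job()
implementation. Rough sketch of what that could look like (untested, the
mydrv_* helpers are made up; step 2, gracefully stopping non-faulty jobs,
is optional and skipped here):

	static enum drm_gpu_sched_stat
	mydrv_job_timedout(struct drm_sched_job *bad)
	{
		struct mydrv_device *mydev = mydrv_job_to_dev(bad);
		int i;

		/* Thanks to the ordered timeout_wq, no other timeout
		 * handler can run concurrently, so the global reset below
		 * needs no extra locking against the other queues' tdr
		 * work. */

		/* 1. Stop all schedulers impacted by the reset;
		 * drm_sched_stop() only re-inserts @bad into the
		 * scheduler it actually belongs to. */
		for (i = 0; i < MYDRV_NUM_QUEUES; i++)
			drm_sched_stop(&mydev->queue[i].sched, bad);

		/* 3. Issue the driver-specific global GPU reset. */
		mydrv_gpu_reset(mydev);

		/* 4. Re-submit the jobs that were pending at reset time. */
		for (i = 0; i < MYDRV_NUM_QUEUES; i++)
			drm_sched_resubmit_jobs(&mydev->queue[i].sched);

		/* 5. Restart the schedulers stopped in step 1. */
		for (i = 0; i < MYDRV_NUM_QUEUES; i++)
			drm_sched_start(&mydev->queue[i].sched, true);

		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

That's roughly the shape panfrost's global reset path ends up with.
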
> @@ -283,6 +301,7 @@ struct drm_sched_backend_ops {
>   *              finished.
>   * @hw_rq_count: the number of jobs currently in the hardware queue.
>   * @job_id_count: used to assign unique id to the each job.
> + * @timeout_wq: workqueue used to queue @work_tdr
>   * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
>   *            timeout interval is over.
>   * @thread: the kthread on which the scheduler which run.
> @@ -307,6 +326,7 @@ struct drm_gpu_scheduler {
>  	wait_queue_head_t		job_scheduled;
>  	atomic_t			hw_rq_count;
>  	atomic64_t			job_id_count;
> +	struct workqueue_struct		*timeout_wq;
>  	struct delayed_work		work_tdr;
>  	struct task_struct		*thread;
>  	struct list_head		pending_list;
> @@ -320,7 +340,8 @@ struct drm_gpu_scheduler {
>
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		   const struct drm_sched_backend_ops *ops,
> -		   uint32_t hw_submission, unsigned hang_limit, long timeout,
> +		   uint32_t hw_submission, unsigned hang_limit,
> +		   long timeout, struct workqueue_struct *timeout_wq,
>  		   atomic_t *score, const char *name);
>
>  void drm_sched_fini(struct drm_gpu_scheduler *sched);
> --
> 2.31.1
>

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch