v5:
* Add a new paragraph to the timedout_job() documentation
v4:
* Actually use the timeout_wq to queue the timeout work
v3:
* New patch
Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
Reviewed-by: Steven Price <steven.price@xxxxxxx>
Reviewed-by: Lucas Stach <l.stach@xxxxxxxxxxxxxx>
Cc: Qiang Yu <yuq825@xxxxxxxxx>
Cc: Emma Anholt <emma@xxxxxxxxxx>
Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: "Christian König" <christian.koenig@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  3 ++-
drivers/gpu/drm/lima/lima_sched.c         |  3 ++-
drivers/gpu/drm/panfrost/panfrost_job.c   |  3 ++-
drivers/gpu/drm/scheduler/sched_main.c    | 14 +++++++++-----
drivers/gpu/drm/v3d/v3d_sched.c           | 10 +++++-----
include/drm/gpu_scheduler.h               | 23 ++++++++++++++++++++++-
7 files changed, 43 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 47ea46859618..532636ea20bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -488,7 +488,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
- timeout, sched_score, ring->name);
+ timeout, NULL, sched_score, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 19826e504efc..feb6da1b6ceb 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -190,7 +190,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
- msecs_to_jiffies(500), NULL, dev_name(gpu->dev));
+ msecs_to_jiffies(500), NULL, NULL,
+ dev_name(gpu->dev));
if (ret)
return ret;
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index ecf3267334ff..dba8329937a3 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -508,7 +508,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
- lima_job_hang_limit, msecs_to_jiffies(timeout),
+ lima_job_hang_limit,
+ msecs_to_jiffies(timeout), NULL,
NULL, name);
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 682f2161b999..8ff79fd49577 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -626,7 +626,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
ret = drm_sched_init(&js->queue[j].sched,
&panfrost_sched_ops,
- 1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
+ 1, 0,
+ msecs_to_jiffies(JOB_TIMEOUT_MS), NULL,
NULL, "pan_js");
if (ret) {
dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index c0a2f8f8d472..3e180f0d4305 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -232,7 +232,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
!list_empty(&sched->pending_list))
- schedule_delayed_work(&sched->work_tdr, sched->timeout);
+ queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
}
/**
@@ -244,7 +244,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
*/
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
- mod_delayed_work(system_wq, &sched->work_tdr, 0);
+ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);
@@ -270,7 +270,7 @@ unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
* Modify the timeout to an arbitrarily large value. This also prevents
* the timeout from being restarted when new submissions arrive
*/
- if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
+ if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
&& time_after(sched_timeout, now))
return sched_timeout - now;
else
@@ -294,7 +294,7 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
if (list_empty(&sched->pending_list))
cancel_delayed_work(&sched->work_tdr);
else
- mod_delayed_work(system_wq, &sched->work_tdr, remaining);
+ mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
spin_unlock(&sched->job_list_lock);
}
@@ -837,6 +837,8 @@ static int drm_sched_main(void *param)
* @hw_submission: number of hw submissions that can be in flight
* @hang_limit: number of times to allow a job to hang before dropping it
* @timeout: timeout value in jiffies for the scheduler
+ * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
+ * used
* @score: optional score atomic shared with other schedulers
* @name: name used for debugging
*
@@ -844,7 +846,8 @@ static int drm_sched_main(void *param)
*/
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
- unsigned hw_submission, unsigned hang_limit, long timeout,
+ unsigned hw_submission, unsigned hang_limit,
+ long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name)
{
int i, ret;
@@ -852,6 +855,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
sched->hw_submission_limit = hw_submission;
sched->name = name;
sched->timeout = timeout;
+ sched->timeout_wq = timeout_wq ? : system_wq;
sched->hang_limit = hang_limit;
sched->score = score ? score : &sched->_score;
for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
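
For illustration, here is a minimal sketch of how a driver could feed a
dedicated ordered workqueue through the new parameter. Everything outside
the drm_sched_*() and workqueue APIs (my_device, my_sched_ops,
MY_NUM_QUEUES, MY_JOB_TIMEOUT_MS, the names) is hypothetical and not taken
from any in-tree driver:

/* Hypothetical init path: one ordered workqueue shared by all
 * schedulers, so their timeout handlers never run concurrently.
 */
#define MY_NUM_QUEUES		4	/* hypothetical queue count */
#define MY_JOB_TIMEOUT_MS	500	/* hypothetical timeout */

static const struct drm_sched_backend_ops my_sched_ops = {
	/* .run_job, .timedout_job, .free_job, ... */
};

struct my_device {
	struct workqueue_struct *reset_wq;
	struct drm_gpu_scheduler sched[MY_NUM_QUEUES];
};

static int my_sched_init(struct my_device *mdev)
{
	int i, ret;

	mdev->reset_wq = alloc_ordered_workqueue("my-gpu-reset", 0);
	if (!mdev->reset_wq)
		return -ENOMEM;

	for (i = 0; i < MY_NUM_QUEUES; i++) {
		ret = drm_sched_init(&mdev->sched[i], &my_sched_ops,
				     1, 0,
				     msecs_to_jiffies(MY_JOB_TIMEOUT_MS),
				     mdev->reset_wq, /* new timeout_wq arg */
				     NULL, "my_sched");
		if (ret)
			return ret;
	}
	return 0;
}

Passing NULL instead of mdev->reset_wq keeps the previous behaviour, since
drm_sched_init() falls back to system_wq.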
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 8992480c88fa..a39bdd5cfc4f 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -402,7 +402,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
&v3d_bin_sched_ops,
hw_jobs_limit, job_hang_limit,
- msecs_to_jiffies(hang_limit_ms),
+ msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_bin");
if (ret) {
dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
@@ -412,7 +412,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
&v3d_render_sched_ops,
hw_jobs_limit, job_hang_limit,
- msecs_to_jiffies(hang_limit_ms),
+ msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_render");
if (ret) {
dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
@@ -424,7 +424,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
&v3d_tfu_sched_ops,
hw_jobs_limit, job_hang_limit,
- msecs_to_jiffies(hang_limit_ms),
+ msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_tfu");
if (ret) {
dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
@@ -437,7 +437,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
&v3d_csd_sched_ops,
hw_jobs_limit, job_hang_limit,
- msecs_to_jiffies(hang_limit_ms),
+ msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_csd");
if (ret) {
dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
@@ -449,7 +449,7 @@ v3d_sched_init(struct v3d_dev *v3d)
ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
&v3d_cache_clean_sched_ops,
hw_jobs_limit, job_hang_limit,
- msecs_to_jiffies(hang_limit_ms),
+ msecs_to_jiffies(hang_limit_ms), NULL,
NULL, "v3d_cache_clean");
if (ret) {
dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 65700511e074..9c41904ffd83 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -253,6 +253,24 @@ struct drm_sched_backend_ops {
* 5. Restart the scheduler using drm_sched_start(). At that point, new
* jobs can be queued, and the scheduler thread is unblocked
*
+ * Note that some GPUs have distinct hardware queues but need to reset
+ * the GPU globally, which requires extra synchronization between the
+ * timeout handlers of the different &drm_gpu_scheduler instances. One way to
+ * achieve this synchronization is to create an ordered workqueue
+ * (using alloc_ordered_workqueue()) at the driver level, and pass this
+ * queue to drm_sched_init(), to guarantee that timeout handlers are
+ * executed sequentially. The above workflow needs to be slightly
+ * adjusted in that case:
+ *
+ * 1. Stop all schedulers impacted by the reset using drm_sched_stop()
+ * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
+ * the reset (optional)
+ * 3. Issue a GPU reset on all faulty queues (driver-specific)
+ * 4. Re-submit jobs on all schedulers impacted by the reset using
+ * drm_sched_resubmit_jobs()
+ * 5. Restart all schedulers that were stopped in step #1 using
+ * drm_sched_start()
+ *
* Return DRM_GPU_SCHED_STAT_NOMINAL when all is normal,
* and the underlying driver has started or completed recovery.
*
@@ -283,6 +301,7 @@ struct drm_sched_backend_ops {
* finished.
* @hw_rq_count: the number of jobs currently in the hardware queue.
* @job_id_count: used to assign a unique id to each job.
+ * @timeout_wq: workqueue used to queue @work_tdr
* @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
* timeout interval is over.
* @thread: the kthread on which the scheduler runs.
@@ -307,6 +326,7 @@ struct drm_gpu_scheduler {
wait_queue_head_t job_scheduled;
atomic_t hw_rq_count;
atomic64_t job_id_count;
+ struct workqueue_struct *timeout_wq;
struct delayed_work work_tdr;
struct task_struct *thread;
struct list_head pending_list;
@@ -320,7 +340,8 @@ struct drm_gpu_scheduler {
int drm_sched_init(struct drm_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
- uint32_t hw_submission, unsigned hang_limit, long timeout,
+ uint32_t hw_submission, unsigned hang_limit,
+ long timeout, struct workqueue_struct *timeout_wq,
atomic_t *score, const char *name);
void drm_sched_fini(struct drm_gpu_scheduler *sched);
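
To make the adjusted workflow documented above concrete, here is a rough
sketch of a timedout_job() handler for a GPU that needs a global reset,
assuming all schedulers were initialized with the same ordered workqueue
as described in the new documentation. my_device, to_my_device(),
my_gpu_reset() and MY_NUM_QUEUES are hypothetical driver internals, not
part of this patch:

static enum drm_gpu_sched_stat
my_timedout_job(struct drm_sched_job *bad)
{
	struct my_device *mdev = to_my_device(bad->sched);
	int i;

	/* This handler runs on the driver's ordered workqueue, so it is
	 * serialized against the timeout handlers of all other queues.
	 */

	/* 1. Stop all schedulers impacted by the reset, passing the bad
	 *    job only to the scheduler it belongs to.
	 */
	for (i = 0; i < MY_NUM_QUEUES; i++)
		drm_sched_stop(&mdev->sched[i],
			       bad->sched == &mdev->sched[i] ? bad : NULL);

	/* 2. (Optional) try to gracefully stop non-faulty jobs here. */

	/* 3. Issue the GPU reset (driver-specific). */
	my_gpu_reset(mdev);

	/* 4. Re-submit jobs on all impacted schedulers. */
	for (i = 0; i < MY_NUM_QUEUES; i++)
		drm_sched_resubmit_jobs(&mdev->sched[i]);

	/* 5. Restart all schedulers stopped in step 1. */
	for (i = 0; i < MY_NUM_QUEUES; i++)
		drm_sched_start(&mdev->sched[i], true);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}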