The GPU scheduler currently does not ensure that its pending_list is
empty before performing various other teardown tasks in
drm_sched_fini().

If there are still jobs in the pending_list, this is problematic
because after scheduler teardown no one will call
backend_ops.free_job() anymore. This would, consequently, result in
memory leaks.

One way to solve this is to implement reference counting for struct
drm_gpu_scheduler itself. Each job added to the pending_list takes a
reference; each job removed from it drops a reference. This approach
would keep the scheduler running even after users have called
drm_sched_fini(), and it would ultimately stop only after the last job
has been removed from the pending_list.

Add reference counting to struct drm_gpu_scheduler. Move the teardown
logic to __drm_sched_fini() and have drm_sched_fini() simply drop a
reference.

Suggested-by: Danilo Krummrich <dakr@xxxxxxxxxx>
Signed-off-by: Philipp Stanner <pstanner@xxxxxxxxxx>
---
Hi all,

since the scheduler has many stakeholders, I want this solution
discussed as an RFC first.

The advantage of this version is that it does not block
drm_sched_fini(); the price paid for that is that the scheduler keeps
running until all jobs are gone.

Cheers,
P.
---
 drivers/gpu/drm/scheduler/sched_main.c | 17 ++++++++++++++++-
 include/drm/gpu_scheduler.h            |  5 +++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 7e90c9f95611..62b453c8ed76 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -99,6 +99,8 @@ int drm_sched_policy = DRM_SCHED_POLICY_FIFO;
 MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default).");
 module_param_named(sched_policy, drm_sched_policy, int, 0444);
 
+static void __drm_sched_fini(struct kref *jobs_remaining);
+
 static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched)
 {
 	u32 credits;
@@ -540,6 +542,7 @@ static void drm_sched_job_begin(struct drm_sched_job *s_job)
 
 	spin_lock(&sched->job_list_lock);
 	list_add_tail(&s_job->list, &sched->pending_list);
+	kref_get(&sched->jobs_remaining);
 	drm_sched_start_timeout(sched);
 	spin_unlock(&sched->job_list_lock);
 }
@@ -564,6 +567,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 		 * is parked at which point it's safe.
 		 */
 		list_del_init(&job->list);
+		kref_put(&sched->jobs_remaining, __drm_sched_fini);
 		spin_unlock(&sched->job_list_lock);
 
 		status = job->sched->ops->timedout_job(job);
@@ -637,6 +641,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 			 */
 			spin_lock(&sched->job_list_lock);
 			list_del_init(&s_job->list);
+			kref_put(&sched->jobs_remaining, __drm_sched_fini);
 			spin_unlock(&sched->job_list_lock);
 
 			/*
@@ -1084,6 +1089,7 @@ drm_sched_get_finished_job(struct drm_gpu_scheduler *sched)
 	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
 		/* remove job from pending_list */
 		list_del_init(&job->list);
+		kref_put(&sched->jobs_remaining, __drm_sched_fini);
 
 		/* cancel this job's TO timer */
 		cancel_delayed_work(&sched->work_tdr);
@@ -1303,6 +1309,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	init_waitqueue_head(&sched->job_scheduled);
 	INIT_LIST_HEAD(&sched->pending_list);
 	spin_lock_init(&sched->job_list_lock);
+	kref_init(&sched->jobs_remaining);
 	atomic_set(&sched->credit_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
 	INIT_WORK(&sched->work_run_job, drm_sched_run_job_work);
@@ -1334,11 +1341,14 @@ EXPORT_SYMBOL(drm_sched_init);
  *
  * Tears down and cleans up the scheduler.
  */
-void drm_sched_fini(struct drm_gpu_scheduler *sched)
+static void __drm_sched_fini(struct kref *jobs_remaining)
 {
+	struct drm_gpu_scheduler *sched;
 	struct drm_sched_entity *s_entity;
 	int i;
 
+	sched = container_of(jobs_remaining, struct drm_gpu_scheduler, jobs_remaining);
+
 	drm_sched_wqueue_stop(sched);
 
 	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
@@ -1368,6 +1378,11 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 	kfree(sched->sched_rq);
 	sched->sched_rq = NULL;
 }
+
+void drm_sched_fini(struct drm_gpu_scheduler *sched)
+{
+	kref_put(&sched->jobs_remaining, __drm_sched_fini);
+}
 EXPORT_SYMBOL(drm_sched_fini);
 
 /**
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 5acc64954a88..b7fad8294861 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -29,6 +29,7 @@
 #include <linux/completion.h>
 #include <linux/xarray.h>
 #include <linux/workqueue.h>
+#include <linux/kref.h>
 
 #define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
 
@@ -495,6 +496,9 @@ struct drm_sched_backend_ops {
  *              waits on this wait queue until all the scheduled jobs are
  *              finished.
  * @job_id_count: used to assign unique id to the each job.
+ * @jobs_remaining: submitted jobs take a refcount of the scheduler to prevent it
+ *                  from terminating before the last job has been removed from
+ *                  @pending_list.
  * @submit_wq: workqueue used to queue @work_run_job and @work_free_job
  * @timeout_wq: workqueue used to queue @work_tdr
  * @work_run_job: work which calls run_job op of each scheduler.
@@ -525,6 +529,7 @@ struct drm_gpu_scheduler {
 	struct drm_sched_rq **sched_rq;
 	wait_queue_head_t job_scheduled;
 	atomic64_t job_id_count;
+	struct kref jobs_remaining;
 	struct workqueue_struct *submit_wq;
 	struct workqueue_struct *timeout_wq;
 	struct work_struct work_run_job;
-- 
2.46.0
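
P.S.: For illustration, below is a minimal, self-contained userspace
sketch of the lifecycle the patch implements. It is an analogy, not
kernel code: C11 atomics stand in for struct kref, and mock_sched,
mock_put() and friends are invented names. drm_sched_init()
corresponds to taking the initial reference, every job on the
pending_list holds one more, drm_sched_fini() drops only the initial
one, and teardown therefore runs only once the last pending job is
gone.

/* Sketch only: models the jobs_remaining refcount with C11 atomics. */
#include <stdatomic.h>
#include <stdio.h>

struct mock_sched {
	atomic_int jobs_remaining;	/* stands in for struct kref */
};

/* stands in for __drm_sched_fini(), the kref release callback */
static void mock_sched_release(struct mock_sched *sched)
{
	printf("teardown: pending_list empty, freeing scheduler resources\n");
}

static void mock_put(struct mock_sched *sched)
{
	/* like kref_put(): run the release callback on the final reference */
	if (atomic_fetch_sub(&sched->jobs_remaining, 1) == 1)
		mock_sched_release(sched);
}

int main(void)
{
	struct mock_sched sched;

	atomic_init(&sched.jobs_remaining, 1);		/* drm_sched_init(): kref_init() */

	atomic_fetch_add(&sched.jobs_remaining, 1);	/* job 1 enters pending_list */
	atomic_fetch_add(&sched.jobs_remaining, 1);	/* job 2 enters pending_list */

	mock_put(&sched);	/* drm_sched_fini(): must not tear down yet */
	printf("fini called, jobs still pending\n");

	mock_put(&sched);	/* job 1 leaves pending_list */
	mock_put(&sched);	/* job 2 leaves: last reference, teardown runs now */

	return 0;
}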