On Thu, Sep 14, 2023 at 12:23:35AM -0400, Luben Tuikov wrote: > On 2023-09-14 00:18, Luben Tuikov wrote: > > On 2023-09-11 22:16, Matthew Brost wrote: > >> Rather than a global modparam for scheduling policy, move the scheduling > >> policy to scheduler / entity so user can control each scheduler / entity > >> policy. > >> > >> v2: > >> - s/DRM_SCHED_POLICY_MAX/DRM_SCHED_POLICY_COUNT (Luben) > >> - Only include policy in scheduler (Luben) > >> > >> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> > >> --- > >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 + > >> drivers/gpu/drm/etnaviv/etnaviv_sched.c | 3 ++- > >> drivers/gpu/drm/lima/lima_sched.c | 3 ++- > >> drivers/gpu/drm/msm/msm_ringbuffer.c | 3 ++- > >> drivers/gpu/drm/nouveau/nouveau_sched.c | 3 ++- > >> drivers/gpu/drm/panfrost/panfrost_job.c | 3 ++- > >> drivers/gpu/drm/scheduler/sched_entity.c | 24 ++++++++++++++++++---- > >> drivers/gpu/drm/scheduler/sched_main.c | 23 +++++++++++++++------ > >> drivers/gpu/drm/v3d/v3d_sched.c | 15 +++++++++----- > >> include/drm/gpu_scheduler.h | 20 ++++++++++++------ > >> 10 files changed, 72 insertions(+), 26 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> index c83a76bccc1d..ecb00991dd51 100644 > >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > >> @@ -2309,6 +2309,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) > >> ring->num_hw_submission, 0, > >> timeout, adev->reset_domain->wq, > >> ring->sched_score, ring->name, > >> + DRM_SCHED_POLICY_DEFAULT, > >> adev->dev); > >> if (r) { > >> DRM_ERROR("Failed to create scheduler on ring %s.\n", > >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >> index 618a804ddc34..3646f995ca94 100644 > >> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > >> @@ -137,7 
+137,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) > >> ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, NULL, > >> etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, > >> msecs_to_jiffies(500), NULL, NULL, > >> - dev_name(gpu->dev), gpu->dev); > >> + dev_name(gpu->dev), DRM_SCHED_POLICY_DEFAULT, > >> + gpu->dev); > >> if (ret) > >> return ret; > >> > >> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c > >> index 8d858aed0e56..465d4bf3882b 100644 > >> --- a/drivers/gpu/drm/lima/lima_sched.c > >> +++ b/drivers/gpu/drm/lima/lima_sched.c > >> @@ -491,7 +491,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name) > >> return drm_sched_init(&pipe->base, &lima_sched_ops, NULL, 1, > >> lima_job_hang_limit, > >> msecs_to_jiffies(timeout), NULL, > >> - NULL, name, pipe->ldev->dev); > >> + NULL, name, DRM_SCHED_POLICY_DEFAULT, > >> + pipe->ldev->dev); > >> } > >> > >> void lima_sched_pipe_fini(struct lima_sched_pipe *pipe) > >> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c > >> index b8865e61b40f..f45e674a0aaf 100644 > >> --- a/drivers/gpu/drm/msm/msm_ringbuffer.c > >> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c > >> @@ -96,7 +96,8 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, > >> > >> ret = drm_sched_init(&ring->sched, &msm_sched_ops, NULL, > >> num_hw_submissions, 0, sched_timeout, > >> - NULL, NULL, to_msm_bo(ring->bo)->name, gpu->dev->dev); > >> + NULL, NULL, to_msm_bo(ring->bo)->name, > >> + DRM_SCHED_POLICY_DEFAULT, gpu->dev->dev); > >> if (ret) { > >> goto fail; > >> } > >> diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c > >> index d458c2227d4f..70e497e40c70 100644 > >> --- a/drivers/gpu/drm/nouveau/nouveau_sched.c > >> +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c > >> @@ -431,7 +431,8 @@ int nouveau_sched_init(struct nouveau_drm *drm) > >> > >> return drm_sched_init(sched, 
&nouveau_sched_ops, NULL, > >> NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, > >> - NULL, NULL, "nouveau_sched", drm->dev->dev); > >> + NULL, NULL, "nouveau_sched", > >> + DRM_SCHED_POLICY_DEFAULT, drm->dev->dev); > >> } > >> > >> void nouveau_sched_fini(struct nouveau_drm *drm) > >> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c > >> index 326ca1ddf1d7..ad36bf3a4699 100644 > >> --- a/drivers/gpu/drm/panfrost/panfrost_job.c > >> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > >> @@ -835,7 +835,8 @@ int panfrost_job_init(struct panfrost_device *pfdev) > >> nentries, 0, > >> msecs_to_jiffies(JOB_TIMEOUT_MS), > >> pfdev->reset.wq, > >> - NULL, "pan_js", pfdev->dev); > >> + NULL, "pan_js", DRM_SCHED_POLICY_DEFAULT, > >> + pfdev->dev); > >> if (ret) { > >> dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret); > >> goto err_sched; > >> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c > >> index a42763e1429d..65a972b52eda 100644 > >> --- a/drivers/gpu/drm/scheduler/sched_entity.c > >> +++ b/drivers/gpu/drm/scheduler/sched_entity.c > >> @@ -33,6 +33,20 @@ > >> #define to_drm_sched_job(sched_job) \ > >> container_of((sched_job), struct drm_sched_job, queue_node) > >> > >> +static bool bad_policies(struct drm_gpu_scheduler **sched_list, > >> + unsigned int num_sched_list) > > > > Rename the function to the status quo, > > drm_sched_policy_mismatch(... > > Will do. > >> +{ > >> + enum drm_sched_policy sched_policy = sched_list[0]->sched_policy; > >> + unsigned int i; > >> + > >> + /* All schedule policies must match */ > >> + for (i = 1; i < num_sched_list; ++i) > >> + if (sched_policy != sched_list[i]->sched_policy) > >> + return true; > >> + > >> + return false; > >> +} > >> + > >> /** > >> * drm_sched_entity_init - Init a context entity used by scheduler when > >> * submit to HW ring. 
> >> @@ -62,7 +76,8 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, > >> unsigned int num_sched_list, > >> atomic_t *guilty) > >> { > >> - if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0]))) > >> + if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])) || > >> + bad_policies(sched_list, num_sched_list)) > >> return -EINVAL; > >> > >> memset(entity, 0, sizeof(struct drm_sched_entity)); > >> @@ -486,7 +501,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) > >> * Update the entity's location in the min heap according to > >> * the timestamp of the next job, if any. > >> */ > >> - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) { > >> + if (entity->rq->sched->sched_policy == DRM_SCHED_POLICY_FIFO) { > >> struct drm_sched_job *next; > >> > >> next = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); > >> @@ -558,7 +573,8 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) > >> void drm_sched_entity_push_job(struct drm_sched_job *sched_job) > >> { > >> struct drm_sched_entity *entity = sched_job->entity; > >> - bool first; > >> + bool first, fifo = entity->rq->sched->sched_policy == > >> + DRM_SCHED_POLICY_FIFO; > >> ktime_t submit_ts; > >> > >> trace_drm_sched_job(sched_job, entity); > >> @@ -587,7 +603,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job) > >> drm_sched_rq_add_entity(entity->rq, entity); > >> spin_unlock(&entity->rq_lock); > >> > >> - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) > >> + if (fifo) > >> drm_sched_rq_update_fifo(entity, submit_ts); > >> > >> drm_sched_wakeup_if_can_queue(entity->rq->sched); > >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > >> index 614e8c97a622..545d5298c086 100644 > >> --- a/drivers/gpu/drm/scheduler/sched_main.c > >> +++ b/drivers/gpu/drm/scheduler/sched_main.c > >> @@ -66,14 +66,14 @@ > >> #define to_drm_sched_job(sched_job) \ > >> 
container_of((sched_job), struct drm_sched_job, queue_node) > >> > >> -int drm_sched_policy = DRM_SCHED_POLICY_FIFO; > >> +int default_drm_sched_policy = DRM_SCHED_POLICY_FIFO; > >> > >> /** > >> * DOC: sched_policy (int) > >> * Used to override default entities scheduling policy in a run queue. > >> */ > >> -MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); > >> -module_param_named(sched_policy, drm_sched_policy, int, 0444); > >> +MODULE_PARM_DESC(sched_policy, "Specify the default scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); > > > > Note that you don't need to add "default" in the text as it is already there at the very end "FIFO (default)." > > Else, it gets confusing what is meant by "default". Like this: > > > > Specify the default scheduling policy for entities on a run-queue, 1 = Round Robin, 2 = FIFO (default). > > > > See how "default" appears twice and creates confusion? We don't need our internal "default" play to get > > exported all the way to the casual user reading this. It is much clearer, however, > > > > Specify the scheduling policy for entities on a run-queue, 1 = Round Robin, 2 = FIFO (default). > > > > To mean, if unset, the default one would be used. But this is all internal code stuff. > > > > So I'd say leave this one alone. > > Ok. > >> +module_param_named(sched_policy, default_drm_sched_policy, int, 0444); > > > > Put "default" as a postfix: > > default_drm_sched_policy --> drm_sched_policy_default > > Sure. 
> >> > >> static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, > >> const struct rb_node *b) > >> @@ -177,7 +177,7 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, > >> if (rq->current_entity == entity) > >> rq->current_entity = NULL; > >> > >> - if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) > >> + if (rq->sched->sched_policy == DRM_SCHED_POLICY_FIFO) > >> drm_sched_rq_remove_fifo_locked(entity); > >> > >> spin_unlock(&rq->lock); > >> @@ -898,7 +898,7 @@ drm_sched_select_entity(struct drm_gpu_scheduler *sched) > >> > >> /* Kernel run queue has higher priority than normal run queue*/ > >> for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { > >> - entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? > >> + entity = sched->sched_policy == DRM_SCHED_POLICY_FIFO ? > >> drm_sched_rq_select_entity_fifo(&sched->sched_rq[i]) : > >> drm_sched_rq_select_entity_rr(&sched->sched_rq[i]); > >> if (entity) > >> @@ -1071,6 +1071,7 @@ static void drm_sched_main(struct work_struct *w) > >> * used > >> * @score: optional score atomic shared with other schedulers > >> * @name: name used for debugging > >> + * @sched_policy: schedule policy > >> * @dev: target &struct device > >> * > >> * Return 0 on success, otherwise error code. 
> >> @@ -1080,9 +1081,15 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > >> struct workqueue_struct *submit_wq, > >> unsigned hw_submission, unsigned hang_limit, > >> long timeout, struct workqueue_struct *timeout_wq, > >> - atomic_t *score, const char *name, struct device *dev) > >> + atomic_t *score, const char *name, > >> + enum drm_sched_policy sched_policy, > >> + struct device *dev) > >> { > >> int i; > >> + > >> + if (sched_policy >= DRM_SCHED_POLICY_COUNT) > >> + return -EINVAL; > >> + > >> sched->ops = ops; > >> sched->hw_submission_limit = hw_submission; > >> sched->name = name; > >> @@ -1092,6 +1099,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > >> sched->hang_limit = hang_limit; > >> sched->score = score ? score : &sched->_score; > >> sched->dev = dev; > >> + if (sched_policy == DRM_SCHED_POLICY_DEFAULT) > >> + sched->sched_policy = default_drm_sched_policy; > >> + else > >> + sched->sched_policy = sched_policy; > > Note also that here you can use a ternary operator as opposed to an if-control. > > sched->sched_policy = sched_policy == DRM_SCHED_POLICY_UNSET ? > drm_sched_policy_default : sched_policy; Sure, will fix in next rev. Matt > > -- > Regards, > Luben >