Am 19.10.18 um 22:52 schrieb Andrey Grodzovsky: > Problem: > A particular scheduler may become unsuable (underlying HW) after > some event (e.g. GPU reset). If it's later chosen by > the get free sched. policy a command will fail to be > submitted. > > Fix: > Add a driver specific callback to report the sched status so > rq with bad sched can be avoided in favor of working one or > none in which case job init will fail. > > v2: Switch from driver callback to flag in scheduler. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 2 +- > drivers/gpu/drm/scheduler/sched_entity.c | 9 ++++++++- > drivers/gpu/drm/scheduler/sched_main.c | 10 +++++++++- > drivers/gpu/drm/v3d/v3d_sched.c | 4 ++-- > include/drm/gpu_scheduler.h | 5 ++++- > 6 files changed, 25 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > index 5448cf2..bf845b0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -450,7 +450,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, > > r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, > num_hw_submission, amdgpu_job_hang_limit, > - timeout, ring->name); > + timeout, ring->name, false); > if (r) { > DRM_ERROR("Failed to create scheduler on ring %s.\n", > ring->name); > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > index f8c5f1e..9dca347 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > @@ -178,7 +178,7 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) > > ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, > etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, > - msecs_to_jiffies(500), dev_name(gpu->dev)); > + msecs_to_jiffies(500), dev_name(gpu->dev), true); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c > index 3e22a54..ba54c30 100644 > --- a/drivers/gpu/drm/scheduler/sched_entity.c > +++ b/drivers/gpu/drm/scheduler/sched_entity.c > @@ -130,7 +130,14 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) > int i; > > for (i = 0; i < entity->num_rq_list; ++i) { > - num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs); > + struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched; > + > + if (!entity->rq_list[i]->sched->ready) { > + DRM_WARN("sched%s is not ready, skipping", sched->name); > + continue; > + } > + > + num_jobs = atomic_read(&sched->num_jobs); > if (num_jobs < min_jobs) { > min_jobs = num_jobs; > rq = entity->rq_list[i]; > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > index 63b997d..772adec 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job, > struct drm_gpu_scheduler *sched; > > drm_sched_entity_select_rq(entity); > + if (!entity->rq) > + return -ENOENT; > + > sched = entity->rq->sched; > > job->sched = sched; > @@ -598,6 +601,7 @@ static int drm_sched_main(void *param) > * @hang_limit: number of times to allow a job to hang before dropping it > * @timeout: timeout value in jiffies for the scheduler > * @name: name used for debugging > + * @ready: marks if the underlying HW is ready to work > * > * Return 0 on success, otherwise error code. > */ > @@ -606,7 +610,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > unsigned hw_submission, > unsigned hang_limit, > long timeout, > - const char *name) > + const char *name, > + bool ready) Please drop the ready flag here. We should consider a scheduler ready as soon as it is initialized. Apart from that looks good to me, Christian. > { > int i; > sched->ops = ops; > @@ -633,6 +638,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > return PTR_ERR(sched->thread); > } > > + sched->ready = ready; > return 0; > } > EXPORT_SYMBOL(drm_sched_init); > @@ -648,5 +654,7 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) > { > if (sched->thread) > kthread_stop(sched->thread); > + > + sched->ready = false; > } > EXPORT_SYMBOL(drm_sched_fini); > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c > index 80b641f..7cedb5f 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -212,7 +212,7 @@ v3d_sched_init(struct v3d_dev *v3d) > &v3d_sched_ops, > hw_jobs_limit, job_hang_limit, > msecs_to_jiffies(hang_limit_ms), > - "v3d_bin"); > + "v3d_bin", true); > if (ret) { > dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret); > return ret; > @@ -222,7 +222,7 @@ v3d_sched_init(struct v3d_dev *v3d) > &v3d_sched_ops, > hw_jobs_limit, job_hang_limit, > msecs_to_jiffies(hang_limit_ms), > - "v3d_render"); > + "v3d_render", true); > if (ret) { > dev_err(v3d->dev, "Failed to create render scheduler: %d.", > ret); > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > index 0684dcd..037caea 100644 > --- a/include/drm/gpu_scheduler.h > +++ b/include/drm/gpu_scheduler.h > @@ -264,6 +264,7 @@ struct drm_sched_backend_ops { > * @hang_limit: once the hangs by a job crosses this limit then it is marked > * guilty and it will be considered for scheduling further. > * @num_jobs: the number of jobs in queue in the scheduler > + * @ready: marks if the underlying HW is ready to work > * > * One scheduler is implemented for each hardware ring. > */ > @@ -283,12 +284,14 @@ struct drm_gpu_scheduler { > spinlock_t job_list_lock; > int hang_limit; > atomic_t num_jobs; > + bool ready; > }; > > int drm_sched_init(struct drm_gpu_scheduler *sched, > const struct drm_sched_backend_ops *ops, > uint32_t hw_submission, unsigned hang_limit, long timeout, > - const char *name); > + const char *name, > + bool ready); > void drm_sched_fini(struct drm_gpu_scheduler *sched); > int drm_sched_job_init(struct drm_sched_job *job, > struct drm_sched_entity *entity, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx