Am 18.10.18 um 20:44 schrieb Andrey Grodzovsky: > Problem: > A particular scheduler may become unusable (underlying HW) after > some event (e.g. GPU reset). If it's later chosen by > the get free sched. policy a command will fail to be > submitted. > > Fix: > Add a driver specific callback to report the sched. status so > rq with bad sched. can be avoided in favor of working one or > none in which case job init will fail. I would rather go with the approach of moving the ready flag completely into the scheduler instead. Christian. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 9 ++++++++- > drivers/gpu/drm/etnaviv/etnaviv_sched.c | 3 ++- > drivers/gpu/drm/scheduler/sched_entity.c | 11 ++++++++++- > drivers/gpu/drm/scheduler/sched_main.c | 8 +++++++- > drivers/gpu/drm/v3d/v3d_sched.c | 3 ++- > include/drm/gpu_scheduler.h | 5 ++++- > 6 files changed, 33 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > index 5448cf2..bbfe7f501 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c > @@ -404,6 +404,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, > return 0; > } > > +static bool amdgpu_ring_ready(struct drm_gpu_scheduler *sched) > +{ > + struct amdgpu_ring *ring = to_amdgpu_ring(sched); > + > + return ring->ready; > +} > + > /** > * amdgpu_fence_driver_init_ring - init the fence driver > * for the requested ring. 
> @@ -450,7 +457,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, > > r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, > num_hw_submission, amdgpu_job_hang_limit, > - timeout, ring->name); > + timeout, ring->name, amdgpu_ring_ready); > if (r) { > DRM_ERROR("Failed to create scheduler on ring %s.\n", > ring->name); > diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > index f8c5f1e..5094013 100644 > --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c > +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c > @@ -178,7 +178,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu) > > ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops, > etnaviv_hw_jobs_limit, etnaviv_job_hang_limit, > - msecs_to_jiffies(500), dev_name(gpu->dev)); > + msecs_to_jiffies(500), dev_name(gpu->dev), > + NULL); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c > index 3e22a54..320c77a 100644 > --- a/drivers/gpu/drm/scheduler/sched_entity.c > +++ b/drivers/gpu/drm/scheduler/sched_entity.c > @@ -130,7 +130,16 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity) > int i; > > for (i = 0; i < entity->num_rq_list; ++i) { > - num_jobs = atomic_read(&entity->rq_list[i]->sched->num_jobs); > + struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched; > + > + if (entity->rq_list[i]->sched->ready && > + !entity->rq_list[i]->sched->ready(sched)) { > + DRM_WARN("sched%s is not ready, skipping", sched->name); > + > + continue; > + } > + > + num_jobs = atomic_read(&sched->num_jobs); > if (num_jobs < min_jobs) { > min_jobs = num_jobs; > rq = entity->rq_list[i]; > diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c > index 63b997d..6b151f2 100644 > --- a/drivers/gpu/drm/scheduler/sched_main.c > +++ b/drivers/gpu/drm/scheduler/sched_main.c > @@ -420,6 +420,9 @@ int drm_sched_job_init(struct drm_sched_job *job, > struct 
drm_gpu_scheduler *sched; > > drm_sched_entity_select_rq(entity); > + if (!entity->rq) > + return -ENOENT; > + > sched = entity->rq->sched; > > job->sched = sched; > @@ -606,7 +609,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > unsigned hw_submission, > unsigned hang_limit, > long timeout, > - const char *name) > + const char *name, > + bool (*ready)(struct drm_gpu_scheduler *sched)) > { > int i; > sched->ops = ops; > @@ -633,6 +637,8 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, > return PTR_ERR(sched->thread); > } > > + sched->ready = ready; > + > return 0; > } > EXPORT_SYMBOL(drm_sched_init); > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c > index 80b641f..e06cc0d 100644 > --- a/drivers/gpu/drm/v3d/v3d_sched.c > +++ b/drivers/gpu/drm/v3d/v3d_sched.c > @@ -212,7 +212,8 @@ v3d_sched_init(struct v3d_dev *v3d) > &v3d_sched_ops, > hw_jobs_limit, job_hang_limit, > msecs_to_jiffies(hang_limit_ms), > - "v3d_bin"); > + "v3d_bin", > + NULL); > if (ret) { > dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret); > return ret; > diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h > index 0684dcd..a6e18fe 100644 > --- a/include/drm/gpu_scheduler.h > +++ b/include/drm/gpu_scheduler.h > @@ -283,12 +283,15 @@ struct drm_gpu_scheduler { > spinlock_t job_list_lock; > int hang_limit; > atomic_t num_jobs; > + > + bool (*ready)(struct drm_gpu_scheduler *sched); > }; > > int drm_sched_init(struct drm_gpu_scheduler *sched, > const struct drm_sched_backend_ops *ops, > uint32_t hw_submission, unsigned hang_limit, long timeout, > - const char *name); > + const char *name, > + bool (*ready)(struct drm_gpu_scheduler *sched)); > void drm_sched_fini(struct drm_gpu_scheduler *sched); > int drm_sched_job_init(struct drm_sched_job *job, > struct drm_sched_entity *entity, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx