If a job hangs more times than the threshold allows, we consider the
entity/ctx behind it guilty and kick out all of its jobs/entities before
sched_recovery. With this feature the driver won't suffer infinite job
resubmission when a job always causes a GPU hang.

A new module parameter "hang_limit" is introduced as the threshold, letting
the driver control how many times a job is allowed to hang before we tag
its context guilty.

Change-Id: I6c08ba126b985232e9b67530c304f09a5aeee78d
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       | 15 ++++-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 81 ++++++++++++++++++++++++++-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +
 7 files changed, 103 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6312cc5..f3c3c36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -111,6 +111,7 @@ extern int amdgpu_prim_buf_per_se;
 extern int amdgpu_pos_buf_per_se;
 extern int amdgpu_cntl_sb_buf_per_se;
 extern int amdgpu_param_buf_per_se;
+extern int amdgpu_hang_limit;
 
 #define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS		3000
@@ -1148,7 +1149,7 @@ struct amdgpu_job {
 	/* user fence handling */
 	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
-
+	atomic_t		karma;
 };
 #define to_amdgpu_job(sched_job)		\
 		container_of((sched_job), struct amdgpu_job, base)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 3947f63..0083153 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -64,7 +64,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 		if (r)
 			goto failed;
 
-		ctx->rings[i].entity.ptr_guilty = &ctx->guilty; /* kernel entity doesn't have ptr_guilty */
+		ctx->rings[i].entity.ptr_guilty = &ctx->guilty; /* a kernel context/entity doesn't have ptr_guilty assigned */
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5573792..0c51fb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2619,6 +2619,7 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary)
 		if (!ring || !ring->sched.thread)
 			continue;
 
+		amd_sched_job_kickout_guilty(&ring->sched);
 		amd_sched_job_recovery(&ring->sched);
 		kthread_unpark(ring->sched.thread);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 416908a..b999990 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -112,6 +112,7 @@ int amdgpu_prim_buf_per_se = 0;
 int amdgpu_pos_buf_per_se = 0;
 int amdgpu_cntl_sb_buf_per_se = 0;
 int amdgpu_param_buf_per_se = 0;
+int amdgpu_hang_limit = 0;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -237,6 +238,8 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
 MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)");
 module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
 
+MODULE_PARM_DESC(hang_limit, "how many times a job is allowed to hang before its context is marked guilty (default 0)");
+module_param_named(hang_limit, amdgpu_hang_limit, int, 0444);
 
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_SI
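For illustration only (this is standard module_param behaviour, nothing the
patch itself adds): with the 0444 permission the threshold is fixed at load
time and visible read-only under /sys/module/amdgpu/parameters/hang_limit,
e.g.

    # tolerate three hangs of the same job before its context is marked guilty
    modprobe amdgpu hang_limit=3

or amdgpu.hang_limit=3 on the kernel command line.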
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 208da11..0209c96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -186,9 +186,22 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	return fence;
 }
 
+static void amdgpu_invalidate_job(struct amd_sched_job *sched_job)
+{
+	struct amdgpu_job *job;
+
+	if (!sched_job || !sched_job->s_entity->ptr_guilty)
+		return;
+
+	job = to_amdgpu_job(sched_job);
+	if (atomic_inc_return(&job->karma) > amdgpu_hang_limit)
+		*sched_job->s_entity->ptr_guilty = true;
+}
+
 const struct amd_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
 	.timedout_job = amdgpu_job_timedout,
-	.free_job = amdgpu_job_free_cb
+	.free_job = amdgpu_job_free_cb,
+	.invalidate_job = amdgpu_invalidate_job,
 };
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 9100ca8..f671b1a 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -373,11 +373,87 @@ static void amd_sched_job_timedout(struct work_struct *work)
 	job->sched->ops->timedout_job(job);
 }
 
+static inline bool amd_sched_check_guilty(struct amd_sched_entity *entity)
+{
+	if (entity && entity->ptr_guilty != NULL)
+		return *entity->ptr_guilty;
+
+	/* ptr_guilty == NULL means this is a job from a kernel entity */
+	return false;
+}
+
+void amd_sched_job_kickout_guilty(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job, *s_tmp;
+	struct amd_sched_rq *rq;
+	struct list_head guilty_head;
+	int i;
+
+	INIT_LIST_HEAD(&guilty_head);
+	spin_lock(&sched->job_list_lock);
+	list_for_each_entry_safe(s_job, s_tmp, &sched->ring_mirror_list, node)
+		if (amd_sched_check_guilty(s_job->s_entity))
+			list_move(&s_job->node, &guilty_head);
+	spin_unlock(&sched->job_list_lock);
+
+	/* since free_job may wait/schedule, we'd better run it without the spinlock
+	 * TODO: maybe we can just remove all spinlock protection in this routine because
+	 * it is invoked prior to job_recovery and kthread_unpark
+	 */
+	list_for_each_entry_safe(s_job, s_tmp, &guilty_head, node) {
+		/* the guilty job is fake-signaled to release the cs_wait on it
+		 *
+		 * TODO: we need to add more flags appended to FENCE_SIGNAL and
+		 * change the behavior of fence_wait to indicate that this fence's
+		 * signal is fake and due to gpu-reset, so the UMD learns that the
+		 * CS_SUBMIT failed and its context is invalid.
+		 */
+		amd_sched_fence_finished(s_job->s_fence);
+		fence_put(&s_job->s_fence->finished);
+	}
+
+	/* Go through all entities and signal all jobs from the guilty ones */
+	for (i = AMD_SCHED_PRIORITY_MIN; i < AMD_SCHED_PRIORITY_MAX; i++) {
+		struct amd_sched_entity *entity, *e_tmp;
+
+		if (i == AMD_SCHED_PRIORITY_KERNEL)
+			continue; /* the kernel entity is never guilty and can't be kicked out */
+
+		rq = &sched->sched_rq[i];
+		spin_lock(&rq->lock);
+		list_for_each_entry_safe(entity, e_tmp, &rq->entities, list) {
+			struct amd_sched_job *guilty_job;
+
+			if (amd_sched_check_guilty(entity)) {
+				spin_lock(&entity->queue_lock);
+				while (!kfifo_is_empty(&entity->job_queue)) {
+					kfifo_out(&entity->job_queue, &guilty_job,
+						  sizeof(guilty_job));
+					spin_unlock(&entity->queue_lock);
+					amd_sched_fence_finished(guilty_job->s_fence);
+					fence_put(&guilty_job->s_fence->finished);
+					spin_lock(&entity->queue_lock);
+				}
+				spin_unlock(&entity->queue_lock);
+
+				list_del_init(&entity->list);
+				if (rq->current_entity == entity)
+					rq->current_entity = NULL;
+			}
+		}
+		spin_unlock(&rq->lock);
+	}
+}
+
 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 {
-	struct amd_sched_job *s_job;
+	struct amd_sched_job *s_job, *first;
 
 	spin_lock(&sched->job_list_lock);
+	/* the first job in the mirror list is considered the guilty one */
+	first = list_first_entry_or_null(&sched->ring_mirror_list,
+					 struct amd_sched_job, node);
+
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    fence_remove_callback(s_job->s_fence->parent,
@@ -388,6 +464,9 @@ void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
 	}
 	atomic_set(&sched->hw_rq_count, 0);
 	spin_unlock(&sched->job_list_lock);
+
+	/* this marks every entity behind this job's context as guilty */
+	sched->ops->invalidate_job(first);
 }
 
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
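To make the intended ordering concrete, here is a simplified per-ring sketch
of the recovery path after this patch (based on the amdgpu_sriov_gpu_reset()
hunk above; the comments are mine and error handling is omitted):

	/* before the ASIC reset: detach the HW fences; the head job of the
	 * mirror list gets its karma bumped via ops->invalidate_job(), and
	 * once karma exceeds hang_limit the whole context is marked guilty */
	amd_sched_hw_job_reset(&ring->sched);

	/* ... hardware reset ... */

	/* fake-signal and drop every job/entity of a guilty context, then
	 * resubmit only the innocent jobs and restart the scheduler */
	amd_sched_job_kickout_guilty(&ring->sched);
	amd_sched_job_recovery(&ring->sched);
	kthread_unpark(ring->sched.thread);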
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index ccbbcb0..ab644a6 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -106,6 +106,7 @@ struct amd_sched_backend_ops {
 	struct fence *(*run_job)(struct amd_sched_job *sched_job);
 	void (*timedout_job)(struct amd_sched_job *sched_job);
 	void (*free_job)(struct amd_sched_job *sched_job);
+	void (*invalidate_job)(struct amd_sched_job *sched_job);
 };
 
 enum amd_sched_priority {
@@ -159,4 +160,5 @@ int amd_sched_job_init(struct amd_sched_job *job,
 		       void *owner);
 void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
 void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
+void amd_sched_job_kickout_guilty(struct amd_gpu_scheduler *sched);
 #endif
-- 
2.7.4
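As an appendix (not part of the patch): a minimal stand-alone C sketch of the
ptr_guilty indirection the series relies on. The struct and variable names
below are made up for illustration; in the driver the shared flag is
ctx->guilty and each ring's entity points at it via entity.ptr_guilty.

	#include <stdbool.h>
	#include <stdio.h>

	struct toy_ctx    { bool guilty; };
	struct toy_entity { bool *ptr_guilty; };

	int main(void)
	{
		struct toy_ctx c = { .guilty = false };
		/* all entities of one context share the same flag */
		struct toy_entity gfx  = { .ptr_guilty = &c.guilty };
		struct toy_entity sdma = { .ptr_guilty = &c.guilty };

		*gfx.ptr_guilty = true; /* karma exceeded hang_limit */

		/* prints 1: condemning one entity condemns the whole context */
		printf("sdma guilty: %d\n", *sdma.ptr_guilty);
		return 0;
	}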