On 17.12.18 at 17:57, Grodzovsky, Andrey wrote:
>
> On 12/17/2018 10:27 AM, Christian König wrote:
>> On 10.12.18 at 22:43, Andrey Grodzovsky wrote:
>>> Decouple sched thread stop and start and ring mirror
>>> list handling from the policy of what to do about the
>>> guilty jobs.
>>> When stopping the sched thread and detaching sched fences
>>> from non-signaled HW fences, wait for all signaled HW fences
>>> to complete before rerunning the jobs.
>>>
>>> v2: Fix resubmission of guilty job into HW after refactoring.
>>>
>>> Suggested-by: Christian Koenig <Christian.Koenig@xxxxxxx>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  17 +++--
>>>   drivers/gpu/drm/etnaviv/etnaviv_sched.c    |   8 +--
>>>   drivers/gpu/drm/scheduler/sched_main.c     | 110 ++++++++++++++++++-----------
>>>   drivers/gpu/drm/v3d/v3d_sched.c            |  11 +--
>>>   include/drm/gpu_scheduler.h                |  10 ++-
>>>   5 files changed, 95 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index ef36cc5..42111d5 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3292,17 +3292,16 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>>      /* block all schedulers and reset given job's ring */
>>>      for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>>          struct amdgpu_ring *ring = adev->rings[i];
>>> +        bool park_only = job && job->base.sched != &ring->sched;
>>>          if (!ring || !ring->sched.thread)
>>>              continue;
>>> -        kthread_park(ring->sched.thread);
>>> +        drm_sched_stop(&ring->sched, job ? &job->base : NULL, park_only);
>>> -        if (job && job->base.sched != &ring->sched)
>>> +        if (park_only)
>>>              continue;
>>> -        drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
>>> -
>>>      /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
>>>      amdgpu_fence_driver_force_completion(ring);
>>>  }
>>> @@ -3445,6 +3444,7 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
>>>                       struct amdgpu_job *job)
>>>  {
>>>      int i;
>>> +    bool unpark_only;
>>>      for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>>          struct amdgpu_ring *ring = adev->rings[i];
>>> @@ -3456,10 +3456,13 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
>>>           * or all rings (in the case @job is NULL)
>>>           * after above amdgpu_reset accomplished
>>>           */
>>> -        if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
>>> -            drm_sched_job_recovery(&ring->sched);
>>> +        unpark_only = (job && job->base.sched != &ring->sched) ||
>>> +                      adev->asic_reset_res;
>>> +
>>> +        if (!unpark_only)
>>> +            drm_sched_resubmit_jobs(&ring->sched);
>>> -        kthread_unpark(ring->sched.thread);
>>> +        drm_sched_start(&ring->sched, unpark_only);
>>>      }
>>>      if (!amdgpu_device_has_dc_support(adev)) {
>>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> index 49a6763..fab3b51 100644
>>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
>>> @@ -109,16 +109,16 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
>>>      }
>>>      /* block scheduler */
>>> -    kthread_park(gpu->sched.thread);
>>> -    drm_sched_hw_job_reset(&gpu->sched, sched_job);
>>> +    drm_sched_stop(&gpu->sched, sched_job, false);
>>>      /* get the GPU back into the init state */
>>>      etnaviv_core_dump(gpu);
>>>      etnaviv_gpu_recover_hang(gpu);
>>> +    drm_sched_resubmit_jobs(&gpu->sched);
>>> +
>>>      /* restart scheduler after GPU is usable again */
>>> -    drm_sched_job_recovery(&gpu->sched);
>>> -    kthread_unpark(gpu->sched.thread);
>>> +    drm_sched_start(&gpu->sched, false);
>>>  }
>>>  static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
>>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>>> index dbb6906..cdf95e2 100644
>>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>>> @@ -60,8 +60,6 @@
>>>  static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
>>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job);
>>> -
>>>  /**
>>>   * drm_sched_rq_init - initialize a given run queue struct
>>>   *
>>> @@ -342,13 +340,21 @@ static void drm_sched_job_timedout(struct work_struct *work)
>>>   * @bad: bad scheduler job
>>>   *
>>>   */
>>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
>>> +void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad,
>>> +                    bool park_only)
>>>  {
>>>      struct drm_sched_job *s_job;
>>>      struct drm_sched_entity *entity, *tmp;
>>>      unsigned long flags;
>>> +    struct list_head wait_list;
>>>      int i;
>>> +    kthread_park(sched->thread);
>>> +    if (park_only)
>>> +        return;
>> Removing the callback needs to be done for all engines, not just the
>> one where the bad job is on.
> Is it because you assume that during a full GPU reset we might kill
> healthy jobs that were still in progress in the HW pipe, and so we need
> to manually recover them
> just as we do with the guilty job's engine?

Yes, exactly.

Christian.
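To make that agreed point concrete, here is a minimal illustrative sketch (not part of the posted patch; the helper name is made up) of an amdgpu pre-reset loop that stops every ring's scheduler with callback removal on all engines, and only marks the bad job on the scheduler that actually owns it. It uses drm_sched_stop() with the signature introduced by the patch above:

/*
 * Illustrative only: on a full ASIC reset, stop every ring's scheduler
 * and detach the parent (HW) fence callbacks on all of them, because the
 * reset can kill jobs that were still in flight on otherwise healthy
 * engines, not just on the ring that owns the guilty job.
 */
static void amdgpu_stop_all_scheds_sketch(struct amdgpu_device *adev,
                                          struct amdgpu_job *job)
{
    int i;

    for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
        struct amdgpu_ring *ring = adev->rings[i];

        if (!ring || !ring->sched.thread)
            continue;

        /* Detach callbacks on every engine; pass the bad job only to the
         * scheduler that owns it.
         */
        drm_sched_stop(&ring->sched,
                       (job && job->base.sched == &ring->sched) ?
                            &job->base : NULL,
                       false /* never park_only on a full reset */);

        /* after the reset the HW fences are meaningless */
        amdgpu_fence_driver_force_completion(ring);
    }
}

After the ASIC reset, drm_sched_resubmit_jobs() and drm_sched_start() from the patch would then re-arm the callbacks on every ring.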
>
> Andrey
>
>>> +
>>> +    INIT_LIST_HEAD(&wait_list);
>>> +
>>>      spin_lock_irqsave(&sched->job_list_lock, flags);
>>>      list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
>>>          if (s_job->s_fence->parent &&
>>> @@ -358,9 +364,24 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
>>>              s_job->s_fence->parent = NULL;
>>>              atomic_dec(&sched->hw_rq_count);
>>>          }
>>> +        else {
>>> +            /* TODO Is the get/put necessary here ? */
>>> +            dma_fence_get(&s_job->s_fence->finished);
>>> +            list_add(&s_job->finish_node, &wait_list);
>>> +        }
>>>      }
>>>      spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> +    /*
>>> +     * Verify all the signaled jobs in mirror list are removed from the ring
>>> +     * We rely on the fact that any finish_work in progress will wait for this
>>> +     * handler to complete before releasing all of the jobs we iterate.
>>> +     */
>>> +    list_for_each_entry(s_job, &wait_list, finish_node) {
>>> +        dma_fence_wait(&s_job->s_fence->finished, false);
>>> +        dma_fence_put(&s_job->s_fence->finished);
>>> +    }
>>> +
>> Increasing a bad job's karma should be a separate function.
>>
>> Christian.
>>
>>>      if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
>>>          atomic_inc(&bad->karma);
>>>          /* don't increase @bad's karma if it's from KERNEL RQ,
>>> @@ -385,7 +406,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
>>>              }
>>>          }
>>>      }
>>> -EXPORT_SYMBOL(drm_sched_hw_job_reset);
>>> +EXPORT_SYMBOL(drm_sched_stop);
>>>  /**
>>>   * drm_sched_job_recovery - recover jobs after a reset
>>> @@ -393,33 +414,21 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset);
>>>   * @sched: scheduler instance
>>>   *
>>>   */
>>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only)
>>>  {
>>>      struct drm_sched_job *s_job, *tmp;
>>> -    bool found_guilty = false;
>>>      unsigned long flags;
>>>      int r;
>>> -    spin_lock_irqsave(&sched->job_list_lock, flags);
>>> +    if (unpark_only)
>>> +        goto unpark;
>>> +
>>> +    spin_lock_irqsave(&sched->job_list_lock, flags);
>>>      list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>>>          struct drm_sched_fence *s_fence = s_job->s_fence;
>>> -        struct dma_fence *fence;
>>> -        uint64_t guilty_context;
>>> -
>>> -        if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
>>> -            found_guilty = true;
>>> -            guilty_context = s_job->s_fence->scheduled.context;
>>> -        }
>>> -
>>> -        if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
>>> -            dma_fence_set_error(&s_fence->finished, -ECANCELED);
>>> -
>>> -        spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> -        fence = sched->ops->run_job(s_job);
>>> -        atomic_inc(&sched->hw_rq_count);
>>> +        struct dma_fence *fence = s_job->s_fence->parent;
>>>          if (fence) {
>>> -            s_fence->parent = dma_fence_get(fence);
>>>              r = dma_fence_add_callback(fence, &s_fence->cb,
>>>                                         drm_sched_process_job);
>>>              if (r == -ENOENT)
>>> @@ -427,18 +436,47 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
>>>              else if (r)
>>>                  DRM_ERROR("fence add callback failed (%d)\n", r);
>>> -            dma_fence_put(fence);
>>> -        } else {
>>> -            if (s_fence->finished.error < 0)
>>> -                drm_sched_expel_job_unlocked(s_job);
>>> +        } else
>>>              drm_sched_process_job(NULL, &s_fence->cb);
>>> -        }
>>> -        spin_lock_irqsave(&sched->job_list_lock, flags);
>>>      }
>>> +
>>>      drm_sched_start_timeout(sched);
>>>      spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> +
>>> +unpark:
>>> +    kthread_unpark(sched->thread);
>>>  }
>>> -EXPORT_SYMBOL(drm_sched_job_recovery);
>>> +EXPORT_SYMBOL(drm_sched_start);
>>> +
>>> +/**
>>> + * drm_sched_resubmit_jobs - helper to relaunch jobs from the ring mirror list
>>> + *
>>> + * @sched: scheduler instance
>>> + *
>>> + */
>>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
>>> +{
>>> +    struct drm_sched_job *s_job, *tmp;
>>> +    uint64_t guilty_context;
>>> +    bool found_guilty = false;
>>> +
>>> +    /* TODO Do we need a spinlock here ? */
>>> +    list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
>>> +        struct drm_sched_fence *s_fence = s_job->s_fence;
>>> +
>>> +        if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
>>> +            found_guilty = true;
>>> +            guilty_context = s_job->s_fence->scheduled.context;
>>> +        }
>>> +
>>> +        if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
>>> +            dma_fence_set_error(&s_fence->finished, -ECANCELED);
>>> +
>>> +        s_job->s_fence->parent = sched->ops->run_job(s_job);
>>> +        atomic_inc(&sched->hw_rq_count);
>>> +    }
>>> +}
>>> +EXPORT_SYMBOL(drm_sched_resubmit_jobs);
>>>  /**
>>>   * drm_sched_job_init - init a scheduler job
>>> @@ -634,26 +672,14 @@ static int drm_sched_main(void *param)
>>>                  DRM_ERROR("fence add callback failed (%d)\n", r);
>>>              dma_fence_put(fence);
>>> -        } else {
>>> -            if (s_fence->finished.error < 0)
>>> -                drm_sched_expel_job_unlocked(sched_job);
>>> +        } else
>>>              drm_sched_process_job(NULL, &s_fence->cb);
>>> -        }
>>>          wake_up(&sched->job_scheduled);
>>>      }
>>>      return 0;
>>>  }
>>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job)
>>> -{
>>> -    struct drm_gpu_scheduler *sched = s_job->sched;
>>> -
>>> -    spin_lock(&sched->job_list_lock);
>>> -    list_del_init(&s_job->node);
>>> -    spin_unlock(&sched->job_list_lock);
>>> -}
>>> -
>>>  /**
>>>   * drm_sched_init - Init a gpu scheduler instance
>>>   *
>>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
>>> index 445b2ef..f99346a 100644
>>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>>> @@ -178,18 +178,19 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
>>>      for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>>          struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
>>> -        kthread_park(sched->thread);
>>> -        drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
>>> -                                       sched_job : NULL));
>>> +        drm_sched_stop(sched, (sched_job->sched == sched ?
>>> +                               sched_job : NULL), false);
>>>      }
>>>      /* get the GPU back into the init state */
>>>      v3d_reset(v3d);
>>> +    for (q = 0; q < V3D_MAX_QUEUES; q++)
>>> +        drm_sched_resubmit_jobs(&v3d->queue[q].sched);
>>> +
>>>      /* Unblock schedulers and restart their jobs. */
>>>      for (q = 0; q < V3D_MAX_QUEUES; q++) {
>>> -        drm_sched_job_recovery(&v3d->queue[q].sched);
>>> -        kthread_unpark(v3d->queue[q].sched.thread);
>>> +        drm_sched_start(&v3d->queue[q].sched, false);
>>>      }
>>>      mutex_unlock(&v3d->reset_lock);
>>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>>> index 47e1979..c94b592 100644
>>> --- a/include/drm/gpu_scheduler.h
>>> +++ b/include/drm/gpu_scheduler.h
>>> @@ -175,6 +175,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>>>   *               finished to remove the job from the
>>>   *               @drm_gpu_scheduler.ring_mirror_list.
>>>   * @node: used to append this struct to the @drm_gpu_scheduler.ring_mirror_list.
>>> + * @finish_node: used in a list to wait on before resetting the scheduler
>>>   * @id: a unique id assigned to each job scheduled on the scheduler.
>>>   * @karma: increment on every hang caused by this job. If this exceeds the hang
>>>   *         limit of the scheduler then the job is marked guilty and will not
>>> @@ -193,6 +194,7 @@ struct drm_sched_job {
>>>      struct dma_fence_cb        finish_cb;
>>>      struct work_struct         finish_work;
>>>      struct list_head           node;
>>> +    struct list_head           finish_node;
>>>      uint64_t                   id;
>>>      atomic_t                   karma;
>>>      enum drm_sched_priority    s_priority;
>>> @@ -298,9 +300,11 @@ int drm_sched_job_init(struct drm_sched_job *job,
>>>                         void *owner);
>>>  void drm_sched_job_cleanup(struct drm_sched_job *job);
>>>  void drm_sched_wakeup(struct drm_gpu_scheduler *sched);
>>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched,
>>> -                            struct drm_sched_job *job);
>>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched);
>>> +void drm_sched_stop(struct drm_gpu_scheduler *sched,
>>> +                    struct drm_sched_job *job,
>>> +                    bool park_only);
>>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only);
>>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched);
>>>  bool drm_sched_dependency_optimized(struct dma_fence* fence,
>>>                                      struct drm_sched_entity *entity);
>>>  void drm_sched_fault(struct drm_gpu_scheduler *sched);
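Following up on the review comment above that increasing a bad job's karma should be a separate function: here is a minimal sketch of how that block could be split out of drm_sched_stop(). The helper name is illustrative only, and the entity walk is reconstructed from the pre-patch drm_sched_hw_job_reset() logic rather than taken verbatim from the quoted hunk:

#include <drm/gpu_scheduler.h>

/*
 * Illustrative split-out of the karma handling: bump the bad job's karma
 * (unless it came from the KERNEL run queue) and, once the hang limit is
 * exceeded, mark the entities of the offending context as guilty.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
    int i;
    struct drm_sched_entity *entity, *tmp;
    struct drm_gpu_scheduler *sched = bad->sched;

    /* don't increase @bad's karma if it's from KERNEL RQ,
     * because sometimes GPU hangs cause kernel jobs (like VM updating jobs)
     * to be re-pushed, and karma would go up repeatedly.
     */
    if (bad->s_priority == DRM_SCHED_PRIORITY_KERNEL)
        return;

    atomic_inc(&bad->karma);

    if (atomic_read(&bad->karma) <= sched->hang_limit)
        return;

    for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; i++) {
        struct drm_sched_rq *rq = &sched->sched_rq[i];

        spin_lock(&rq->lock);
        list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
            if (bad->s_fence->scheduled.context == entity->fence_context) {
                if (entity->guilty)
                    atomic_set(entity->guilty, 1);
                break;
            }
        }
        spin_unlock(&rq->lock);
    }
}

With such a helper, drm_sched_stop() would simply call it for the bad job before (or after) detaching the parent-fence callbacks, keeping the stop/start mechanics separate from the guilty-job policy, which is the stated goal of the patch.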