Am 21.12.18 um 21:36 schrieb Grodzovsky, Andrey: > > On 12/21/2018 01:37 PM, Christian König wrote: >> Am 20.12.18 um 20:23 schrieb Andrey Grodzovsky: >>> Decauple sched threads stop and start and ring mirror >>> list handling from the policy of what to do about the >>> guilty jobs. >>> When stoppping the sched thread and detaching sched fences >>> from non signaled HW fenes wait for all signaled HW fences >>> to complete before rerunning the jobs. >>> >>> v2: Fix resubmission of guilty job into HW after refactoring. >>> >>> v4: >>> Full restart for all the jobs, not only from guilty ring. >>> Extract karma increase into standalone function. >>> >>> v5: >>> Rework waiting for signaled jobs without relying on the job >>> struct itself as those might already be freed for non 'guilty' >>> job's schedulers. >>> Expose karma increase to drivers. >>> >>> Suggested-by: Christian Koenig <Christian.Koenig@xxxxxxx> >>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +-- >>> drivers/gpu/drm/etnaviv/etnaviv_sched.c | 11 +- >>> drivers/gpu/drm/scheduler/sched_main.c | 188 >>> +++++++++++++++++++---------- >>> drivers/gpu/drm/v3d/v3d_sched.c | 12 +- >>> include/drm/gpu_scheduler.h | 10 +- >>> 5 files changed, 151 insertions(+), 88 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> index 8a078f4..a4bd2d3 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >>> @@ -3298,12 +3298,10 @@ static int >>> amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, >>> if (!ring || !ring->sched.thread) >>> continue; >>> - kthread_park(ring->sched.thread); >>> + drm_sched_stop(&ring->sched, job ? &job->base : NULL); >>> - if (job && job->base.sched != &ring->sched) >>> - continue; >>> - >>> - drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); >>> + if(job) >>> + drm_sched_increase_karma(&job->base); >> Since we dropped the "job && job->base.sched != &ring->sched" check >> above this will now increase the jobs karma multiple times. >> >> Maybe just move that outside of the loop. >> >>> /* after all hw jobs are reset, hw fence is meaningless, >>> so force_completion */ >>> amdgpu_fence_driver_force_completion(ring); >>> @@ -3454,14 +3452,10 @@ static void >>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev, >>> if (!ring || !ring->sched.thread) >>> continue; >>> - /* only need recovery sched of the given job's ring >>> - * or all rings (in the case @job is NULL) >>> - * after above amdgpu_reset accomplished >>> - */ >>> - if ((!job || job->base.sched == &ring->sched) && >>> !adev->asic_reset_res) >>> - drm_sched_job_recovery(&ring->sched); >>> + if (!adev->asic_reset_res) >>> + drm_sched_resubmit_jobs(&ring->sched); >>> - kthread_unpark(ring->sched.thread); >>> + drm_sched_start(&ring->sched, !adev->asic_reset_res); >>> } >>> if (!amdgpu_device_has_dc_support(adev)) { >>> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c >>> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c >>> index 49a6763..6f1268f 100644 >>> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c >>> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c >>> @@ -109,16 +109,19 @@ static void etnaviv_sched_timedout_job(struct >>> drm_sched_job *sched_job) >>> } >>> /* block scheduler */ >>> - kthread_park(gpu->sched.thread); >>> - drm_sched_hw_job_reset(&gpu->sched, sched_job); >>> + drm_sched_stop(&gpu->sched, sched_job); >>> + >>> + if(sched_job) >>> + drm_sched_increase_karma(sched_job); >>> /* get the GPU back into the init state */ >>> etnaviv_core_dump(gpu); >>> etnaviv_gpu_recover_hang(gpu); >>> + drm_sched_resubmit_jobs(&gpu->sched); >>> + >>> /* restart scheduler after GPU is usable again */ >>> - drm_sched_job_recovery(&gpu->sched); >>> - kthread_unpark(gpu->sched.thread); >>> + drm_sched_start(&gpu->sched, true); >>> } >>> static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) >>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c >>> b/drivers/gpu/drm/scheduler/sched_main.c >>> index dbb6906..b5c5bee 100644 >>> --- a/drivers/gpu/drm/scheduler/sched_main.c >>> +++ b/drivers/gpu/drm/scheduler/sched_main.c >>> @@ -60,8 +60,6 @@ >>> static void drm_sched_process_job(struct dma_fence *f, struct >>> dma_fence_cb *cb); >>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job >>> *s_job); >>> - >>> /** >>> * drm_sched_rq_init - initialize a given run queue struct >>> * >>> @@ -335,6 +333,42 @@ static void drm_sched_job_timedout(struct >>> work_struct *work) >>> spin_unlock_irqrestore(&sched->job_list_lock, flags); >>> } >> Kernel doc here would be nice to have. >> >>> +void drm_sched_increase_karma(struct drm_sched_job *bad) >>> +{ >>> + int i; >>> + struct drm_sched_entity *tmp; >>> + struct drm_sched_entity *entity; >>> + struct drm_gpu_scheduler *sched = bad->sched; >>> + >>> + /* don't increase @bad's karma if it's from KERNEL RQ, >>> + * because sometimes GPU hang would cause kernel jobs (like VM >>> updating jobs) >>> + * corrupt but keep in mind that kernel jobs always considered >>> good. >>> + */ >>> + if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { >>> + atomic_inc(&bad->karma); >>> + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; >>> + i++) { >>> + struct drm_sched_rq *rq = &sched->sched_rq[i]; >>> + >>> + spin_lock(&rq->lock); >>> + list_for_each_entry_safe(entity, tmp, &rq->entities, >>> list) { >>> + if (bad->s_fence->scheduled.context == >>> + entity->fence_context) { >>> + if (atomic_read(&bad->karma) > >>> + bad->sched->hang_limit) >>> + if (entity->guilty) >>> + atomic_set(entity->guilty, 1); >>> + break; >>> + } >>> + } >>> + spin_unlock(&rq->lock); >>> + if (&entity->list != &rq->entities) >>> + break; >>> + } >>> + } >>> +} >>> +EXPORT_SYMBOL(drm_sched_increase_karma); >>> + >>> /** >>> * drm_sched_hw_job_reset - stop the scheduler if it contains the >>> bad job >>> * >>> @@ -342,13 +376,22 @@ static void drm_sched_job_timedout(struct >>> work_struct *work) >>> * @bad: bad scheduler job >>> * >>> */ >>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct >>> drm_sched_job *bad) >>> +void drm_sched_stop(struct drm_gpu_scheduler *sched, struct >>> drm_sched_job *bad) >>> { >>> - struct drm_sched_job *s_job; >>> - struct drm_sched_entity *entity, *tmp; >>> + struct drm_sched_job *s_job, *last_job; >>> unsigned long flags; >>> - int i; >>> + struct dma_fence *wait_fence = NULL; >>> + int r; >>> + >>> + kthread_park(sched->thread); >>> + /* >>> + * Verify all the signaled jobs in mirror list are removed from >>> the ring >>> + * by waiting for their respective scheduler fences to signal. >>> + * Continually repeat traversing the ring mirror list until no >>> more signaled >>> + * fences are found >>> + */ >>> +retry_wait: >>> spin_lock_irqsave(&sched->job_list_lock, flags); >>> list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, >>> node) { >>> if (s_job->s_fence->parent && >>> @@ -357,35 +400,45 @@ void drm_sched_hw_job_reset(struct >>> drm_gpu_scheduler *sched, struct drm_sched_jo >>> dma_fence_put(s_job->s_fence->parent); >>> s_job->s_fence->parent = NULL; >>> atomic_dec(&sched->hw_rq_count); >>> + } else { >>> + wait_fence = dma_fence_get(&s_job->s_fence->finished); >>> + last_job = s_job; >>> + break; >>> } >>> } >>> - spin_unlock_irqrestore(&sched->job_list_lock, flags); >>> - if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { >>> - atomic_inc(&bad->karma); >>> - /* don't increase @bad's karma if it's from KERNEL RQ, >>> - * becuase sometimes GPU hang would cause kernel jobs (like >>> VM updating jobs) >>> - * corrupt but keep in mind that kernel jobs always >>> considered good. >>> - */ >>> - for (i = DRM_SCHED_PRIORITY_MIN; i < >>> DRM_SCHED_PRIORITY_KERNEL; i++ ) { >>> - struct drm_sched_rq *rq = &sched->sched_rq[i]; >>> + /* No signaled jobs in the ring, its safe to proceed to ASIC >>> reset */ >>> + if (!wait_fence) { >>> + spin_unlock_irqrestore(&sched->job_list_lock, flags); >>> + goto done; >>> + } >>> - spin_lock(&rq->lock); >>> - list_for_each_entry_safe(entity, tmp, &rq->entities, >>> list) { >>> - if (bad->s_fence->scheduled.context == >>> entity->fence_context) { >>> - if (atomic_read(&bad->karma) > >>> bad->sched->hang_limit) >>> - if (entity->guilty) >>> - atomic_set(entity->guilty, 1); >>> - break; >>> - } >>> - } >>> - spin_unlock(&rq->lock); >>> - if (&entity->list != &rq->entities) >>> - break; >>> + /* Restore removed cb since removing again already removed cb is >>> undefined */ >>> + list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, >>> node) { >>> + if(s_job == last_job) >>> + break; >> Need to double check after the holidays, but you should be able to use >> list_for_each_entry_continue here. > I think it should work - kind of traversing back on all the jobs we just > removed their callbacks. Wrapping your head around stuff again after the holidays sometimes shows new problems we have completely missed :) Adding the callbacks again won't work because we have freed up our reference to the hardware fence above: > dma_fence_put(s_job->s_fence->parent); > s_job->s_fence->parent = NULL; We need to drop this or we would never be able to re-add the fences. But I'm still not sure if we shouldn't rely on the correct order of signaling and simplify this instead. Regards, Christian. > > Andrey > >>> + >>> + if (s_job->s_fence->parent) { >>> + r = dma_fence_add_callback(s_job->s_fence->parent, >>> + &s_job->s_fence->cb, >>> + drm_sched_process_job); >>> + if (r) >>> + DRM_ERROR("fence restore callback failed (%d)\n", >>> + r); >> When you fail to add the callback this means that you need to call >> call drm_sched_process_job manually here. >> >>> } >>> } >>> + spin_unlock_irqrestore(&sched->job_list_lock, flags); >>> + >>> + dma_fence_wait(wait_fence, false); >>> + dma_fence_put(wait_fence); >>> + wait_fence = NULL; >>> + >>> + goto retry_wait; >>> + >>> +done: >>> + return; >> Drop the done: label and return directly above. >> >> Apart from all those nit picks that starts to look like it should work, >> Christian. >> >>> } >>> -EXPORT_SYMBOL(drm_sched_hw_job_reset); >>> +EXPORT_SYMBOL(drm_sched_stop); >>> /** >>> * drm_sched_job_recovery - recover jobs after a reset >>> @@ -393,33 +446,21 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset); >>> * @sched: scheduler instance >>> * >>> */ >>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) >>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool >>> full_recovery) >>> { >>> struct drm_sched_job *s_job, *tmp; >>> - bool found_guilty = false; >>> unsigned long flags; >>> int r; >>> + if (!full_recovery) >>> + goto unpark; >>> + >>> spin_lock_irqsave(&sched->job_list_lock, flags); >>> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, >>> node) { >>> struct drm_sched_fence *s_fence = s_job->s_fence; >>> - struct dma_fence *fence; >>> - uint64_t guilty_context; >>> - >>> - if (!found_guilty && atomic_read(&s_job->karma) > >>> sched->hang_limit) { >>> - found_guilty = true; >>> - guilty_context = s_job->s_fence->scheduled.context; >>> - } >>> - >>> - if (found_guilty && s_job->s_fence->scheduled.context == >>> guilty_context) >>> - dma_fence_set_error(&s_fence->finished, -ECANCELED); >>> - >>> - spin_unlock_irqrestore(&sched->job_list_lock, flags); >>> - fence = sched->ops->run_job(s_job); >>> - atomic_inc(&sched->hw_rq_count); >>> + struct dma_fence *fence = s_job->s_fence->parent; >>> if (fence) { >>> - s_fence->parent = dma_fence_get(fence); >>> r = dma_fence_add_callback(fence, &s_fence->cb, >>> drm_sched_process_job); >>> if (r == -ENOENT) >>> @@ -427,18 +468,47 @@ void drm_sched_job_recovery(struct >>> drm_gpu_scheduler *sched) >>> else if (r) >>> DRM_ERROR("fence add callback failed (%d)\n", >>> r); >>> - dma_fence_put(fence); >>> - } else { >>> - if (s_fence->finished.error < 0) >>> - drm_sched_expel_job_unlocked(s_job); >>> + } else >>> drm_sched_process_job(NULL, &s_fence->cb); >>> - } >>> - spin_lock_irqsave(&sched->job_list_lock, flags); >>> } >>> + >>> drm_sched_start_timeout(sched); >>> spin_unlock_irqrestore(&sched->job_list_lock, flags); >>> + >>> +unpark: >>> + kthread_unpark(sched->thread); >>> +} >>> +EXPORT_SYMBOL(drm_sched_start); >>> + >>> +/** >>> + * drm_sched_resubmit_jobs - helper to relunch job from mirror ring >>> list >>> + * >>> + * @sched: scheduler instance >>> + * >>> + */ >>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) >>> +{ >>> + struct drm_sched_job *s_job, *tmp; >>> + uint64_t guilty_context; >>> + bool found_guilty = false; >>> + >>> + /*TODO DO we need spinlock here ? */ >>> + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, >>> node) { >>> + struct drm_sched_fence *s_fence = s_job->s_fence; >>> + >>> + if (!found_guilty && atomic_read(&s_job->karma) > >>> sched->hang_limit) { >>> + found_guilty = true; >>> + guilty_context = s_job->s_fence->scheduled.context; >>> + } >>> + >>> + if (found_guilty && s_job->s_fence->scheduled.context == >>> guilty_context) >>> + dma_fence_set_error(&s_fence->finished, -ECANCELED); >>> + >>> + s_job->s_fence->parent = sched->ops->run_job(s_job); >>> + atomic_inc(&sched->hw_rq_count); >>> + } >>> } >>> -EXPORT_SYMBOL(drm_sched_job_recovery); >>> +EXPORT_SYMBOL(drm_sched_resubmit_jobs); >>> /** >>> * drm_sched_job_init - init a scheduler job >>> @@ -634,26 +704,14 @@ static int drm_sched_main(void *param) >>> DRM_ERROR("fence add callback failed (%d)\n", >>> r); >>> dma_fence_put(fence); >>> - } else { >>> - if (s_fence->finished.error < 0) >>> - drm_sched_expel_job_unlocked(sched_job); >>> + } else >>> drm_sched_process_job(NULL, &s_fence->cb); >>> - } >>> wake_up(&sched->job_scheduled); >>> } >>> return 0; >>> } >>> -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job) >>> -{ >>> - struct drm_gpu_scheduler *sched = s_job->sched; >>> - >>> - spin_lock(&sched->job_list_lock); >>> - list_del_init(&s_job->node); >>> - spin_unlock(&sched->job_list_lock); >>> -} >>> - >>> /** >>> * drm_sched_init - Init a gpu scheduler instance >>> * >>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c >>> b/drivers/gpu/drm/v3d/v3d_sched.c >>> index 445b2ef..f76d9ed 100644 >>> --- a/drivers/gpu/drm/v3d/v3d_sched.c >>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c >>> @@ -178,18 +178,22 @@ v3d_job_timedout(struct drm_sched_job *sched_job) >>> for (q = 0; q < V3D_MAX_QUEUES; q++) { >>> struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; >>> - kthread_park(sched->thread); >>> - drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? >>> + drm_sched_stop(sched, (sched_job->sched == sched ? >>> sched_job : NULL)); >>> + >>> + if(sched_job) >>> + drm_sched_increase_karma(sched_job); >>> } >>> /* get the GPU back into the init state */ >>> v3d_reset(v3d); >>> + for (q = 0; q < V3D_MAX_QUEUES; q++) >>> + drm_sched_resubmit_jobs(sched_job->sched); >>> + >>> /* Unblock schedulers and restart their jobs. */ >>> for (q = 0; q < V3D_MAX_QUEUES; q++) { >>> - drm_sched_job_recovery(&v3d->queue[q].sched); >>> - kthread_unpark(v3d->queue[q].sched.thread); >>> + drm_sched_start(&v3d->queue[q].sched, true); >>> } >>> mutex_unlock(&v3d->reset_lock); >>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h >>> index 47e1979..5ab2d97 100644 >>> --- a/include/drm/gpu_scheduler.h >>> +++ b/include/drm/gpu_scheduler.h >>> @@ -175,6 +175,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct >>> dma_fence *f); >>> * finished to remove the job from the >>> * @drm_gpu_scheduler.ring_mirror_list. >>> * @node: used to append this struct to the >>> @drm_gpu_scheduler.ring_mirror_list. >>> + * @finish_node: used in a list to wait on before resetting the >>> scheduler >>> * @id: a unique id assigned to each job scheduled on the scheduler. >>> * @karma: increment on every hang caused by this job. If this >>> exceeds the hang >>> * limit of the scheduler then the job is marked guilty and >>> will not >>> @@ -193,6 +194,7 @@ struct drm_sched_job { >>> struct dma_fence_cb finish_cb; >>> struct work_struct finish_work; >>> struct list_head node; >>> + struct list_head finish_node; >>> uint64_t id; >>> atomic_t karma; >>> enum drm_sched_priority s_priority; >>> @@ -298,9 +300,11 @@ int drm_sched_job_init(struct drm_sched_job *job, >>> void *owner); >>> void drm_sched_job_cleanup(struct drm_sched_job *job); >>> void drm_sched_wakeup(struct drm_gpu_scheduler *sched); >>> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, >>> - struct drm_sched_job *job); >>> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched); >>> +void drm_sched_stop(struct drm_gpu_scheduler *sched, >>> + struct drm_sched_job *job); >>> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool >>> full_recovery); >>> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); >>> +void drm_sched_increase_karma(struct drm_sched_job *bad); >>> bool drm_sched_dependency_optimized(struct dma_fence* fence, >>> struct drm_sched_entity *entity); >>> void drm_sched_fault(struct drm_gpu_scheduler *sched); >> _______________________________________________ >> amd-gfx mailing list >> amd-gfx@xxxxxxxxxxxxxxxxxxxxx >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel