On 12/21/2018 01:37 PM, Christian König wrote: > Am 20.12.18 um 20:23 schrieb Andrey Grodzovsky: >> Decauple sched threads stop and start and ring mirror >> list handling from the policy of what to do about the >> guilty jobs. >> When stoppping the sched thread and detaching sched fences >> from non signaled HW fenes wait for all signaled HW fences >> to complete before rerunning the jobs. >> >> v2: Fix resubmission of guilty job into HW after refactoring. >> >> v4: >> Full restart for all the jobs, not only from guilty ring. >> Extract karma increase into standalone function. >> >> v5: >> Rework waiting for signaled jobs without relying on the job >> struct itself as those might already be freed for non 'guilty' >> job's schedulers. >> Expose karma increase to drivers. >> >> Suggested-by: Christian Koenig <Christian.Koenig@xxxxxxx> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> >> --- >> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 +-- >> drivers/gpu/drm/etnaviv/etnaviv_sched.c | 11 +- >> drivers/gpu/drm/scheduler/sched_main.c | 188 >> +++++++++++++++++++---------- >> drivers/gpu/drm/v3d/v3d_sched.c | 12 +- >> include/drm/gpu_scheduler.h | 10 +- >> 5 files changed, 151 insertions(+), 88 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> index 8a078f4..a4bd2d3 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c >> @@ -3298,12 +3298,10 @@ static int >> amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, >> if (!ring || !ring->sched.thread) >> continue; >> - kthread_park(ring->sched.thread); >> + drm_sched_stop(&ring->sched, job ? &job->base : NULL); >> - if (job && job->base.sched != &ring->sched) >> - continue; >> - >> - drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); >> + if(job) >> + drm_sched_increase_karma(&job->base); > > Since we dropped the "job && job->base.sched != &ring->sched" check > above this will now increase the jobs karma multiple times. > > Maybe just move that outside of the loop. > >> /* after all hw jobs are reset, hw fence is meaningless, >> so force_completion */ >> amdgpu_fence_driver_force_completion(ring); >> @@ -3454,14 +3452,10 @@ static void >> amdgpu_device_post_asic_reset(struct amdgpu_device *adev, >> if (!ring || !ring->sched.thread) >> continue; >> - /* only need recovery sched of the given job's ring >> - * or all rings (in the case @job is NULL) >> - * after above amdgpu_reset accomplished >> - */ >> - if ((!job || job->base.sched == &ring->sched) && >> !adev->asic_reset_res) >> - drm_sched_job_recovery(&ring->sched); >> + if (!adev->asic_reset_res) >> + drm_sched_resubmit_jobs(&ring->sched); >> - kthread_unpark(ring->sched.thread); >> + drm_sched_start(&ring->sched, !adev->asic_reset_res); >> } >> if (!amdgpu_device_has_dc_support(adev)) { >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c >> b/drivers/gpu/drm/etnaviv/etnaviv_sched.c >> index 49a6763..6f1268f 100644 >> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c >> @@ -109,16 +109,19 @@ static void etnaviv_sched_timedout_job(struct >> drm_sched_job *sched_job) >> } >> /* block scheduler */ >> - kthread_park(gpu->sched.thread); >> - drm_sched_hw_job_reset(&gpu->sched, sched_job); >> + drm_sched_stop(&gpu->sched, sched_job); >> + >> + if(sched_job) >> + drm_sched_increase_karma(sched_job); >> /* get the GPU back into the init state */ >> etnaviv_core_dump(gpu); >> etnaviv_gpu_recover_hang(gpu); >> + drm_sched_resubmit_jobs(&gpu->sched); >> + >> /* restart scheduler after GPU is usable again */ >> - drm_sched_job_recovery(&gpu->sched); >> - kthread_unpark(gpu->sched.thread); >> + drm_sched_start(&gpu->sched, true); >> } >> static void etnaviv_sched_free_job(struct drm_sched_job *sched_job) >> diff --git a/drivers/gpu/drm/scheduler/sched_main.c >> b/drivers/gpu/drm/scheduler/sched_main.c >> index dbb6906..b5c5bee 100644 >> --- a/drivers/gpu/drm/scheduler/sched_main.c >> +++ b/drivers/gpu/drm/scheduler/sched_main.c >> @@ -60,8 +60,6 @@ >> static void drm_sched_process_job(struct dma_fence *f, struct >> dma_fence_cb *cb); >> -static void drm_sched_expel_job_unlocked(struct drm_sched_job >> *s_job); >> - >> /** >> * drm_sched_rq_init - initialize a given run queue struct >> * >> @@ -335,6 +333,42 @@ static void drm_sched_job_timedout(struct >> work_struct *work) >> spin_unlock_irqrestore(&sched->job_list_lock, flags); >> } > > Kernel doc here would be nice to have. > >> +void drm_sched_increase_karma(struct drm_sched_job *bad) >> +{ >> + int i; >> + struct drm_sched_entity *tmp; >> + struct drm_sched_entity *entity; >> + struct drm_gpu_scheduler *sched = bad->sched; >> + >> + /* don't increase @bad's karma if it's from KERNEL RQ, >> + * because sometimes GPU hang would cause kernel jobs (like VM >> updating jobs) >> + * corrupt but keep in mind that kernel jobs always considered >> good. >> + */ >> + if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { >> + atomic_inc(&bad->karma); >> + for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL; >> + i++) { >> + struct drm_sched_rq *rq = &sched->sched_rq[i]; >> + >> + spin_lock(&rq->lock); >> + list_for_each_entry_safe(entity, tmp, &rq->entities, >> list) { >> + if (bad->s_fence->scheduled.context == >> + entity->fence_context) { >> + if (atomic_read(&bad->karma) > >> + bad->sched->hang_limit) >> + if (entity->guilty) >> + atomic_set(entity->guilty, 1); >> + break; >> + } >> + } >> + spin_unlock(&rq->lock); >> + if (&entity->list != &rq->entities) >> + break; >> + } >> + } >> +} >> +EXPORT_SYMBOL(drm_sched_increase_karma); >> + >> /** >> * drm_sched_hw_job_reset - stop the scheduler if it contains the >> bad job >> * >> @@ -342,13 +376,22 @@ static void drm_sched_job_timedout(struct >> work_struct *work) >> * @bad: bad scheduler job >> * >> */ >> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct >> drm_sched_job *bad) >> +void drm_sched_stop(struct drm_gpu_scheduler *sched, struct >> drm_sched_job *bad) >> { >> - struct drm_sched_job *s_job; >> - struct drm_sched_entity *entity, *tmp; >> + struct drm_sched_job *s_job, *last_job; >> unsigned long flags; >> - int i; >> + struct dma_fence *wait_fence = NULL; >> + int r; >> + >> + kthread_park(sched->thread); >> + /* >> + * Verify all the signaled jobs in mirror list are removed from >> the ring >> + * by waiting for their respective scheduler fences to signal. >> + * Continually repeat traversing the ring mirror list until no >> more signaled >> + * fences are found >> + */ >> +retry_wait: >> spin_lock_irqsave(&sched->job_list_lock, flags); >> list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, >> node) { >> if (s_job->s_fence->parent && >> @@ -357,35 +400,45 @@ void drm_sched_hw_job_reset(struct >> drm_gpu_scheduler *sched, struct drm_sched_jo >> dma_fence_put(s_job->s_fence->parent); >> s_job->s_fence->parent = NULL; >> atomic_dec(&sched->hw_rq_count); >> + } else { >> + wait_fence = dma_fence_get(&s_job->s_fence->finished); >> + last_job = s_job; >> + break; >> } >> } >> - spin_unlock_irqrestore(&sched->job_list_lock, flags); >> - if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { >> - atomic_inc(&bad->karma); >> - /* don't increase @bad's karma if it's from KERNEL RQ, >> - * becuase sometimes GPU hang would cause kernel jobs (like >> VM updating jobs) >> - * corrupt but keep in mind that kernel jobs always >> considered good. >> - */ >> - for (i = DRM_SCHED_PRIORITY_MIN; i < >> DRM_SCHED_PRIORITY_KERNEL; i++ ) { >> - struct drm_sched_rq *rq = &sched->sched_rq[i]; >> + /* No signaled jobs in the ring, its safe to proceed to ASIC >> reset */ >> + if (!wait_fence) { >> + spin_unlock_irqrestore(&sched->job_list_lock, flags); >> + goto done; >> + } >> - spin_lock(&rq->lock); >> - list_for_each_entry_safe(entity, tmp, &rq->entities, >> list) { >> - if (bad->s_fence->scheduled.context == >> entity->fence_context) { >> - if (atomic_read(&bad->karma) > >> bad->sched->hang_limit) >> - if (entity->guilty) >> - atomic_set(entity->guilty, 1); >> - break; >> - } >> - } >> - spin_unlock(&rq->lock); >> - if (&entity->list != &rq->entities) >> - break; >> + /* Restore removed cb since removing again already removed cb is >> undefined */ >> + list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, >> node) { >> + if(s_job == last_job) >> + break; > > Need to double check after the holidays, but you should be able to use > list_for_each_entry_continue here. I think it should work - kind of traversing back on all the jobs we just removed their callbacks. Andrey > >> + >> + if (s_job->s_fence->parent) { >> + r = dma_fence_add_callback(s_job->s_fence->parent, >> + &s_job->s_fence->cb, >> + drm_sched_process_job); >> + if (r) >> + DRM_ERROR("fence restore callback failed (%d)\n", >> + r); > > When you fail to add the callback this means that you need to call > call drm_sched_process_job manually here. > >> } >> } >> + spin_unlock_irqrestore(&sched->job_list_lock, flags); >> + >> + dma_fence_wait(wait_fence, false); >> + dma_fence_put(wait_fence); >> + wait_fence = NULL; >> + >> + goto retry_wait; >> + >> +done: >> + return; > > Drop the done: label and return directly above. > > Apart from all those nit picks that starts to look like it should work, > Christian. > >> } >> -EXPORT_SYMBOL(drm_sched_hw_job_reset); >> +EXPORT_SYMBOL(drm_sched_stop); >> /** >> * drm_sched_job_recovery - recover jobs after a reset >> @@ -393,33 +446,21 @@ EXPORT_SYMBOL(drm_sched_hw_job_reset); >> * @sched: scheduler instance >> * >> */ >> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched) >> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool >> full_recovery) >> { >> struct drm_sched_job *s_job, *tmp; >> - bool found_guilty = false; >> unsigned long flags; >> int r; >> + if (!full_recovery) >> + goto unpark; >> + >> spin_lock_irqsave(&sched->job_list_lock, flags); >> list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, >> node) { >> struct drm_sched_fence *s_fence = s_job->s_fence; >> - struct dma_fence *fence; >> - uint64_t guilty_context; >> - >> - if (!found_guilty && atomic_read(&s_job->karma) > >> sched->hang_limit) { >> - found_guilty = true; >> - guilty_context = s_job->s_fence->scheduled.context; >> - } >> - >> - if (found_guilty && s_job->s_fence->scheduled.context == >> guilty_context) >> - dma_fence_set_error(&s_fence->finished, -ECANCELED); >> - >> - spin_unlock_irqrestore(&sched->job_list_lock, flags); >> - fence = sched->ops->run_job(s_job); >> - atomic_inc(&sched->hw_rq_count); >> + struct dma_fence *fence = s_job->s_fence->parent; >> if (fence) { >> - s_fence->parent = dma_fence_get(fence); >> r = dma_fence_add_callback(fence, &s_fence->cb, >> drm_sched_process_job); >> if (r == -ENOENT) >> @@ -427,18 +468,47 @@ void drm_sched_job_recovery(struct >> drm_gpu_scheduler *sched) >> else if (r) >> DRM_ERROR("fence add callback failed (%d)\n", >> r); >> - dma_fence_put(fence); >> - } else { >> - if (s_fence->finished.error < 0) >> - drm_sched_expel_job_unlocked(s_job); >> + } else >> drm_sched_process_job(NULL, &s_fence->cb); >> - } >> - spin_lock_irqsave(&sched->job_list_lock, flags); >> } >> + >> drm_sched_start_timeout(sched); >> spin_unlock_irqrestore(&sched->job_list_lock, flags); >> + >> +unpark: >> + kthread_unpark(sched->thread); >> +} >> +EXPORT_SYMBOL(drm_sched_start); >> + >> +/** >> + * drm_sched_resubmit_jobs - helper to relunch job from mirror ring >> list >> + * >> + * @sched: scheduler instance >> + * >> + */ >> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) >> +{ >> + struct drm_sched_job *s_job, *tmp; >> + uint64_t guilty_context; >> + bool found_guilty = false; >> + >> + /*TODO DO we need spinlock here ? */ >> + list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, >> node) { >> + struct drm_sched_fence *s_fence = s_job->s_fence; >> + >> + if (!found_guilty && atomic_read(&s_job->karma) > >> sched->hang_limit) { >> + found_guilty = true; >> + guilty_context = s_job->s_fence->scheduled.context; >> + } >> + >> + if (found_guilty && s_job->s_fence->scheduled.context == >> guilty_context) >> + dma_fence_set_error(&s_fence->finished, -ECANCELED); >> + >> + s_job->s_fence->parent = sched->ops->run_job(s_job); >> + atomic_inc(&sched->hw_rq_count); >> + } >> } >> -EXPORT_SYMBOL(drm_sched_job_recovery); >> +EXPORT_SYMBOL(drm_sched_resubmit_jobs); >> /** >> * drm_sched_job_init - init a scheduler job >> @@ -634,26 +704,14 @@ static int drm_sched_main(void *param) >> DRM_ERROR("fence add callback failed (%d)\n", >> r); >> dma_fence_put(fence); >> - } else { >> - if (s_fence->finished.error < 0) >> - drm_sched_expel_job_unlocked(sched_job); >> + } else >> drm_sched_process_job(NULL, &s_fence->cb); >> - } >> wake_up(&sched->job_scheduled); >> } >> return 0; >> } >> -static void drm_sched_expel_job_unlocked(struct drm_sched_job *s_job) >> -{ >> - struct drm_gpu_scheduler *sched = s_job->sched; >> - >> - spin_lock(&sched->job_list_lock); >> - list_del_init(&s_job->node); >> - spin_unlock(&sched->job_list_lock); >> -} >> - >> /** >> * drm_sched_init - Init a gpu scheduler instance >> * >> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c >> b/drivers/gpu/drm/v3d/v3d_sched.c >> index 445b2ef..f76d9ed 100644 >> --- a/drivers/gpu/drm/v3d/v3d_sched.c >> +++ b/drivers/gpu/drm/v3d/v3d_sched.c >> @@ -178,18 +178,22 @@ v3d_job_timedout(struct drm_sched_job *sched_job) >> for (q = 0; q < V3D_MAX_QUEUES; q++) { >> struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; >> - kthread_park(sched->thread); >> - drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? >> + drm_sched_stop(sched, (sched_job->sched == sched ? >> sched_job : NULL)); >> + >> + if(sched_job) >> + drm_sched_increase_karma(sched_job); >> } >> /* get the GPU back into the init state */ >> v3d_reset(v3d); >> + for (q = 0; q < V3D_MAX_QUEUES; q++) >> + drm_sched_resubmit_jobs(sched_job->sched); >> + >> /* Unblock schedulers and restart their jobs. */ >> for (q = 0; q < V3D_MAX_QUEUES; q++) { >> - drm_sched_job_recovery(&v3d->queue[q].sched); >> - kthread_unpark(v3d->queue[q].sched.thread); >> + drm_sched_start(&v3d->queue[q].sched, true); >> } >> mutex_unlock(&v3d->reset_lock); >> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h >> index 47e1979..5ab2d97 100644 >> --- a/include/drm/gpu_scheduler.h >> +++ b/include/drm/gpu_scheduler.h >> @@ -175,6 +175,7 @@ struct drm_sched_fence *to_drm_sched_fence(struct >> dma_fence *f); >> * finished to remove the job from the >> * @drm_gpu_scheduler.ring_mirror_list. >> * @node: used to append this struct to the >> @drm_gpu_scheduler.ring_mirror_list. >> + * @finish_node: used in a list to wait on before resetting the >> scheduler >> * @id: a unique id assigned to each job scheduled on the scheduler. >> * @karma: increment on every hang caused by this job. If this >> exceeds the hang >> * limit of the scheduler then the job is marked guilty and >> will not >> @@ -193,6 +194,7 @@ struct drm_sched_job { >> struct dma_fence_cb finish_cb; >> struct work_struct finish_work; >> struct list_head node; >> + struct list_head finish_node; >> uint64_t id; >> atomic_t karma; >> enum drm_sched_priority s_priority; >> @@ -298,9 +300,11 @@ int drm_sched_job_init(struct drm_sched_job *job, >> void *owner); >> void drm_sched_job_cleanup(struct drm_sched_job *job); >> void drm_sched_wakeup(struct drm_gpu_scheduler *sched); >> -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, >> - struct drm_sched_job *job); >> -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched); >> +void drm_sched_stop(struct drm_gpu_scheduler *sched, >> + struct drm_sched_job *job); >> +void drm_sched_start(struct drm_gpu_scheduler *sched, bool >> full_recovery); >> +void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); >> +void drm_sched_increase_karma(struct drm_sched_job *bad); >> bool drm_sched_dependency_optimized(struct dma_fence* fence, >> struct drm_sched_entity *entity); >> void drm_sched_fault(struct drm_gpu_scheduler *sched); > > _______________________________________________ > amd-gfx mailing list > amd-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/amd-gfx _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel