[AMD Official Use Only - General]
Sure. We can pull it into amd-staging-drm-next as well if we need it for any customers in the short term.
Alex
From: Christian König <ckoenig.leichtzumerken@xxxxxxxxx>
Sent: Friday, April 21, 2023 9:27 AM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx <amd-gfx@xxxxxxxxxxxxxxxxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>
Cc: Tuikov, Luben <Luben.Tuikov@xxxxxxx>
Subject: Re: [PATCH 1/8] drm/scheduler: properly forward fence errors

Alex can I merge that through drm-misc-next or do we really need
amd-staging-drm-next?

Christian.

Am 21.04.23 um 07:22 schrieb Luben Tuikov:
> Hi Christian,
>
> Thanks for working on this.
>
> Series is,
> Reviewed-by: Luben Tuikov <luben.tuikov@xxxxxxx>
>
> Regards,
> Luben
>
> On 2023-04-20 07:57, Christian König wrote:
>> When a hw fence is signaled with an error properly forward that to the
>> finished fence.
>>
>> Signed-off-by: Christian König <christian.koenig@xxxxxxx>
>> ---
>>  drivers/gpu/drm/scheduler/sched_entity.c |  4 +---
>>  drivers/gpu/drm/scheduler/sched_fence.c  |  4 +++-
>>  drivers/gpu/drm/scheduler/sched_main.c   | 18 ++++++++----------
>>  include/drm/gpu_scheduler.h              |  2 +-
>>  4 files changed, 13 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
>> index 15d04a0ec623..eaf71fe15ed3 100644
>> --- a/drivers/gpu/drm/scheduler/sched_entity.c
>> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
>> @@ -144,7 +144,7 @@ static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
>>  {
>>  	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
>>
>> -	drm_sched_fence_finished(job->s_fence);
>> +	drm_sched_fence_finished(job->s_fence, -ESRCH);
>>  	WARN_ON(job->s_fence->parent);
>>  	job->sched->ops->free_job(job);
>>  }
>> @@ -195,8 +195,6 @@ static void drm_sched_entity_kill(struct drm_sched_entity *entity)
>>  	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
>>  		struct drm_sched_fence *s_fence = job->s_fence;
>>
>> -		dma_fence_set_error(&s_fence->finished, -ESRCH);
>> -
>>  		dma_fence_get(&s_fence->finished);
>>  		if (!prev || dma_fence_add_callback(prev, &job->finish_cb,
>>  					   drm_sched_entity_kill_jobs_cb))
>> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
>> index 7fd869520ef2..1a6bea98c5cc 100644
>> --- a/drivers/gpu/drm/scheduler/sched_fence.c
>> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
>> @@ -53,8 +53,10 @@ void drm_sched_fence_scheduled(struct drm_sched_fence *fence)
>>  	dma_fence_signal(&fence->scheduled);
>>  }
>>
>> -void drm_sched_fence_finished(struct drm_sched_fence *fence)
>> +void drm_sched_fence_finished(struct drm_sched_fence *fence, int result)
>>  {
>> +	if (result)
>> +		dma_fence_set_error(&fence->finished, result);
>>  	dma_fence_signal(&fence->finished);
>>  }
>>
>> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
>> index fcd4bfef7415..649fac2e1ccb 100644
>> --- a/drivers/gpu/drm/scheduler/sched_main.c
>> +++ b/drivers/gpu/drm/scheduler/sched_main.c
>> @@ -257,7 +257,7 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>>   *
>>   * Finish the job's fence and wake up the worker thread.
>>   */
>> -static void drm_sched_job_done(struct drm_sched_job *s_job)
>> +static void drm_sched_job_done(struct drm_sched_job *s_job, int result)
>>  {
>>  	struct drm_sched_fence *s_fence = s_job->s_fence;
>>  	struct drm_gpu_scheduler *sched = s_fence->sched;
>> @@ -268,7 +268,7 @@ static void drm_sched_job_done(struct drm_sched_job *s_job)
>>  	trace_drm_sched_process_job(s_fence);
>>
>>  	dma_fence_get(&s_fence->finished);
>> -	drm_sched_fence_finished(s_fence);
>> +	drm_sched_fence_finished(s_fence, result);
>>  	dma_fence_put(&s_fence->finished);
>>  	wake_up_interruptible(&sched->wake_up_worker);
>>  }
>> @@ -282,7 +282,7 @@ static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
>>  {
>>  	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
>>
>> -	drm_sched_job_done(s_job);
>> +	drm_sched_job_done(s_job, f->error);
>>  }
>>
>>  /**
>> @@ -533,12 +533,12 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
>>  			r = dma_fence_add_callback(fence, &s_job->cb,
>>  						   drm_sched_job_done_cb);
>>  			if (r == -ENOENT)
>> -				drm_sched_job_done(s_job);
>> +				drm_sched_job_done(s_job, fence->error);
>>  			else if (r)
>>  				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
>>  					  r);
>>  		} else
>> -			drm_sched_job_done(s_job);
>> +			drm_sched_job_done(s_job, 0);
>>  	}
>>
>>  	if (full_recovery) {
>> @@ -1010,15 +1010,13 @@ static int drm_sched_main(void *param)
>>  			r = dma_fence_add_callback(fence, &sched_job->cb,
>>  						   drm_sched_job_done_cb);
>>  			if (r == -ENOENT)
>> -				drm_sched_job_done(sched_job);
>> +				drm_sched_job_done(sched_job, fence->error);
>>  			else if (r)
>>  				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
>>  					  r);
>>  		} else {
>> -			if (IS_ERR(fence))
>> -				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
>> -
>> -			drm_sched_job_done(sched_job);
>> +			drm_sched_job_done(sched_job, IS_ERR(fence) ?
>> +					   PTR_ERR(fence) : 0);
>>  		}
>>
>>  		wake_up(&sched->job_scheduled);
>> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
>> index ca857ec9e7eb..5c1df6b12ced 100644
>> --- a/include/drm/gpu_scheduler.h
>> +++ b/include/drm/gpu_scheduler.h
>> @@ -569,7 +569,7 @@ void drm_sched_fence_init(struct drm_sched_fence *fence,
>>  void drm_sched_fence_free(struct drm_sched_fence *fence);
>>
>>  void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
>> -void drm_sched_fence_finished(struct drm_sched_fence *fence);
>> +void drm_sched_fence_finished(struct drm_sched_fence *fence, int result);
>>
>>  unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
>>  void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,