On Thu, 28 Nov 2024 21:06:18 +0000 Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx> wrote: > Avoid waiting for the DRM scheduler job timedout handler, and instead, let > the DRM scheduler core signal the error fence immediately when HW job > submission fails. > > That means we must also decrement the runtime-PM refcnt for the device, > because the job will never be enqueued or inflight. > > Signed-off-by: Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx> > --- > drivers/gpu/drm/panfrost/panfrost_job.c | 20 ++++++++++++++++---- > 1 file changed, 16 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c > index f640d211cc3a..3f4f0682d69d 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -195,7 +195,7 @@ panfrost_enqueue_job(struct panfrost_device *pfdev, int slot, > return 1; > } > > -static void panfrost_job_hw_submit(struct panfrost_job *job, int js) > +static int panfrost_job_hw_submit(struct panfrost_job *job, int js) > { > struct panfrost_device *pfdev = job->pfdev; > unsigned int subslot; > @@ -207,10 +207,11 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) > > ret = pm_runtime_get_sync(pfdev->base.dev); > if (ret < 0) > - return; > + goto err_hwsubmit; > > if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) { > - return; > + ret = -EINVAL; > + goto err_hwsubmit; > } > > cfg = panfrost_mmu_as_get(pfdev, job->mmu); > @@ -261,6 +262,12 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js) > job, js, subslot, jc_head, cfg & 0xf); > } > spin_unlock(&pfdev->js->job_lock); > + > + return 0; > + > +err_hwsubmit: > + pm_runtime_put_autosuspend(pfdev->base.dev); > + return ret; > } > > static int panfrost_acquire_object_fences(struct drm_gem_object **bos, > @@ -382,6 +389,7 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) > struct panfrost_device *pfdev = job->pfdev; > int slot = 
panfrost_job_get_slot(job); > struct dma_fence *fence = NULL; > + int ret; > > if (unlikely(job->base.s_fence->finished.error)) > return NULL; > @@ -400,7 +408,11 @@ static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job) > dma_fence_put(job->done_fence); > job->done_fence = dma_fence_get(fence); > > - panfrost_job_hw_submit(job, slot); > + ret = panfrost_job_hw_submit(job, slot); > + if (ret) { > + dma_fence_put(job->done_fence);

If you call dma_fence_put() here, you need to set job->done_fence to NULL; otherwise dma_fence_put() will be called again on an already-freed fence in panfrost_job_cleanup(). The question is: do we really need to call dma_fence_put(job->done_fence) here? Can't we let the job destructor take care of that?

> + return ERR_PTR(ret); > + } > > return fence; > }