On 28/06/2024 15:55, Boris Brezillon wrote: > A sync-only job is meant to provide a synchronization point on a > queue, so we can't return a NULL fence there, we have to add a signal > operation to the command stream which executes after all other > previously submitted jobs are done. > > Fixes: de8548813824 ("drm/panthor: Add the scheduler logical block") > Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> Looks correct to me. Reviewed-by: Steven Price <steven.price@xxxxxxx> > --- > drivers/gpu/drm/panthor/panthor_sched.c | 41 ++++++++++++++++++++----- > include/uapi/drm/panthor_drm.h | 5 +++ > 2 files changed, 38 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c > index 79ffcbc41d78..951ff7e63ea8 100644 > --- a/drivers/gpu/drm/panthor/panthor_sched.c > +++ b/drivers/gpu/drm/panthor/panthor_sched.c > @@ -458,6 +458,16 @@ struct panthor_queue { > /** @seqno: Sequence number of the last initialized fence. */ > atomic64_t seqno; > > + /** > + * @last_fence: Fence of the last submitted job. > + * > + * We return this fence when we get an empty command stream. > + * This way, we are guaranteed that all earlier jobs have completed > + * when drm_sched_job::s_fence::finished without having to feed > + * the CS ring buffer with a dummy job that only signals the fence. > + */ > + struct dma_fence *last_fence; > + > /** > * @in_flight_jobs: List containing all in-flight jobs. > * > @@ -829,6 +839,9 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue * > panthor_kernel_bo_destroy(queue->ringbuf); > panthor_kernel_bo_destroy(queue->iface.mem); > > + /* Release the last_fence we were holding, if any. */ > + dma_fence_put(queue->fence_ctx.last_fence); > + kfree(queue); > } > > @@ -2865,11 +2878,14 @@ queue_run_job(struct drm_sched_job *sched_job) > static_assert(sizeof(call_instrs) % 64 == 0, > "call_instrs is not aligned on a cacheline"); > > - /* Stream size is zero, nothing to do => return a NULL fence and let > - * drm_sched signal the parent. > + /* Stream size is zero, nothing to do except making sure all previously > + * submitted jobs are done before we signal the > + * drm_sched_job::s_fence::finished fence. > */ > - if (!job->call_info.size) > - return NULL; > + if (!job->call_info.size) { > + job->done_fence = dma_fence_get(queue->fence_ctx.last_fence); > + return job->done_fence; > + } > > ret = pm_runtime_resume_and_get(ptdev->base.dev); > if (drm_WARN_ON(&ptdev->base, ret)) > @@ -2928,6 +2944,10 @@ queue_run_job(struct drm_sched_job *sched_job) > } > } > > + /* Update the last fence. */ > + dma_fence_put(queue->fence_ctx.last_fence); > + queue->fence_ctx.last_fence = dma_fence_get(job->done_fence); > + > done_fence = dma_fence_get(job->done_fence); > > out_unlock: > @@ -3378,10 +3398,15 @@ panthor_job_create(struct panthor_file *pfile, > goto err_put_job; > } > > - job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > - if (!job->done_fence) { > - ret = -ENOMEM; > - goto err_put_job; > + /* Empty command streams don't need a fence, they'll pick the one from > + * the previously submitted job. > + */ > + if (job->call_info.size) { > + job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); > + if (!job->done_fence) { > + ret = -ENOMEM; > + goto err_put_job; > + } > } > > ret = drm_sched_job_init(&job->base, > diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h > index aaed8e12ad0b..926b1deb1116 100644 > --- a/include/uapi/drm/panthor_drm.h > +++ b/include/uapi/drm/panthor_drm.h > @@ -802,6 +802,9 @@ struct drm_panthor_queue_submit { > * Must be 64-bit/8-byte aligned (the size of a CS instruction) > * > * Can be zero if stream_addr is zero too. > + * > + * When the stream size is zero, the queue submit serves as a > + * synchronization point. > */ > __u32 stream_size; > > @@ -822,6 +825,8 @@ struct drm_panthor_queue_submit { > * ensure the GPU doesn't get garbage when reading the indirect command > * stream buffers. If you want the cache flush to happen > unconditionally, pass a zero here. > + > + * Ignored when stream_size is zero. > */ > __u32 latest_flush; >