Follow the semantics of DMA_RESV_USAGE_PREEMPT in the DRM scheduler by storing preemptive fences in a dedicated xarray, which is waited on after all other fences are signaled. In addition to following these semantics, pipeline preemptive fences by enabling signaling on all preemptive fences before waiting on any of them. Cc: Philipp Stanner <pstanner@xxxxxxxxxx> Cc: Danilo Krummrich <dakr@xxxxxxxxxx> Cc: Luben Tuikov <ltuikov89@xxxxxxxxx> Cc: Dave Airlie <airlied@xxxxxxxxxx> Cc: Simona Vetter <simona.vetter@xxxxxxxx> Cc: Christian Koenig <christian.koenig@xxxxxxx> Signed-off-by: Matthew Brost <matthew.brost@xxxxxxxxx> --- drivers/gpu/drm/scheduler/sched_entity.c | 29 ++++++++++++-- drivers/gpu/drm/scheduler/sched_main.c | 48 ++++++++++++++++-------- include/drm/gpu_scheduler.h | 15 ++++++++ 3 files changed, 73 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 69bcf0e99d57..c6c4978aa65a 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -201,11 +201,13 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, struct drm_sched_job *job = container_of(cb, struct drm_sched_job, finish_cb); unsigned long index; + struct xarray *dependencies = &job->dependencies; dma_fence_put(f); +again: /* Wait for all dependencies to avoid data corruptions */ - xa_for_each(&job->dependencies, index, f) { + xa_for_each(dependencies, index, f) { struct drm_sched_fence *s_fence = to_drm_sched_fence(f); if (s_fence && f == &s_fence->scheduled) { @@ -223,7 +225,7 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, dma_fence_put(&s_fence->scheduled); } - xa_erase(&job->dependencies, index); + xa_erase(dependencies, index); if (f && !dma_fence_add_callback(f, &job->finish_cb, drm_sched_entity_kill_jobs_cb)) return; @@ -231,6 +233,11 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, dma_fence_put(f); } + if (dependencies != &job->preempt_dependencies) { + dependencies = &job->preempt_dependencies; + goto again; + } + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); schedule_work(&job->work); } @@ -456,17 +463,33 @@ drm_sched_job_dependency(struct drm_sched_job *job, struct drm_sched_entity *entity) { struct dma_fence *f; + struct xarray *dependencies; + +again: + dependencies = job->resolve_preempt_dependencies ? + &job->preempt_dependencies : &job->dependencies; /* We keep the fence around, so we can iterate over all dependencies * in drm_sched_entity_kill_jobs_cb() to ensure all deps are signaled * before killing the job. */ - f = xa_load(&job->dependencies, job->last_dependency); + f = xa_load(dependencies, job->last_dependency); if (f) { job->last_dependency++; return dma_fence_get(f); } + /* Switch resolving preempt dependencies pipelining signaling */ + if (!job->resolve_preempt_dependencies) { + unsigned long index; + + xa_for_each(&job->preempt_dependencies, index, f) + dma_fence_enable_sw_signaling(f); + + job->resolve_preempt_dependencies = true; + goto again; + } + if (job->sched->ops->prepare_job) return job->sched->ops->prepare_job(job, entity); diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 7ce25281c74c..eceb9b8c6f5f 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -829,6 +829,7 @@ int drm_sched_job_init(struct drm_sched_job *job, INIT_LIST_HEAD(&job->list); xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC); + xa_init_flags(&job->preempt_dependencies, XA_FLAGS_ALLOC); return 0; } @@ -864,21 +865,14 @@ void drm_sched_job_arm(struct drm_sched_job *job) } EXPORT_SYMBOL(drm_sched_job_arm); -/** - * drm_sched_job_add_dependency - adds the fence as a job dependency - * @job: scheduler job to add the dependencies to - * @fence: the dma_fence to add to the list of dependencies. - * - * Note that @fence is consumed in both the success and error cases. - * - * Returns: - * 0 on success, or an error on failing to expand the array. - */ -int drm_sched_job_add_dependency(struct drm_sched_job *job, - struct dma_fence *fence) +static int __drm_sched_job_add_dependency(struct drm_sched_job *job, + struct dma_fence *fence, + bool is_preempt) { struct dma_fence *entry; unsigned long index; + struct xarray *dependencies = is_preempt ? &job->preempt_dependencies : + &job->dependencies; u32 id = 0; int ret; @@ -889,25 +883,41 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job, * This lets the size of the array of deps scale with the number of * engines involved, rather than the number of BOs. */ - xa_for_each(&job->dependencies, index, entry) { + xa_for_each(dependencies, index, entry) { if (entry->context != fence->context) continue; if (dma_fence_is_later(fence, entry)) { dma_fence_put(entry); - xa_store(&job->dependencies, index, fence, GFP_KERNEL); + xa_store(dependencies, index, fence, GFP_KERNEL); } else { dma_fence_put(fence); } return 0; } - ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL); + ret = xa_alloc(dependencies, &id, fence, xa_limit_32b, GFP_KERNEL); if (ret != 0) dma_fence_put(fence); return ret; } + +/** + * drm_sched_job_add_dependency - adds the fence as a job dependency + * @job: scheduler job to add the dependencies to + * @fence: the dma_fence to add to the list of dependencies. + * + * Note that @fence is consumed in both the success and error cases. + * + * Returns: + * 0 on success, or an error on failing to expand the array. + */ +int drm_sched_job_add_dependency(struct drm_sched_job *job, + struct dma_fence *fence) +{ + return __drm_sched_job_add_dependency(job, fence, false); +} EXPORT_SYMBOL(drm_sched_job_add_dependency); /** @@ -963,7 +973,9 @@ int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, dma_resv_for_each_fence(&cursor, resv, usage, fence) { /* Make sure to grab an additional ref on the added fence */ dma_fence_get(fence); - ret = drm_sched_job_add_dependency(job, fence); + ret = __drm_sched_job_add_dependency(job, fence, + cursor.fence_usage == + DMA_RESV_USAGE_PREEMPT); if (ret) { dma_fence_put(fence); return ret; @@ -1030,6 +1042,10 @@ void drm_sched_job_cleanup(struct drm_sched_job *job) } xa_destroy(&job->dependencies); + xa_for_each(&job->preempt_dependencies, index, fence) { + dma_fence_put(fence); + } + xa_destroy(&job->preempt_dependencies); } EXPORT_SYMBOL(drm_sched_job_cleanup); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 95e17504e46a..de16cf6b1869 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -353,6 +353,13 @@ struct drm_sched_job { u32 credits; + /** + * @resolve_preempt_dependencies: + * + * Job is currently resolving preempt dependencies. + */ + bool resolve_preempt_dependencies; + /* * work is used only after finish_cb has been used and will not be * accessed anymore. @@ -376,6 +383,14 @@ struct drm_sched_job { */ struct xarray dependencies; + /** + * @preempt_dependencies: + * + * Contains the dependencies as struct dma_fence for this job which are + * preempt fences. + */ + struct xarray preempt_dependencies; + /** @last_dependency: tracks @dependencies as they signal */ unsigned long last_dependency; -- 2.34.1