For context, the native dependency support is used in these as-yet-unsubmitted files:

https://gitlab.freedesktop.org/sarah-walker-imgtec/powervr/-/blob/dev/v3/drivers/gpu/drm/imagination/pvr_job.c
https://gitlab.freedesktop.org/sarah-walker-imgtec/powervr/-/blob/dev/v3/drivers/gpu/drm/imagination/pvr_queue.c

Thanks,
Donald

On Thu, 2023-06-08 at 14:23 +0100, Donald Robson wrote:
> This patch adds support for 'native' dependencies to the DRM scheduler.
> In drivers that use a firmware-based scheduler there are performance
> gains to be had by allowing waits to happen in the firmware, as this
> reduces the latency between signalling and job submission. Dependencies
> that can be awaited by the firmware scheduler are termed 'native
> dependencies'. In the new PowerVR driver we delegate the waits to the
> firmware, but it is still necessary to expose these fences within the
> DRM scheduler. This is because when a job is cancelled,
> drm_sched_entity_kill() registers a callback with all the dependencies
> in order to ensure the job finished fence is not signalled before all
> the job dependencies are met. If they were not exposed, the core could
> no longer guarantee that and might signal the fence too early, leading
> to potential invalid accesses if other things depend on the job
> finished fence.
>
> All dependencies are handled in the same way up to the point that
> dependencies for a job are being checked. At this stage, the DRM
> scheduler will now allow job submission to proceed once it encounters
> the first native dependency in the list - dependencies having been
> sorted beforehand in drm_sched_job_arm() so that native ones appear
> last. The list is sorted during drm_sched_job_arm() because the
> scheduler isn't known until this point, and determining whether a
> dependency is native is done via a new drm_gpu_scheduler backend
> operation.
>
> Native fences are just simple counters that get incremented every time
> some specific execution point is reached, like when a GPU job is done.
> The firmware is in charge of both waiting on and updating fences, so it
> can easily unblock any waiters it has internally. The CPU also has
> access to these counters, so it can check for progress as well.
>
> TODO:
>
> When operating normally the CPU is not supposed to update the counters
> itself, but there is one specific situation where this is needed - when
> a GPU hang has happened and some contexts were declared faulty because
> they had unfinished or blocked jobs pending. In that situation, when we
> reset the GPU we will evict faulty contexts so they can't submit jobs
> anymore, and we will cancel the jobs that were in flight at the time of
> reset, but that's not enough, because some jobs on other non-faulty
> contexts might have native dependencies on jobs that never completed on
> this faulty context.
>
> If we were to ask the firmware to wait on those native fences, it would
> block indefinitely, because no one would ever update the counter. So,
> in that case, and that case only, we want the CPU to force-update the
> counter and set it to the last issued sequence number. We do not
> currently have a helper for this and we welcome any suggestions for how
> best to implement it.
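
To give a feel for the driver side of the new backend operation mentioned above, here is a minimal sketch. The pvr_* names below are placeholders for illustration only - the actual usage lives in the pvr_job.c/pvr_queue.c files linked at the top of this mail - and it assumes the driver can recognise its firmware-visible (native) fences by their dma_fence_ops:

  /*
   * Minimal sketch, hypothetical names only. A fence is treated as
   * "native" if it is backed by a firmware-visible counter, which the
   * driver can tell from the fence's ops (pvr_native_fence_ops is
   * assumed to be defined elsewhere in the driver, as are the other
   * pvr_* callbacks).
   */
  static bool pvr_dependency_is_native(struct dma_fence *fence)
  {
  	return fence->ops == &pvr_native_fence_ops;
  }

  static const struct drm_sched_backend_ops pvr_sched_ops = {
  	.run_job               = pvr_run_job,
  	.timedout_job          = pvr_timedout_job,
  	.free_job              = pvr_free_job,
  	.dependency_is_native  = pvr_dependency_is_native,
  };
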
>
> Signed-off-by: Donald Robson <donald.robson@xxxxxxxxxx>
> Cc: Luben Tuikov <luben.tuikov@xxxxxxx>
> Cc: David Airlie <airlied@xxxxxxxxx>
> Cc: Daniel Vetter <daniel@xxxxxxxx>
> Cc: Sumit Semwal <sumit.semwal@xxxxxxxxxx>
> Cc: "Christian König" <christian.koenig@xxxxxxx>
> Cc: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
> Cc: Frank Binns <frank.binns@xxxxxxxxxx>
> Cc: Sarah Walker <sarah.walker@xxxxxxxxxx>
> ---
>  drivers/gpu/drm/scheduler/sched_entity.c | 60 +++++++++++++--
>  drivers/gpu/drm/scheduler/sched_main.c   | 96 ++++++++++++++++++++++++
>  include/drm/gpu_scheduler.h              | 11 +++
>  3 files changed, 161 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
> index 15d04a0ec623..2685805a5e05 100644
> --- a/drivers/gpu/drm/scheduler/sched_entity.c
> +++ b/drivers/gpu/drm/scheduler/sched_entity.c
> @@ -155,13 +155,14 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
>  {
>  	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
>  						 finish_cb);
> +	unsigned long idx;
>  	int r;
>
>  	dma_fence_put(f);
>
>  	/* Wait for all dependencies to avoid data corruptions */
> -	while (!xa_empty(&job->dependencies)) {
> -		f = xa_erase(&job->dependencies, job->last_dependency++);
> +	xa_for_each(&job->dependencies, idx, f) {
> +		xa_erase(&job->dependencies, idx);
>  		r = dma_fence_add_callback(f, &job->finish_cb,
>  					   drm_sched_entity_kill_jobs_cb);
>  		if (!r)
> @@ -390,12 +391,59 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
>  	return false;
>  }
>
> +/**
> + * dep_is_native - indicates that native dependencies are supported and that
> + * the dependency at @index is marked as native.
> + * @job: Scheduler job.
> + * @index: Index into the @job->dependencies xarray.
> + *
> + * Must only be used after calling drm_sched_job_arm().
> + *
> + * Returns true if both these conditions are met.
> + */
> +static bool dep_is_native(struct drm_sched_job *job, unsigned long index)
> +{
> +	return job->sched->ops->dependency_is_native &&
> +	       xa_get_mark(&job->dependencies, index, XA_MARK_0);
> +}
> +
>  static struct dma_fence *
> -drm_sched_job_dependency(struct drm_sched_job *job,
> -			 struct drm_sched_entity *entity)
> +drm_sched_job_dependency(struct drm_sched_job *job, struct drm_sched_entity *entity)
>  {
> -	if (!xa_empty(&job->dependencies))
> -		return xa_erase(&job->dependencies, job->last_dependency++);
> +	struct dma_fence *fence;
> +	unsigned long dep_index;
> +
> +	if (!dep_is_native(job, job->last_dependency)) {
> +		fence = xa_erase(&job->dependencies, job->last_dependency++);
> +		if (fence)
> +			return fence;
> +	}
> +
> +	xa_for_each_start(&job->dependencies, dep_index, fence,
> +			  job->last_dependency) {
> +		/*
> +		 * Encountered first native dependency. Since these were
> +		 * previously sorted to the end of the array in
> +		 * drm_sched_sort_native_deps(), all remaining entries
> +		 * will be native too, so we can just iterate through
> +		 * them.
> +		 *
> +		 * Native entries cannot be erased, as they need to be
> +		 * accessed by the driver's native scheduler.
> +		 *
> +		 * If the native fence is a drm_sched_fence object, we
> +		 * ensure the job has been submitted so drm_sched_fence
> +		 * ::parent points to a valid dma_fence object.
> +		 */
> +		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
> +		struct dma_fence *scheduled_fence =
> +			s_fence ? dma_fence_get_rcu(&s_fence->scheduled) : NULL;
> +
> +		job->last_dependency = dep_index + 1;
> +
> +		if (scheduled_fence)
> +			return scheduled_fence;
> +	}
>
>  	if (job->sched->ops->prepare_job)
>  		return job->sched->ops->prepare_job(job, entity);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 214364fccb71..08dcc33ec690 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -643,6 +643,92 @@ int drm_sched_job_init(struct drm_sched_job *job,
>  }
>  EXPORT_SYMBOL(drm_sched_job_init);
>
> +/**
> + * drm_sched_sort_native_deps - relocates all native dependencies to the
> + * tail end of @job->dependencies.
> + * @job: target scheduler job.
> + *
> + * Starts by marking all of the native dependencies, then, in a quicksort-like
> + * manner, swaps entries using a head and a tail index counter. Only a single
> + * partition is required, as there are only two values.
> + */
> +static void drm_sched_sort_native_deps(struct drm_sched_job *job)
> +{
> +	struct dma_fence *entry, *head = NULL, *tail = NULL;
> +	unsigned long h = 0, t = 0, native_dep_count = 0;
> +	XA_STATE(xas_head, &job->dependencies, 0);
> +	XA_STATE(xas_tail, &job->dependencies, 0);
> +	bool already_sorted = true;
> +
> +	if (!job->sched->ops->dependency_is_native)
> +		/* Driver doesn't support native deps. */
> +		return;
> +
> +	/* Mark all the native dependencies as we walk xas_tail to the end. */
> +	xa_lock(&job->dependencies);
> +	xas_for_each(&xas_tail, entry, ULONG_MAX) {
> +		/* Keep track of the index. */
> +		t++;
> +
> +		if (job->sched->ops->dependency_is_native(entry)) {
> +			xas_set_mark(&xas_tail, XA_MARK_0);
> +			native_dep_count++;
> +		} else if (native_dep_count) {
> +			/*
> +			 * As a native dep has been encountered before, we can
> +			 * infer the array is not already sorted.
> +			 */
> +			already_sorted = false;
> +		}
> +	}
> +	xa_unlock(&job->dependencies);
> +
> +	if (already_sorted)
> +		return;
> +
> +	/* xas_tail and t are now at the end of the array. */
> +	xa_lock(&job->dependencies);
> +	while (h < t) {
> +		if (!head) {
> +			/* Find a marked entry. */
> +			if (xas_get_mark(&xas_head, XA_MARK_0)) {
> +				head = xas_load(&xas_head);
> +			} else {
> +				xas_next(&xas_head);
> +				h++;
> +			}
> +		}
> +		if (!tail) {
> +			/* Find an unmarked entry. */
> +			if (xas_get_mark(&xas_tail, XA_MARK_0)) {
> +				xas_prev(&xas_tail);
> +				t--;
> +			} else {
> +				tail = xas_load(&xas_tail);
> +			}
> +		}
> +		if (head && tail) {
> +			/*
> +			 * Swap!
> +			 * These stores should never allocate, since they both
> +			 * already exist, hence they never fail.
> +			 */
> +			xas_store(&xas_head, tail);
> +			xas_store(&xas_tail, head);
> +
> +			/* Also swap the mark. */
> +			xas_clear_mark(&xas_head, XA_MARK_0);
> +			xas_set_mark(&xas_tail, XA_MARK_0);
> +
> +			head = NULL;
> +			tail = NULL;
> +			h++;
> +			t--;
> +		}
> +	}
> +	xa_unlock(&job->dependencies);
> +}
> +
>  /**
>   * drm_sched_job_arm - arm a scheduler job for execution
>   * @job: scheduler job to arm
> @@ -669,6 +755,7 @@ void drm_sched_job_arm(struct drm_sched_job *job)
>  	job->s_priority = entity->rq - sched->sched_rq;
>  	job->id = atomic64_inc_return(&sched->job_id_count);
>
> +	drm_sched_sort_native_deps(job);
>  	drm_sched_fence_init(job->s_fence, job->entity);
>  }
>  EXPORT_SYMBOL(drm_sched_job_arm);
> @@ -1045,6 +1132,15 @@ static int drm_sched_main(void *param)
>  		trace_drm_run_job(sched_job, entity);
>  		fence = sched->ops->run_job(sched_job);
>  		complete_all(&entity->entity_idle);
> +
> +		/* We need to set the parent before signaling the scheduled
> +		 * fence if we want native dependencies to work properly. If
> +		 * we don't, the driver might try to access the parent before
> +		 * it's set.
> +		 */
> +		if (!IS_ERR_OR_NULL(fence))
> +			drm_sched_fence_set_parent(s_fence, fence);
> +
>  		drm_sched_fence_scheduled(s_fence);
>
>  		if (!IS_ERR_OR_NULL(fence)) {
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 898608f87b96..dca6be35e517 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -455,6 +455,17 @@ struct drm_sched_backend_ops {
>  	 * and it's time to clean it up.
>  	 */
>  	void (*free_job)(struct drm_sched_job *sched_job);
> +
> +	/**
> +	 * @dependency_is_native: When arming a job for this scheduler, this
> +	 * function will be called to determine whether to treat a fence as a
> +	 * native dependency. A native dependency is awaited and cleaned up
> +	 * when the job is cancelled, but responsibility is otherwise delegated
> +	 * to a native scheduler in the calling driver code.
> +	 *
> +	 * Optional - implies support for native dependencies.
> +	 */
> +	bool (*dependency_is_native)(struct dma_fence *fence);
>  };
>
>  /**
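
For readers wondering how a driver then consumes the native dependencies that are deliberately left in job->dependencies rather than erased, a sketch along these lines could run when the driver builds its firmware command stream. Every pvr_* name here is hypothetical and not part of this series; the only assumptions taken from the patch are that native entries remain in the xarray and carry the XA_MARK_0 mark set by drm_sched_sort_native_deps():

  /*
   * Illustrative sketch only - pvr_job, pvr_fence, to_pvr_fence() and
   * pvr_cmd_emit_wait() are hypothetical names. Non-native dependencies
   * have already been erased and waited on by the scheduler by the time
   * run_job() is reached, so only the marked (native) entries remain.
   */
  static void pvr_job_emit_native_waits(struct pvr_job *job)
  {
  	struct dma_fence *fence;
  	unsigned long index;

  	xa_for_each_marked(&job->base.dependencies, index, fence, XA_MARK_0) {
  		struct pvr_fence *pfence = to_pvr_fence(fence);

  		/* Tell the firmware to wait until the backing counter
  		 * reaches the fence's sequence number before running
  		 * this job.
  		 */
  		pvr_cmd_emit_wait(job, pfence->fw_counter, fence->seqno);
  	}
  }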