-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of
Andrey Grodzovsky
Sent: Tuesday, December 11, 2018 5:44 AM
To: dri-devel@xxxxxxxxxxxxxxxxxxxxx; amd-gfx@xxxxxxxxxxxxxxxxxxxxx;
ckoenig.leichtzumerken@xxxxxxxxx; eric@xxxxxxxxxx;
etnaviv@xxxxxxxxxxxxxxxxxxxxx
Cc: Zhou, David(ChunMing) <David1.Zhou@xxxxxxx>; Liu, Monk
<Monk.Liu@xxxxxxx>; Grodzovsky, Andrey
<Andrey.Grodzovsky@xxxxxxx>
Subject: [PATCH v3 2/2] drm/sched: Rework HW fence processing.
Expedite job deletion from ring mirror list to the HW fence signal callback
instead from finish_work, together with waiting for all such fences to signal in
drm_sched_stop we garantee that already signaled job will not be processed
twice.
Remove the sched finish fence callback and just submit finish_work directly
from the HW fence callback.
v2: Fix comments.
v3: Attach hw fence cb to sched_job
Suggested-by: Christian Koenig <Christian.Koenig@xxxxxxx>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx>
---
drivers/gpu/drm/scheduler/sched_main.c | 58 ++++++++++++++++----------
--------
include/drm/gpu_scheduler.h | 6 ++--
2 files changed, 30 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/scheduler/sched_main.c
b/drivers/gpu/drm/scheduler/sched_main.c
index cdf95e2..f0c1f32 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -284,8 +284,6 @@ static void drm_sched_job_finish(struct work_struct
*work)
cancel_delayed_work_sync(&sched->work_tdr);
spin_lock_irqsave(&sched->job_list_lock, flags);
- /* remove job from ring_mirror_list */
- list_del_init(&s_job->node);
/* queue TDR for next job */
drm_sched_start_timeout(sched);
spin_unlock_irqrestore(&sched->job_list_lock, flags); @@ -293,22
+291,11 @@ static void drm_sched_job_finish(struct work_struct *work)
sched->ops->free_job(s_job);
}
-static void drm_sched_job_finish_cb(struct dma_fence *f,
- struct dma_fence_cb *cb)
-{
- struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
- finish_cb);
- schedule_work(&job->finish_work);
-}
-
static void drm_sched_job_begin(struct drm_sched_job *s_job) {
struct drm_gpu_scheduler *sched = s_job->sched;
unsigned long flags;
- dma_fence_add_callback(&s_job->s_fence->finished, &s_job-
finish_cb,
- drm_sched_job_finish_cb);
-
spin_lock_irqsave(&sched->job_list_lock, flags);
list_add_tail(&s_job->node, &sched->ring_mirror_list);
drm_sched_start_timeout(sched);
@@ -359,12 +346,11 @@ void drm_sched_stop(struct drm_gpu_scheduler
*sched, struct drm_sched_job *bad,
list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node)
{
if (s_job->s_fence->parent &&
dma_fence_remove_callback(s_job->s_fence->parent,
- &s_job->s_fence->cb)) {
+ &s_job->cb)) {
dma_fence_put(s_job->s_fence->parent);
s_job->s_fence->parent = NULL;
atomic_dec(&sched->hw_rq_count);
- }
- else {
+ } else {
/* TODO Is it get/put neccessey here ? */
dma_fence_get(&s_job->s_fence->finished);
list_add(&s_job->finish_node, &wait_list); @@ -
417,31 +403,34 @@ EXPORT_SYMBOL(drm_sched_stop); void
drm_sched_start(struct drm_gpu_scheduler *sched, bool unpark_only) {
struct drm_sched_job *s_job, *tmp;
- unsigned long flags;
int r;
if (unpark_only)
goto unpark;
- spin_lock_irqsave(&sched->job_list_lock, flags);
+ /*
+ * Locking the list is not required here as the sched thread is parked
+ * so no new jobs are being pushed in to HW and in drm_sched_stop
we
+ * flushed all the jobs who were still in mirror list but who already
+ * signaled and removed them self from the list. Also concurrent
+ * GPU recovers can't run in parallel.
+ */
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list,
node) {
- struct drm_sched_fence *s_fence = s_job->s_fence;
struct dma_fence *fence = s_job->s_fence->parent;
if (fence) {
- r = dma_fence_add_callback(fence, &s_fence->cb,
+ r = dma_fence_add_callback(fence, &s_job->cb,
drm_sched_process_job);
if (r == -ENOENT)
- drm_sched_process_job(fence, &s_fence-
cb);
+ drm_sched_process_job(fence, &s_job->cb);
else if (r)
DRM_ERROR("fence add callback failed
(%d)\n",
r);
} else
- drm_sched_process_job(NULL, &s_fence->cb);
+ drm_sched_process_job(NULL, &s_job->cb);
}
drm_sched_start_timeout(sched);
- spin_unlock_irqrestore(&sched->job_list_lock, flags);
unpark:
kthread_unpark(sched->thread);
@@ -590,18 +579,27 @@ drm_sched_select_entity(struct
drm_gpu_scheduler *sched)
*/
static void drm_sched_process_job(struct dma_fence *f, struct
dma_fence_cb *cb) {
- struct drm_sched_fence *s_fence =
- container_of(cb, struct drm_sched_fence, cb);
+ struct drm_sched_job *s_job = container_of(cb, struct
drm_sched_job, cb);
+ struct drm_sched_fence *s_fence = s_job->s_fence;
struct drm_gpu_scheduler *sched = s_fence->sched;
+ unsigned long flags;
+
+ cancel_delayed_work(&sched->work_tdr);
- dma_fence_get(&s_fence->finished);
atomic_dec(&sched->hw_rq_count);
atomic_dec(&sched->num_jobs);
+
+ spin_lock_irqsave(&sched->job_list_lock, flags);
+ /* remove job from ring_mirror_list */
+ list_del_init(&s_job->node);
+ spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
drm_sched_fence_finished(s_fence);
trace_drm_sched_process_job(s_fence);
- dma_fence_put(&s_fence->finished);
wake_up_interruptible(&sched->wake_up_worker);
+
+ schedule_work(&s_job->finish_work);
}
/**
@@ -664,16 +662,16 @@ static int drm_sched_main(void *param)
if (fence) {
s_fence->parent = dma_fence_get(fence);
- r = dma_fence_add_callback(fence, &s_fence->cb,
+ r = dma_fence_add_callback(fence, &sched_job->cb,
drm_sched_process_job);
if (r == -ENOENT)
- drm_sched_process_job(fence, &s_fence-
cb);
+ drm_sched_process_job(fence, &sched_job-
cb);
else if (r)
DRM_ERROR("fence add callback failed
(%d)\n",
r);
dma_fence_put(fence);
} else
- drm_sched_process_job(NULL, &s_fence->cb);
+ drm_sched_process_job(NULL, &sched_job->cb);
wake_up(&sched->job_scheduled);
}
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index c94b592..f29aa1c 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -138,10 +138,6 @@ struct drm_sched_fence {
struct dma_fence finished;
/**
- * @cb: the callback for the parent fence below.
- */
- struct dma_fence_cb cb;
- /**
* @parent: the fence returned by &drm_sched_backend_ops.run_job
* when scheduling the job on hardware. We signal the
* &drm_sched_fence.finished fence once parent is signalled.
@@ -182,6 +178,7 @@ struct drm_sched_fence
*to_drm_sched_fence(struct dma_fence *f);
* be scheduled further.
* @s_priority: the priority of the job.
* @entity: the entity to which this job belongs.
+ * @cb: the callback for the parent fence in s_fence.
*
* A job is created by the driver using drm_sched_job_init(), and
* should call drm_sched_entity_push_job() once it wants the scheduler
@@ -199,6 +196,7 @@ struct drm_sched_job {
atomic_t karma;
enum drm_sched_priority s_priority;
struct drm_sched_entity *entity;
+ struct dma_fence_cb cb;
};
static inline bool drm_sched_invalidate_job(struct drm_sched_job *s_job,
--
2.7.4
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx