On 20.10.2017 15:32, Andrey Grodzovsky wrote:
> Switch from kfifo to SPSC queue.
>
> Bug:
> Kfifo is limited in size; during GPU reset it can fill up to its limit,
> and the pushing thread (producer) then waits for the scheduler worker to
> consume the items in the fifo while holding a reservation lock
> on a BO. The GPU reset thread, on the other hand, blocks the scheduler
> during reset. Before it unblocks the scheduler it might want
> to recover VRAM and so will try to reserve the same BO the producer
> thread is already holding, creating a deadlock.
>
> Fix:
> Switch from kfifo to an SPSC queue, which is unlimited in size.
>
> Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky at amd.com>
> ---
>  drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h |  4 +-
>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.c   | 51 ++++++++++---------------
>  drivers/gpu/drm/amd/scheduler/gpu_scheduler.h   |  4 +-
>  3 files changed, 26 insertions(+), 33 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
> index 8bd3810..86838a8 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
> @@ -28,8 +28,8 @@ TRACE_EVENT(amd_sched_job,
>  			   __entry->id = sched_job->id;
>  			   __entry->fence = &sched_job->s_fence->finished;
>  			   __entry->name = sched_job->sched->name;
> -			   __entry->job_count = kfifo_len(
> -				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
> +			   __entry->job_count = spsc_queue_count(
> +				   &sched_job->s_entity->job_queue);
>  			   __entry->hw_job_count = atomic_read(
>  				   &sched_job->sched->hw_rq_count);
>  			   ),
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index 1bbbce2..0c9cdc0 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -28,9 +28,14 @@
>  #include <drm/drmP.h>
>  #include "gpu_scheduler.h"
>
> +#include "spsc_queue.h"
> +
>  #define CREATE_TRACE_POINTS
>  #include "gpu_sched_trace.h"
>
> +#define to_amd_sched_job(sched_job)		\
> +		container_of((sched_job), struct amd_sched_job, queue_node)
> +
>  static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
>  static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
>  static void amd_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb);
> @@ -123,8 +128,6 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
>  			  struct amd_sched_rq *rq,
>  			  uint32_t jobs)
>  {
> -	int r;
> -
>  	if (!(sched && entity && rq))
>  		return -EINVAL;
>
> @@ -135,9 +138,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
>
>  	spin_lock_init(&entity->rq_lock);
>  	spin_lock_init(&entity->queue_lock);
> -	r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
> -	if (r)
> -		return r;
> +	spsc_queue_init(&entity->job_queue);
>
>  	atomic_set(&entity->fence_seq, 0);
>  	entity->fence_context = dma_fence_context_alloc(2);
> @@ -170,7 +171,7 @@ static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
>  static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
>  {
>  	rmb();
> -	if (kfifo_is_empty(&entity->job_queue))
> +	if (spsc_queue_peek(&entity->job_queue) == NULL)
>  		return true;
>
>  	return false;
> @@ -185,7 +186,7 @@ static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
>   */
>  static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
>  {
> -	if (kfifo_is_empty(&entity->job_queue))
> +	if (spsc_queue_peek(&entity->job_queue) == NULL)
>  		return false;
>
>  	if (ACCESS_ONCE(entity->dependency))
> @@ -227,7 +228,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
>  	 */
>  	kthread_park(sched->thread);
>  	kthread_unpark(sched->thread);
> -	while (kfifo_out(&entity->job_queue, &job, sizeof(job))) {
> +	while ((job = to_amd_sched_job(spsc_queue_pop(&entity->job_queue)))) {
>  		struct amd_sched_fence *s_fence = job->s_fence;
>  		amd_sched_fence_scheduled(s_fence);
>  		dma_fence_set_error(&s_fence->finished, -ESRCH);
> @@ -235,9 +236,7 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
>  		dma_fence_put(&s_fence->finished);
>  		sched->ops->free_job(job);
>  	}
> -
>  	}
> -	kfifo_free(&entity->job_queue);
>  }
>
>  static void amd_sched_entity_wakeup(struct dma_fence *f, struct dma_fence_cb *cb)
> @@ -332,18 +331,21 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
>  }
>
>  static struct amd_sched_job *
> -amd_sched_entity_peek_job(struct amd_sched_entity *entity)
> +amd_sched_entity_pop_job(struct amd_sched_entity *entity)
>  {
>  	struct amd_gpu_scheduler *sched = entity->sched;
> -	struct amd_sched_job *sched_job;
> +	struct amd_sched_job *sched_job = to_amd_sched_job(
> +					spsc_queue_peek(&entity->job_queue));
>
> -	if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
> +	if (!sched_job)
>  		return NULL;
>
>  	while ((entity->dependency = sched->ops->dependency(sched_job)))
>  		if (amd_sched_entity_add_dependency_cb(entity))
>  			return NULL;
>
> +	sched_job->s_entity = NULL;
> +	spsc_queue_pop(&entity->job_queue);
>  	return sched_job;
>  }
>
> @@ -354,18 +356,14 @@ amd_sched_entity_peek_job(struct amd_sched_entity *entity)
>   *
>   * Returns true if we could submit the job.
>   */
> -static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
> +static void amd_sched_entity_in(struct amd_sched_job *sched_job)
>  {
>  	struct amd_gpu_scheduler *sched = sched_job->sched;
>  	struct amd_sched_entity *entity = sched_job->s_entity;
> -	bool added, first = false;
> +	bool first = false;
>
>  	spin_lock(&entity->queue_lock);
> -	added = kfifo_in(&entity->job_queue, &sched_job,
> -			sizeof(sched_job)) == sizeof(sched_job);
> -
> -	if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
> -		first = true;
> +	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
>
>  	spin_unlock(&entity->queue_lock);
>
> @@ -377,7 +375,6 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
>  		spin_unlock(&entity->rq_lock);
>  		amd_sched_wakeup(sched);
>  	}
> -	return added;
>  }
>
>  /* job_finish is called after hw fence signaled
> @@ -514,11 +511,8 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
>   */
>  void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
>  {
> -	struct amd_sched_entity *entity = sched_job->s_entity;
> -
>  	trace_amd_sched_job(sched_job);
> -	wait_event(entity->sched->job_scheduled,
> -		   amd_sched_entity_in(sched_job));
> +	amd_sched_entity_in(sched_job);

To save a few loc, amd_sched_entity_in() can now be merged completely
into this function; a rough sketch of what I mean is at the bottom of
this mail.

Apart from that the patch looks really good to me and is
Reviewed-by: Christian König <christian.koenig at amd.com>.

Ping me internally for new work if you now have time again.

Thanks,
Christian.
>  }
>
>  /* init a sched_job with basic field */
> @@ -611,7 +605,7 @@ static int amd_sched_main(void *param)
>  {
>  	struct sched_param sparam = {.sched_priority = 1};
>  	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
> -	int r, count;
> +	int r;
>
>  	sched_setscheduler(current, SCHED_FIFO, &sparam);
>
> @@ -629,7 +623,7 @@ static int amd_sched_main(void *param)
>  		if (!entity)
>  			continue;
>
> -		sched_job = amd_sched_entity_peek_job(entity);
> +		sched_job = amd_sched_entity_pop_job(entity);
>  		if (!sched_job)
>  			continue;
>
> @@ -656,9 +650,6 @@ static int amd_sched_main(void *param)
>  			amd_sched_process_job(NULL, &s_fence->cb);
>  		}
>
> -		count = kfifo_out(&entity->job_queue, &sched_job,
> -				sizeof(sched_job));
> -		WARN_ON(count != sizeof(sched_job));
>  		wake_up(&sched->job_scheduled);
>  	}
>  	return 0;
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index 3f75b45..8844ce7 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -26,6 +26,7 @@
>
>  #include <linux/kfifo.h>
>  #include <linux/dma-fence.h>
> +#include "spsc_queue.h"
>
>  struct amd_gpu_scheduler;
>  struct amd_sched_rq;
> @@ -56,7 +57,7 @@ struct amd_sched_entity {
>  	struct amd_gpu_scheduler	*sched;
>
>  	spinlock_t			queue_lock;
> -	struct kfifo			job_queue;
> +	struct spsc_queue		job_queue;
>
>  	atomic_t			fence_seq;
>  	uint64_t			fence_context;
> @@ -87,6 +88,7 @@ struct amd_sched_fence {
>  };
>
>  struct amd_sched_job {
> +	struct spsc_node		queue_node;
>  	struct amd_gpu_scheduler	*sched;
>  	struct amd_sched_entity		*s_entity;
>  	struct amd_sched_fence		*s_fence;
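To illustrate the merge suggested above: here is a rough, untested
sketch of amd_sched_entity_push_job() with amd_sched_entity_in() folded
in. It assumes the first-job branch keeps the existing run queue
handling from amd_sched_entity_in() (amd_sched_rq_add_entity() under
rq_lock, then amd_sched_wakeup()):

void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
{
	struct amd_gpu_scheduler *sched = sched_job->sched;
	struct amd_sched_entity *entity = sched_job->s_entity;
	bool first;

	trace_amd_sched_job(sched_job);

	spin_lock(&entity->queue_lock);
	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
	spin_unlock(&entity->queue_lock);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		spin_lock(&entity->rq_lock);
		amd_sched_rq_add_entity(entity->rq, entity);
		spin_unlock(&entity->rq_lock);
		amd_sched_wakeup(sched);
	}
}

Note that sched and entity are read before the push: once the job is in
the queue the scheduler thread may pop and free it, so sched_job must
not be touched afterwards.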
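spsc_queue.h itself is not quoted in this mail, but the contract the
patch relies on is visible from the call sites: spsc_queue_peek()
returns NULL on an empty queue, spsc_queue_pop() unlinks and returns the
head node, and spsc_queue_push() reports whether the queue was empty,
which amd_sched_entity_in() uses as the "first job" signal. The
following is only a sketch of what such a header could look like; the
intrusive node/tail layout, the xchg/cmpxchg scheme and the largely
elided memory barriers are assumptions, not the actual file:

#include <linux/atomic.h>
#include <linux/compiler.h>	/* READ_ONCE()/WRITE_ONCE() */
#include <asm/processor.h>	/* cpu_relax() */

struct spsc_node {
	struct spsc_node *next;
};

struct spsc_queue {
	struct spsc_node *head;	/* consumer end */
	atomic_long_t tail;	/* producer end: &last->next, or &head when empty */
	atomic_t job_count;
};

static inline void spsc_queue_init(struct spsc_queue *queue)
{
	queue->head = NULL;
	atomic_long_set(&queue->tail, (long)&queue->head);
	atomic_set(&queue->job_count, 0);
}

static inline struct spsc_node *spsc_queue_peek(struct spsc_queue *queue)
{
	return READ_ONCE(queue->head);
}

static inline int spsc_queue_count(struct spsc_queue *queue)
{
	return atomic_read(&queue->job_count);
}

/* Returns true if the queue was empty, i.e. this is the first job. */
static inline bool spsc_queue_push(struct spsc_queue *queue,
				   struct spsc_node *node)
{
	struct spsc_node **tail;

	node->next = NULL;
	/* Swing tail to the new node first, then link the predecessor. */
	tail = (struct spsc_node **)atomic_long_xchg(&queue->tail,
						     (long)&node->next);
	WRITE_ONCE(*tail, node);
	atomic_inc(&queue->job_count);
	return tail == &queue->head;
}

static inline struct spsc_node *spsc_queue_pop(struct spsc_queue *queue)
{
	struct spsc_node *node, *next;

	node = READ_ONCE(queue->head);
	if (!node)
		return NULL;

	next = READ_ONCE(node->next);
	WRITE_ONCE(queue->head, next);

	if (!next) {
		/*
		 * node looked like the last element: try to swing tail
		 * back to &head. If that fails a push is in flight;
		 * wait until it has linked the new node.
		 */
		if (atomic_long_cmpxchg(&queue->tail, (long)&node->next,
					(long)&queue->head) != (long)&node->next) {
			while (!(next = READ_ONCE(node->next)))
				cpu_relax();
			WRITE_ONCE(queue->head, next);
		}
	}

	atomic_dec(&queue->job_count);
	return node;
}

One detail worth spelling out: the patch puts queue_node first in
struct amd_sched_job, so to_amd_sched_job(), a container_of() with
offset zero, maps a NULL spsc_node pointer back to a NULL job pointer.
That is what makes the pop-then-NULL-check loops in
amd_sched_entity_fini() and amd_sched_entity_pop_job() work.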