On Mon, Oct 01, 2018 at 06:01:41PM +0530, Sharat Masetty wrote: > This patch hooks up the DRM gpu scheduler to the msm DRM driver. The > most noticeable changes to the driver are as follows. The submit path is > split into two parts, in the user context the submit(job) is created and > added to one of the entity's scheduler run queue. The scheduler then > tries to drain the queue by submitting the jobs the hardware to act upon. > The submit job sits on the scheduler queue until all the dependent > fences are waited upon successfully. > > We have one scheduler instance per ring. The submitqueues will host a > scheduler entity object. This entity will be mapped to the scheduler's > default runqueue. This should be good for now, but in future it is possible > to extend the API to allow for scheduling amongst the submitqueues on the > same ring. > > With this patch the role of the struct_mutex lock has been greatly reduced in > scope in the submit path, evidently when actually putting the job on the > ringbuffer. This should enable us with increased parallelism in the > driver which should translate to better performance overall hopefully. 
> > Signed-off-by: Sharat Masetty <smasetty@xxxxxxxxxxxxxx> > --- > drivers/gpu/drm/msm/Kconfig | 1 + > drivers/gpu/drm/msm/Makefile | 3 +- > drivers/gpu/drm/msm/msm_drv.h | 3 +- > drivers/gpu/drm/msm/msm_gem.c | 8 +- > drivers/gpu/drm/msm/msm_gem.h | 6 + > drivers/gpu/drm/msm/msm_gem_submit.c | 138 +++++++++------ > drivers/gpu/drm/msm/msm_gpu.c | 72 ++++++-- > drivers/gpu/drm/msm/msm_gpu.h | 2 + > drivers/gpu/drm/msm/msm_ringbuffer.c | 7 + > drivers/gpu/drm/msm/msm_ringbuffer.h | 3 + > drivers/gpu/drm/msm/msm_sched.c | 313 ++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/msm/msm_sched.h | 18 ++ > drivers/gpu/drm/msm/msm_submitqueue.c | 18 +- > 13 files changed, 507 insertions(+), 85 deletions(-) > create mode 100644 drivers/gpu/drm/msm/msm_sched.c > create mode 100644 drivers/gpu/drm/msm/msm_sched.h > > diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig > index 38cbde9..0d01810 100644 > --- a/drivers/gpu/drm/msm/Kconfig > +++ b/drivers/gpu/drm/msm/Kconfig > @@ -15,6 +15,7 @@ config DRM_MSM > select SND_SOC_HDMI_CODEC if SND_SOC > select SYNC_FILE > select PM_OPP > + select DRM_SCHED > default y > help > DRM/KMS driver for MSM/snapdragon. 
> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile > index cd40c05..71ed921 100644 > --- a/drivers/gpu/drm/msm/Makefile > +++ b/drivers/gpu/drm/msm/Makefile > @@ -60,7 +60,8 @@ msm-y := \ > msm_perf.o \ > msm_rd.o \ > msm_ringbuffer.o \ > - msm_submitqueue.o > + msm_submitqueue.o \ > + msm_sched.o > > msm-$(CONFIG_DEBUG_FS) += adreno/a5xx_debugfs.o > > diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h > index b2da1fb..e461a9c 100644 > --- a/drivers/gpu/drm/msm/msm_drv.h > +++ b/drivers/gpu/drm/msm/msm_drv.h > @@ -213,8 +213,7 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, > int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); > int msm_gem_sync_object(struct drm_gem_object *obj, > struct msm_fence_context *fctx, bool exclusive); > -void msm_gem_move_to_active(struct drm_gem_object *obj, > - struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence); > +void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu); > void msm_gem_move_to_inactive(struct drm_gem_object *obj); > int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); > int msm_gem_cpu_fini(struct drm_gem_object *obj); > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c > index f583bb4..7a12f30 100644 > --- a/drivers/gpu/drm/msm/msm_gem.c > +++ b/drivers/gpu/drm/msm/msm_gem.c > @@ -663,16 +663,12 @@ int msm_gem_sync_object(struct drm_gem_object *obj, > return 0; > } > > -void msm_gem_move_to_active(struct drm_gem_object *obj, > - struct msm_gpu *gpu, bool exclusive, struct dma_fence *fence) > +void msm_gem_move_to_active(struct drm_gem_object *obj, struct msm_gpu *gpu) > { > struct msm_gem_object *msm_obj = to_msm_bo(obj); > WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED); > msm_obj->gpu = gpu; > - if (exclusive) > - reservation_object_add_excl_fence(msm_obj->resv, fence); > - else > - reservation_object_add_shared_fence(msm_obj->resv, fence); > + 
> list_del_init(&msm_obj->mm_list); > list_add_tail(&msm_obj->mm_list, &gpu->active_list); > } > diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h > index cae3aa5..8c6269f 100644 > --- a/drivers/gpu/drm/msm/msm_gem.h > +++ b/drivers/gpu/drm/msm/msm_gem.h > @@ -20,6 +20,7 @@ > > #include <linux/kref.h> > #include <linux/reservation.h> > +#include <drm/gpu_scheduler.h> > #include "msm_drv.h" > > /* Additional internal-use only BO flags: */ > @@ -136,6 +137,7 @@ enum msm_gem_lock { > * lasts for the duration of the submit-ioctl. > */ > struct msm_gem_submit { > + struct drm_sched_job sched_job; > struct drm_device *dev; > struct msm_file_private *ctx; > struct msm_gpu *gpu; > @@ -144,6 +146,7 @@ struct msm_gem_submit { > struct ww_acquire_ctx ticket; > uint32_t seqno; /* Sequence number of the submit on the ring */ > struct dma_fence *hw_fence; > + struct dma_fence *in_fence, *out_fence; > int out_fence_id; > struct msm_gpu_submitqueue *queue; > struct pid *pid; /* submitting process */ > @@ -162,6 +165,9 @@ struct msm_gem_submit { > uint32_t flags; > struct msm_gem_object *obj; > uint64_t iova; > + struct dma_fence *excl; > + uint32_t nr_shared; > + struct dma_fence **shared; > } bos[0]; > }; > > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c > index 7931c2a..dff945c 100644 > --- a/drivers/gpu/drm/msm/msm_gem_submit.c > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c > @@ -65,23 +65,37 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) > { > int i; > > + mutex_lock(&submit->ring->fence_idr_lock); > idr_remove(&submit->ring->fence_idr, submit->out_fence_id); > - > - dma_fence_put(submit->hw_fence); > + mutex_unlock(&submit->ring->fence_idr_lock); Why are we using a mutex for this guy - this seems like a job for a spin if I ever saw one. Maybe even a rw spin depending on how often that idr gets queried. 
> for (i = 0; i < submit->nr_bos; i++) { > + int j; > + > struct msm_gem_object *msm_obj = submit->bos[i].obj; > > if (submit->bos[i].flags & BO_PINNED) > msm_gem_put_iova(&msm_obj->base, submit->gpu->aspace); > > - drm_gem_object_put(&msm_obj->base); > + drm_gem_object_put_unlocked(&msm_obj->base); > + > + /* > + * While we are at it, clear the saved exclusive and shared > + * fences if any > + */ > + dma_fence_put(submit->bos[i].excl); > + > + for (j = 0; j < submit->bos[i].nr_shared; j++) > + dma_fence_put(submit->bos[i].shared[j]); > + > + kfree(submit->bos[i].shared); > } > > - list_del(&submit->node); > put_pid(submit->pid); > msm_submitqueue_put(submit->queue); > > + dma_fence_put(submit->in_fence); > + dma_fence_put(submit->out_fence); > kfree(submit); > } > > @@ -238,6 +252,27 @@ static int submit_lock_objects(struct msm_gem_submit *submit) > return ret; > } > > +static void submit_attach_fences_and_unlock(struct msm_gem_submit *submit) > +{ > + int i; > + > + for (i = 0; i < submit->nr_bos; i++) { > + struct msm_gem_object *msm_obj = submit->bos[i].obj; > + > + if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) > + reservation_object_add_excl_fence(msm_obj->resv, > + submit->out_fence); > + else > + reservation_object_add_shared_fence(msm_obj->resv, > + submit->out_fence); > + > + if (submit->bos[i].flags & BO_LOCKED) { > + ww_mutex_unlock(&msm_obj->resv->lock); > + submit->bos[i].flags &= ~BO_LOCKED; > + } > + } > +} > + > static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) > { > int i, ret = 0; > @@ -260,10 +295,24 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) > if (no_implicit) > continue; > > - ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, > - write); > - if (ret) > - break; > + if (write) { > + /* > + * Save the per buffer shared and exclusive fences for > + * the scheduler thread to wait on. 
> + * > + * Note: The memory allocated for the storing the > + * shared fences will be released when the scheduler > + * is done waiting on all the saved fences. > + */ > + ret = reservation_object_get_fences_rcu(msm_obj->resv, > + &submit->bos[i].excl, > + &submit->bos[i].nr_shared, > + &submit->bos[i].shared); > + if (ret) > + return ret; I think this can just be a return ret; > + } else No need for the else > + submit->bos[i].excl = > + reservation_object_get_excl_rcu(msm_obj->resv); > } > > return ret; This can be a return 0; > @@ -425,7 +474,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, > struct msm_file_private *ctx = file->driver_priv; > struct msm_gem_submit *submit; > struct msm_gpu *gpu = priv->gpu; > - struct dma_fence *in_fence = NULL; > struct sync_file *sync_file = NULL; > struct msm_gpu_submitqueue *queue; > struct msm_ringbuffer *ring; > @@ -457,32 +505,11 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, > > ring = gpu->rb[queue->prio]; > > - if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { > - in_fence = sync_file_get_fence(args->fence_fd); > - > - if (!in_fence) > - return -EINVAL; > - > - /* > - * Wait if the fence is from a foreign context, or if the fence > - * array contains any fence from a foreign context. 
> - */ > - if (!dma_fence_match_context(in_fence, ring->fctx->context)) { > - ret = dma_fence_wait(in_fence, true); > - if (ret) > - return ret; > - } > - } > - > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > - > if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { > out_fence_fd = get_unused_fd_flags(O_CLOEXEC); > if (out_fence_fd < 0) { > ret = out_fence_fd; > - goto out_unlock; > + goto out_err; > } > } > > @@ -490,7 +517,16 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, > args->nr_cmds, ctx); > if (!submit) { > ret = -ENOMEM; > - goto out_unlock; > + goto out_err; > + } > + > + if (args->flags & MSM_SUBMIT_FENCE_FD_IN) { > + submit->in_fence = sync_file_get_fence(args->fence_fd); > + > + if (!submit->in_fence) { > + ret = -EINVAL; > + goto out; > + } > } > > if (args->flags & MSM_SUBMIT_SUDO) > @@ -573,28 +609,16 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, > > submit->nr_cmds = i; > > - msm_gpu_submit(submit); > - if (IS_ERR(submit->hw_fence)) { > - ret = PTR_ERR(submit->hw_fence); > - submit->hw_fence = NULL; > + ret = msm_sched_job_init(&submit->sched_job); > + if (ret) > goto out; > - } > > - /* > - * No protection should be okay here since this is protected by the big > - * GPU lock. 
> - */ > - submit->out_fence_id = idr_alloc_cyclic(&ring->fence_idr, > - submit->hw_fence, 0, INT_MAX, GFP_KERNEL); > - if (submit->out_fence_id < 0) { > - ret = -ENOMEM; > - goto out; > - } > + submit_attach_fences_and_unlock(submit); > > args->fence = submit->out_fence_id; > > if (args->flags & MSM_SUBMIT_FENCE_FD_OUT) { > - sync_file = sync_file_create(submit->hw_fence); > + sync_file = sync_file_create(submit->out_fence); > if (!sync_file) { > ret = -ENOMEM; > goto out; > @@ -604,14 +628,22 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, > } > > out: > - if (in_fence) > - dma_fence_put(in_fence); > + /* > + * Clean up the submit bits and pieces which are not needed beyond this > + * function context > + */ > submit_cleanup(submit); > - if (ret) > + > + if (!ret) > + /* > + * If we reached here, then all is well. Push the job to the > + * scheduler > + */ > + msm_sched_push_job(submit); > + else > msm_gem_submit_free(submit); > -out_unlock: > +out_err: > if (ret && (out_fence_fd >= 0)) > put_unused_fd(out_fence_fd); > - mutex_unlock(&dev->struct_mutex); > return ret; > } > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c > index cd5fe49..481a55c 100644 > --- a/drivers/gpu/drm/msm/msm_gpu.c > +++ b/drivers/gpu/drm/msm/msm_gpu.c > @@ -295,12 +295,17 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, > find_submit(struct msm_ringbuffer *ring, uint32_t fence) > { > struct msm_gem_submit *submit; > + unsigned long flags; > > - WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); > + spin_lock_irqsave(&ring->lock, flags); > > list_for_each_entry(submit, &ring->submits, node) > - if (submit->seqno == fence) > + if (submit->seqno == fence) { > + spin_unlock_irqrestore(&ring->lock, flags); > return submit; > + } > + > + spin_unlock_irqrestore(&ring->lock, flags); > > return NULL; > } > @@ -316,6 +321,12 @@ static void recover_worker(struct work_struct *work) > struct msm_ringbuffer *cur_ring = 
gpu->funcs->active_ring(gpu); > int i; > > + submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1); > + return msm_sched_gpu_recovery(gpu, submit); > + > + /* > + * The unused code below will be removed in a subsequent patch > + */ Why not now? > mutex_lock(&dev->struct_mutex); > > dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); > @@ -591,11 +602,36 @@ static void retire_worker(struct work_struct *work) > mutex_unlock(&dev->struct_mutex); > } > > -/* call from irq handler to schedule work to retire bo's */ > +static void signal_hw_fences(struct msm_ringbuffer *ring) > +{ > + unsigned long flags; > + struct msm_gem_submit *submit, *tmp; > + > + spin_lock_irqsave(&ring->lock, flags); > + > + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { > + if (submit->seqno > ring->memptrs->fence) > + break; > + > + list_del(&submit->node); > + > + dma_fence_signal(submit->hw_fence); > + } > + > + spin_unlock_irqrestore(&ring->lock, flags); > +} > + > +/* > + * Called from the IRQ context to signal hardware fences for the completed > + * submits > + */ > void msm_gpu_retire(struct msm_gpu *gpu) > { > - struct msm_drm_private *priv = gpu->dev->dev_private; > - queue_work(priv->wq, &gpu->retire_work); > + int i; > + > + for (i = 0; i < gpu->nr_rings; i++) > + signal_hw_fences(gpu->rb[i]); > + > update_sw_cntrs(gpu); > } > > @@ -606,25 +642,28 @@ struct dma_fence *msm_gpu_submit(struct msm_gem_submit *submit) > struct drm_device *dev = gpu->dev; > struct msm_drm_private *priv = dev->dev_private; > struct msm_ringbuffer *ring = submit->ring; > + unsigned long flags; > int i; > > - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); > - > submit->hw_fence = msm_fence_alloc(ring->fctx); > if (IS_ERR(submit->hw_fence)) > return submit->hw_fence; > > - pm_runtime_get_sync(&gpu->pdev->dev); > - > - msm_gpu_hw_init(gpu); > + spin_lock_irqsave(&ring->lock, flags); > + list_add_tail(&submit->node, &ring->submits); > + spin_unlock_irqrestore(&ring->lock, flags); > 
submit->seqno = ++ring->seqno; > > - list_add_tail(&submit->node, &ring->submits); > + update_sw_cntrs(gpu); I'm not sure if this needs the hardware to be up (it does check msm_gpu_active), but I don't think we should reorganize the order of these functions unless we need to. > - msm_rd_dump_submit(priv->rd, submit, NULL); > + mutex_lock(&dev->struct_mutex); > > - update_sw_cntrs(gpu); > + pm_runtime_get_sync(&gpu->pdev->dev); > + > + msm_gpu_hw_init(gpu); > + > + msm_rd_dump_submit(priv->rd, submit, NULL); > > for (i = 0; i < submit->nr_bos; i++) { > struct msm_gem_object *msm_obj = submit->bos[i].obj; > @@ -634,16 +673,13 @@ struct dma_fence *msm_gpu_submit(struct msm_gem_submit *submit) > */ > WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu)); > > - if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) > - msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->hw_fence); > - else if (submit->bos[i].flags & MSM_SUBMIT_BO_READ) > - msm_gem_move_to_active(&msm_obj->base, gpu, false, submit->hw_fence); > + msm_gem_move_to_active(&msm_obj->base, gpu); > } > > gpu->funcs->submit(gpu, submit, submit->ctx); > priv->lastctx = submit->ctx; > > - hangcheck_timer_reset(gpu); > + mutex_unlock(&dev->struct_mutex); > > return submit->hw_fence; > } > diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h > index dd55979..3bd1920 100644 > --- a/drivers/gpu/drm/msm/msm_gpu.h > +++ b/drivers/gpu/drm/msm/msm_gpu.h > @@ -173,6 +173,8 @@ struct msm_gpu_submitqueue { > int faults; > struct list_head node; > struct kref ref; > + struct msm_gpu *gpu; > + struct drm_sched_entity sched_entity; > }; > > static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data) > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c > index 0889766..ef2bf10 100644 > --- a/drivers/gpu/drm/msm/msm_ringbuffer.c > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c > @@ -56,9 +56,14 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, 
> > snprintf(ring->name, sizeof(ring->name), "msm-gpu-ring-%d", ring->id); > > + if (msm_sched_init(&ring->sched, ring->name) != 0) { > + ret = -EINVAL; > + goto fail; > + } > ring->fctx = msm_fence_context_alloc(gpu->dev, ring->name); > > idr_init(&ring->fence_idr); > + mutex_init(&ring->fence_idr_lock); > > return ring; > > @@ -74,6 +79,7 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) > > msm_fence_context_free(ring->fctx); > > + msm_sched_cleanup(&ring->sched); > if (ring->bo) { > msm_gem_put_iova(ring->bo, ring->gpu->aspace); > msm_gem_put_vaddr(ring->bo); > @@ -81,6 +87,7 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) > } > > idr_destroy(&ring->fence_idr); > + mutex_destroy(&ring->fence_idr_lock); > > kfree(ring); > } > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h > index 523373b..10ae4a8 100644 > --- a/drivers/gpu/drm/msm/msm_ringbuffer.h > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h > @@ -19,6 +19,7 @@ > #define __MSM_RINGBUFFER_H__ > > #include "msm_drv.h" > +#include "msm_sched.h" > > #define rbmemptr(ring, member) \ > ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) > @@ -42,7 +43,9 @@ struct msm_ringbuffer { > uint64_t memptrs_iova; > struct msm_fence_context *fctx; > struct idr fence_idr; > + struct mutex fence_idr_lock; > spinlock_t lock; > + struct drm_gpu_scheduler sched; > }; > > struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, > diff --git a/drivers/gpu/drm/msm/msm_sched.c b/drivers/gpu/drm/msm/msm_sched.c > new file mode 100644 > index 0000000..8b805ce > --- /dev/null > +++ b/drivers/gpu/drm/msm/msm_sched.c > @@ -0,0 +1,313 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ // SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2018, The Linux Foundation. All rights reserved. 
*/ > + > +#include "msm_gpu.h" > +#include "msm_gem.h" > +#include "msm_sched.h" > + > +#include <linux/string_helpers.h> > +#include <linux/kthread.h> > + > +struct msm_gem_submit *to_msm_gem_submit(struct drm_sched_job *sched_job) > +{ > + return container_of(sched_job, struct msm_gem_submit, sched_job); > +} > + > +/* > + * Go through the bo's one by one and return the previously saved shared and bo's -> bos > + * exclusive fences. If the scheduler gets the fence, then it takes care of > + * releasing the reference on the fence. > + */ > +static struct dma_fence *msm_sched_dependency(struct drm_sched_job *sched_job, > + struct drm_sched_entity *entity) > +{ > + struct msm_gem_submit *submit = to_msm_gem_submit(sched_job); > + struct dma_fence *fence; > + int i; > + > + if (submit->in_fence) { > + fence = submit->in_fence; > + submit->in_fence = NULL; > + if (!dma_fence_is_signaled(fence)) > + return fence; > + > + dma_fence_put(fence); > + } There might be a chance to consolidate some code here static struct dma_fence *_get_fence(struct dma_fence **in) { struct dma_fence *fence = *in; *in = NULL; if (fence && !dma_fence_is_signaled(fence)) return fence; dma_fence_put(fence); return NULL; } fence = _get_fence(&submit->in_fence); if (fence) return fence; > + /* Return the previously saved shared and exclusive fences if any */ > + for (i = 0; i < submit->nr_bos; i++) { > + int j; > + > + if (submit->bos[i].excl) { > + fence = submit->bos[i].excl; > + submit->bos[i].excl = NULL; > + > + if (!dma_fence_is_signaled(fence)) > + return fence; > + > + dma_fence_put(fence); > + } fence = _get_fence(&submit->bos[i].excl); if (fence) return fence; > + for (j = 0; j < submit->bos[i].nr_shared; j++) { > + if (!submit->bos[i].shared[j]) > + continue; > + > + fence = submit->bos[i].shared[j]; > + submit->bos[i].shared[j] = NULL; > + > + if (!dma_fence_is_signaled(fence)) > + return fence; > + > + dma_fence_put(fence); > + } fence = _get_fence(&submit->bos[i].shared[j]); if (fence) 
return fence; > + > + kfree(submit->bos[i].shared); > + submit->bos[i].nr_shared = 0; > + submit->bos[i].shared = NULL; > + } > + > + return NULL; > +} This is an interesting function - in order to avoid wasting cycles it needs to be ordered so that the most likely fences happen first. If that is the case, I think that in_fence might be the least likely so you should check it last. > +static struct dma_fence *msm_sched_run_job(struct drm_sched_job *sched_job) > +{ > + struct msm_gem_submit *submit = to_msm_gem_submit(sched_job); > + > + return !sched_job->s_fence->finished.error ? > + msm_gpu_submit(submit) : NULL; > +} > + > +static void dump_bad_submit(struct msm_gem_submit *submit) > +{ > + struct msm_gpu *gpu = submit->gpu; > + struct drm_device *dev = gpu->dev; > + struct msm_drm_private *priv = dev->dev_private; > + struct task_struct *task; > + char task_name[32] = {0}; > + > + rcu_read_lock(); > + task = pid_task(submit->pid, PIDTYPE_PID); > + if (task) { > + char *cmd; > + > + cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL); This should be GFP_ATOMIC because we are in the rcu_read_lock(). There might be something else wrong with it too - I know we have this problem in the current kernel and I'm not sure if it is avoidable without losing the task name. 
> + dev_err(&gpu->pdev->dev, "%s: offending task: %s (%s)\n", > + gpu->name, task->comm, cmd); > + > + snprintf(task_name, sizeof(task_name), > + "offending task: %s (%s)", task->comm, cmd); > + > + kfree(cmd); > + } > + rcu_read_unlock(); > + > + mutex_lock(&gpu->dev->struct_mutex); > + msm_rd_dump_submit(priv->hangrd, submit, task_name); > + mutex_unlock(&gpu->dev->struct_mutex); > +} > + > +void msm_sched_gpu_recovery(struct msm_gpu *gpu, struct msm_gem_submit *submit) > +{ > + int i; > + static atomic_t gpu_recovery; > + > + /* Check if a GPU recovery is already in progress */ > + if (!(atomic_cmpxchg(&gpu_recovery, 0, 1) == 0)) > + return; > + > + /* > + * Pause the schedulers so we don't get new requests while the recovery > + * is in progress > + */ > + for (i = 0; i < gpu->nr_rings; i++) > + kthread_park(gpu->rb[i]->sched.thread); > + > + /* > + * Disable interrupts so we don't get interrupted while the recovery is > + * in progress > + */ > + disable_irq(gpu->irq); > + > + dev_err(&gpu->pdev->dev, "%s: hangcheck recover!\n", gpu->name); I don't know if we still need this line? Recovery seems to be standard operating procedure these days. > + > + if (submit) > + dump_bad_submit(submit); > + > + /* > + * For each ring, we do the following > + * a) Inform the scheduler to drop the hardware fence for the > + * submissions on its mirror list > + * b) The submit(job) list on the ring is not useful anymore > + * as we are going for a gpu reset, so we empty the submit list > + */ > + for (i = 0; i < gpu->nr_rings; i++) { > + struct msm_gem_submit *job, *tmp; > + unsigned long flags; > + struct msm_ringbuffer *ring = gpu->rb[i]; > + > + /* a) Release the hardware fences */ > + drm_sched_hw_job_reset(&ring->sched, > + (submit && submit->ring == ring) ? 
> + &submit->sched_job : NULL); > + > + /* b) Free up the ring submit list */ > + spin_lock_irqsave(&ring->lock, flags); > + > + list_for_each_entry_safe(job, tmp, &ring->submits, node) > + list_del(&job->node); > + > + spin_unlock_irqrestore(&ring->lock, flags); > + } > + > + /* Power cycle and reset the GPU back to init state */ > + mutex_lock(&gpu->dev->struct_mutex); > + > + pm_runtime_get_sync(&gpu->pdev->dev); > + gpu->funcs->recover(gpu); > + pm_runtime_put_sync(&gpu->pdev->dev); > + > + mutex_unlock(&gpu->dev->struct_mutex); > + > + /* Re-enable the interrupts once the gpu reset is complete */ > + enable_irq(gpu->irq); > + > + /* > + * The GPU is hopefully back in good shape, now request the schedulers > + * to replay its mirror list, starting with the scheduler on the highest > + * priority ring > + */ > + for (i = 0; i < gpu->nr_rings; i++) { > + drm_sched_job_recovery(&gpu->rb[i]->sched); > + kthread_unpark(gpu->rb[i]->sched.thread); > + } > + > + atomic_set(&gpu_recovery, 0); I think we need a smp_wmb() here to make sure everybody else sees the update. 
> +} > + > +static void msm_sched_timedout_job(struct drm_sched_job *bad_job) > +{ > + struct msm_gem_submit *submit = to_msm_gem_submit(bad_job); > + struct msm_gpu *gpu = submit->gpu; > + struct msm_ringbuffer *ring = submit->ring; > + > + /* > + * If this submission completed in the mean time, then the timeout is > + * spurious > + */ > + if (submit->seqno <= submit->ring->memptrs->fence) > + return; > + > + dev_err(&gpu->pdev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", > + gpu->name, ring->id); > + dev_err(&gpu->pdev->dev, "%s: completed fence: %u\n", > + gpu->name, ring->memptrs->fence); > + dev_err(&gpu->pdev->dev, "%s: submitted fence: %u\n", > + gpu->name, ring->seqno); > + > + msm_sched_gpu_recovery(gpu, submit); > +} > + > +static void msm_sched_free_job(struct drm_sched_job *sched_job) > +{ > + struct msm_gem_submit *submit = to_msm_gem_submit(sched_job); > + struct msm_gpu *gpu = submit->gpu; > + int i; > + > + mutex_lock(&gpu->dev->struct_mutex); > + > + for (i = 0; i < submit->nr_bos; i++) { > + struct msm_gem_object *msm_obj = submit->bos[i].obj; > + > + msm_gem_move_to_inactive(&msm_obj->base); > + } > + > + mutex_unlock(&gpu->dev->struct_mutex); > + > + pm_runtime_mark_last_busy(&gpu->pdev->dev); > + pm_runtime_put_autosuspend(&gpu->pdev->dev); > + > + msm_gem_submit_free(submit); > +} > + > +static const struct drm_sched_backend_ops msm_sched_ops = { > + .dependency = msm_sched_dependency, > + .run_job = msm_sched_run_job, > + .timedout_job = msm_sched_timedout_job, > + .free_job = msm_sched_free_job, > +}; > + > +int msm_sched_job_init(struct drm_sched_job *sched_job) > +{ > + int ret; > + struct msm_gem_submit *submit = to_msm_gem_submit(sched_job); > + struct msm_ringbuffer *ring = submit->ring; > + > + ret = drm_sched_job_init(sched_job, &ring->sched, > + &submit->queue->sched_entity, submit->ctx); > + if (ret) > + return ret; > + > + submit->out_fence = dma_fence_get(&submit->sched_job.s_fence->finished); > + > + 
mutex_lock(&ring->fence_idr_lock); > + submit->out_fence_id = idr_alloc_cyclic(&ring->fence_idr, > + submit->out_fence, 0, INT_MAX, > + GFP_KERNEL); > + mutex_unlock(&ring->fence_idr_lock); > + > + if (submit->out_fence_id < 0) { > + /* > + * TODO: The scheduler's finished fence leaks here since the > + * job will not be pushed to the queue. Need to update scheduler > + * to fix this cleanly(?) > + */ How would you propose to fix this? > + dma_fence_put(submit->out_fence); > + submit->out_fence = NULL; > + return -ENOMEM; > + } > + > + return 0; > +} > + > +void msm_sched_push_job(struct msm_gem_submit *submit) > +{ > + return drm_sched_entity_push_job(&submit->sched_job, > + &submit->queue->sched_entity); > +} > + > +int msm_sched_init(struct drm_gpu_scheduler *sched, const char *name) > +{ > + return drm_sched_init(sched, &msm_sched_ops, 4, 0, > + msecs_to_jiffies(500), name); Okay, I see where the ring->name comes in. I don't like it but at least it is a relatively low cost option if you share it with the fence names. Still... sigh. > +} > + > +void msm_sched_cleanup(struct drm_gpu_scheduler *sched) > +{ > + drm_sched_fini(sched); > +} I don't think this function is needed - I think we're smart enough to call drm_sched_fini directly. > +/* > + * Create a new entity on the schedulers normal priority runqueue. 
For now we > + * choose to always use the normal run queue priority, but in future its > + * possible with some extension to the msm drm interface, to create the > + * submitqueue(entities) of different priorities on the same ring, thereby > + * allowing to priortize and schedule submitqueues belonging to the same ring > + */ > +int msm_sched_entity_init(struct msm_gpu *gpu, > + struct drm_sched_entity *sched_entity, int prio) > +{ > + struct drm_gpu_scheduler *sched = &gpu->rb[prio]->sched; > + > + return drm_sched_entity_init(sched, sched_entity, > + &sched->sched_rq[DRM_SCHED_PRIORITY_NORMAL], NULL); > +} > + > +void msm_sched_entity_cleanup(struct msm_gpu *gpu, > + struct drm_sched_entity *sched_entity, int prio) > +{ > + struct drm_gpu_scheduler *sched = &gpu->rb[prio]->sched; > + > + drm_sched_entity_fini(sched, sched_entity); > +} > diff --git a/drivers/gpu/drm/msm/msm_sched.h b/drivers/gpu/drm/msm/msm_sched.h > new file mode 100644 > index 0000000..6ab2728 > --- /dev/null > +++ b/drivers/gpu/drm/msm/msm_sched.h > @@ -0,0 +1,18 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ // SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2018, The Linux Foundation. All rights reserved. 
*/ > + > +#ifndef __MSM_SCHED_H__ > +#define __MSM_SCHED_H__ > + > +#include <drm/gpu_scheduler.h> > + > +int msm_sched_init(struct drm_gpu_scheduler *sched, const char *name); > +void msm_sched_cleanup(struct drm_gpu_scheduler *sched); > +int msm_sched_entity_init(struct msm_gpu *gpu, struct drm_sched_entity *queue, > + int prio); > +void msm_sched_entity_cleanup(struct msm_gpu *gpu, > + struct drm_sched_entity *queue, int prio); > +int msm_sched_job_init(struct drm_sched_job *sched_job); > +void msm_sched_push_job(struct msm_gem_submit *submit); > +void msm_sched_gpu_recovery(struct msm_gpu *gpu, struct msm_gem_submit *submit); > +#endif /* __MSM_SCHED_H__ */ > diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c > index 325da44..b6eb13e 100644 > --- a/drivers/gpu/drm/msm/msm_submitqueue.c > +++ b/drivers/gpu/drm/msm/msm_submitqueue.c > @@ -19,6 +19,7 @@ void msm_submitqueue_destroy(struct kref *kref) > struct msm_gpu_submitqueue *queue = container_of(kref, > struct msm_gpu_submitqueue, ref); > > + msm_sched_entity_cleanup(queue->gpu, &queue->sched_entity, queue->prio); > kfree(queue); > } > > @@ -65,6 +66,7 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, > { > struct msm_drm_private *priv = drm->dev_private; > struct msm_gpu_submitqueue *queue; > + struct msm_gpu *gpu = priv->gpu; > > if (!ctx) > return -ENODEV; > @@ -77,13 +79,16 @@ int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx, > kref_init(&queue->ref); > queue->flags = flags; > > - if (priv->gpu) { > - if (prio >= priv->gpu->nr_rings) { > - kfree(queue); > - return -EINVAL; > - } > + if (gpu) { > + if (prio >= gpu->nr_rings) > + goto fail; > + > + if (msm_sched_entity_init(priv->gpu, &queue->sched_entity, > + prio)) > + goto fail; > > queue->prio = prio; > + queue->gpu = gpu; > } > > write_lock(&ctx->queuelock); > @@ -98,6 +103,9 @@ int msm_submitqueue_create(struct drm_device *drm, struct 
msm_file_private *ctx, > write_unlock(&ctx->queuelock); > > return 0; > +fail: > + kfree(queue); > + return -EINVAL; > } > > int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx) > -- > 1.9.1 > -- The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project