On 13/09/2019 12:17, Boris Brezillon wrote:
> The READ/WRITE flags are particularly useful if we want to avoid
> serialization of jobs that read from the same BO but never write to it.
> The NO_IMPLICIT_FENCE might be useful when the user knows the BO is
> shared but jobs are using different portions of the buffer.
>
> Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>

Good feature - we could do with an (easy) way for the user driver to
detect this, so it might be worth bumping the driver version for it?
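With a minor version bump, userspace could then probe for the new
submit path with plain libdrm. A rough sketch - the "1.2" below is only
a placeholder for whatever version number we end up picking:

#include <stdbool.h>
#include <xf86drm.h>

/* Hypothetical check, assuming the BO-descriptor feature bumps the
 * panfrost driver version to 1.2. */
static bool panfrost_has_bo_descs(int fd)
{
        drmVersionPtr v = drmGetVersion(fd);
        bool ret;

        if (!v)
                return false;

        ret = v->version_major > 1 ||
              (v->version_major == 1 && v->version_minor >= 2);

        drmFreeVersion(v);
        return ret;
}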
Some more comments below.

> ---
>  drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
>  drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
>  drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
>  include/uapi/drm/panfrost_drm.h         |  41 ++++++
>  4 files changed, 247 insertions(+), 41 deletions(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
> index d74442d71048..08082fd557c3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
> @@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
>  		  struct drm_panfrost_submit *args,
>  		  struct panfrost_job *job)
>  {
> -	job->bo_count = args->bo_handle_count;
> +	struct drm_panfrost_submit_bo *bo_descs = NULL;
> +	u32 *handles = NULL;
> +	u32 i, bo_count;
> +	int ret = 0;
>
> -	if (!job->bo_count)
> +	bo_count = args->bo_desc_count ?
> +		   args->bo_desc_count : args->bo_handle_count;
> +	if (!bo_count)
>  		return 0;
>
> -	job->implicit_fences = kvmalloc_array(job->bo_count,
> -					      sizeof(struct dma_fence *),
> +	job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
>  				  GFP_KERNEL | __GFP_ZERO);
> -	if (!job->implicit_fences)
> +	if (!job->bos)
>  		return -ENOMEM;
>
> -	return drm_gem_objects_lookup(file_priv,
> -				      (void __user *)(uintptr_t)args->bo_handles,
> -				      job->bo_count, &job->bos);
> +	job->bo_count = bo_count;
> +	bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
> +				  GFP_KERNEL | __GFP_ZERO);
> +	if (!bo_descs) {
> +		ret = -ENOMEM;
> +		goto out;

This can be just "return -ENOMEM" - both handles and bo_descs will be
NULL.

> +	}
> +
> +	if (!args->bo_desc_count) {
> +		handles = kvmalloc_array(bo_count, sizeof(*handles),
> +					 GFP_KERNEL);
> +		if (!handles) {
> +			ret =-ENOMEM;
> +			goto out;
> +		}
> +
> +		if (copy_from_user(handles,
> +				   (void __user *)(uintptr_t)args->bo_handles,
> +				   job->bo_count * sizeof(*handles))) {
> +			ret = -EFAULT;
> +			goto out;
> +		}
> +
> +		for (i = 0; i < job->bo_count; i++) {
> +			bo_descs[i].handle = handles[i];
> +			bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
> +					    PANFROST_SUBMIT_BO_READ;

You can use PANFROST_SUBMIT_BO_RW here.

> +		}
> +	} else if (copy_from_user(bo_descs,
> +				  (void __user *)(uintptr_t)args->bo_descs,
> +				  job->bo_count * sizeof(*bo_descs))) {
> +		ret = -EFAULT;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
> +		    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		job->bos[i].flags = bo_descs[i].flags;
> +		job->bos[i].obj = drm_gem_object_lookup(file_priv,
> +							bo_descs[i].handle);
> +		if (!job->bos[i].obj) {
> +			ret = -ENOENT;
> +			goto out;
> +		}
> +	}
> +
> +out:
> +	kvfree(handles);
> +	kvfree(bo_descs);
> +	return ret;
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 05c85f45a0de..e4b74fde9339 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
>  	pm_runtime_put_autosuspend(pfdev->dev);
>  }
>
> -static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
> -					   int bo_count,
> -					   struct dma_fence **implicit_fences)
> +static int panfrost_acquire_object_fences(struct panfrost_job *job)
>  {
> -	int i;
> +	int i, ret;
>
> -	for (i = 0; i < bo_count; i++)
> -		implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct panfrost_job_bo_desc *bo = &job->bos[i];
> +		struct dma_resv *robj = bo->obj->resv;
> +
> +		if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
> +			ret = dma_resv_reserve_shared(robj, 1);
> +			if (ret)
> +				return ret;
> +		}
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +			continue;
> +
> +		if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
> +			ret = dma_resv_get_fences_rcu(robj, &bo->excl,
> +						      &bo->shared_count,
> +						      &bo->shared);
> +			if (ret)
> +				return ret;
> +		} else {
> +			bo->excl = dma_resv_get_excl_rcu(robj);
> +		}

The implementation of NO_IMPLICIT_FENCE seems a bit strange to me:
READ | NO_IMPLICIT_FENCE still reserves space for a shared fence. I
don't understand why.

> +	}
> +
> +	return 0;
>  }
>
> -static void panfrost_attach_object_fences(struct drm_gem_object **bos,
> -					  int bo_count,
> -					  struct dma_fence *fence)
> +static void panfrost_attach_object_fences(struct panfrost_job *job)
>  {
>  	int i;
>
> -	for (i = 0; i < bo_count; i++)
> -		dma_resv_add_excl_fence(bos[i]->resv, fence);
> +	for (i = 0; i < job->bo_count; i++) {
> +		struct drm_gem_object *obj = job->bos[i].obj;
> +
> +		if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
> +			dma_resv_add_excl_fence(obj->resv,
> +						job->render_done_fence);
> +		else
> +			dma_resv_add_shared_fence(obj->resv,
> +						  job->render_done_fence);
> +	}
> +}
> +
> +static int panfrost_job_lock_bos(struct panfrost_job *job,
> +				 struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int contended = -1;
> +	int i, ret;
> +
> +	ww_acquire_init(acquire_ctx, &reservation_ww_class);
> +
> +retry:
> +	if (contended != -1) {
> +		struct drm_gem_object *obj = job->bos[contended].obj;
> +
> +		ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
> +						       acquire_ctx);

dma_resv_lock_slow_interruptible()?

> +		if (ret) {
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		if (i == contended)
> +			continue;
> +
> +		ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
> +						  acquire_ctx);

dma_resv_lock_interruptible()?
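i.e. use the dma-resv wrappers rather than open-coding the ww_mutex
calls. Untested, but both helpers take the struct dma_resv directly:

	ret = dma_resv_lock_slow_interruptible(obj->resv, acquire_ctx);

and

	ret = dma_resv_lock_interruptible(job->bos[i].obj->resv,
					  acquire_ctx);

with dma_resv_unlock() in the unwind paths below - that way the code
doesn't need to reach into resv->lock by hand.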
> +		if (ret) {
> +			int j;
> +
> +			for (j = 0; j < i; j++)
> +				ww_mutex_unlock(&job->bos[j].obj->resv->lock);
> +
> +			if (contended != -1 && contended >= i) {
> +				struct drm_gem_object *contended_obj;
> +
> +				contended_obj = job->bos[contended].obj;
> +				ww_mutex_unlock(&contended_obj->resv->lock);
> +			}
> +
> +			if (ret == -EDEADLK) {
> +				contended = i;
> +				goto retry;
> +			}
> +
> +			ww_acquire_done(acquire_ctx);
> +			return ret;
> +		}
> +	}
> +
> +	ww_acquire_done(acquire_ctx);
> +
> +	return 0;
> +}

This looks like a copy of drm_gem_lock_reservations(). The only reason
for it as far as I can see is because we now have an array of struct
panfrost_job_bo_desc rather than a direct array of struct
drm_gem_object. I'm not sure having everything neatly in one structure
is worth this cost?
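For the sake of argument, a rough sketch of the alternative (the field
names are made up): keep the plain GEM pointer array as today and move
only the per-BO metadata into the new structure:

	struct panfrost_job {
		...
		struct drm_gem_object **bos;		/* as before */
		struct panfrost_job_bo_desc *bo_descs;	/* flags + fences */
		u32 bo_count;
		...
	};

Then the existing helpers keep working unchanged:

	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
					&acquire_ctx);
	...
	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);

at the cost of one extra allocation (or a single allocation with the
descriptor array laid out after the pointer array).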
> +
> +static void panfrost_job_unlock_bos(struct panfrost_job *job,
> +				    struct ww_acquire_ctx *acquire_ctx)
> +{
> +	int i;
> +
> +	for (i = 0; i < job->bo_count; i++)
> +		ww_mutex_unlock(&job->bos[i].obj->resv->lock);
> +
> +	ww_acquire_fini(acquire_ctx);
> +}
>
>  int panfrost_job_push(struct panfrost_job *job)
> @@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
>
>  	mutex_lock(&pfdev->sched_lock);
>
> -	ret = drm_gem_lock_reservations(job->bos, job->bo_count,
> -					&acquire_ctx);
> +	ret = panfrost_job_lock_bos(job, &acquire_ctx);
>  	if (ret) {
>  		mutex_unlock(&pfdev->sched_lock);
>  		return ret;
> @@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
>
>  	kref_get(&job->refcount); /* put by scheduler job completion */
>
> -	panfrost_acquire_object_fences(job->bos, job->bo_count,
> -				       job->implicit_fences);
> +	panfrost_acquire_object_fences(job);
>
>  	drm_sched_entity_push_job(&job->base, entity);
>
>  	mutex_unlock(&pfdev->sched_lock);
>
> -	panfrost_attach_object_fences(job->bos, job->bo_count,
> -				      job->render_done_fence);
> +	panfrost_attach_object_fences(job);
>
>  unlock:
> -	drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
> +	panfrost_job_unlock_bos(job, &acquire_ctx);
>
>  	return ret;
>  }
> @@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref)
>  			dma_fence_put(job->in_fences[i]);
>  		kvfree(job->in_fences);
>  	}
> -	if (job->implicit_fences) {
> -		for (i = 0; i < job->bo_count; i++)
> -			dma_fence_put(job->implicit_fences[i]);
> -		kvfree(job->implicit_fences);
> +
> +	for (i = 0; i < job->bo_count; i++) {
> +		unsigned int j;
> +
> +		dma_fence_put(job->bos[i].excl);
> +		for (j = 0; j < job->bos[i].shared_count; j++)
> +			dma_fence_put(job->bos[i].shared[j]);
>  	}
> +
>  	dma_fence_put(job->done_fence);
>  	dma_fence_put(job->render_done_fence);
>
> -	if (job->bos) {
> -		for (i = 0; i < job->bo_count; i++)
> -			drm_gem_object_put_unlocked(job->bos[i]);
> -		kvfree(job->bos);
> -	}
> +	for (i = 0; i < job->bo_count; i++)
> +		drm_gem_object_put_unlocked(job->bos[i].obj);
>
> +	kvfree(job->bos);
>  	kfree(job);
>  }
>
> @@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job
>  		}
>  	}
>
> -	/* Implicit fences, max. one per BO */
> +	/* Implicit fences */
>  	for (i = 0; i < job->bo_count; i++) {
> +		unsigned int j;
> +
> +		if (job->bos[i].excl) {
> +			fence = job->bos[i].excl;
> +			job->bos[i].excl = NULL;
> +			return fence;
> +		}
> +
> +		for (j = 0; j < job->bos[i].shared_count; j++) {
> +			if (!job->bos[i].shared[j])
> +				continue;
> +
> +			fence = job->bos[i].shared[j];
> +			job->bos[i].shared[j] = NULL;
>  			return fence;
>  		}
>  	}
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
> index 62454128a792..53440640a6e3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.h
> @@ -11,6 +11,14 @@ struct panfrost_device;
>  struct panfrost_gem_object;
>  struct panfrost_file_priv;
>
> +struct panfrost_job_bo_desc {
> +	struct drm_gem_object *obj;
> +	struct dma_fence *excl;
> +	struct dma_fence **shared;
> +	unsigned int shared_count;
> +	u32 flags;
> +};
> +
>  struct panfrost_job {
>  	struct drm_sched_job base;
>
> @@ -31,8 +39,7 @@ struct panfrost_job {
>  	__u32 flush_id;
>
>  	/* Exclusive fences we have taken from the BOs to wait for */

Comment needs updating.

> -	struct dma_fence **implicit_fences;
> -	struct drm_gem_object **bos;
> +	struct panfrost_job_bo_desc *bos;
>  	u32 bo_count;
>
>  	/* Fence to be signaled by drm-sched once its done with the job */
> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
> index ec19db1eead8..029c6ae1b1f0 100644
> --- a/include/uapi/drm/panfrost_drm.h
> +++ b/include/uapi/drm/panfrost_drm.h
> @@ -40,6 +40,28 @@ extern "C" {
>  #define DRM_IOCTL_PANFROST_PERFCNT_DUMP		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
>
>  #define PANFROST_JD_REQ_FS (1 << 0)
> +
> +#define PANFROST_SUBMIT_BO_READ			(1 << 0)
> +#define PANFROST_SUBMIT_BO_WRITE		(1 << 1)
> +#define PANFROST_SUBMIT_BO_RW			(PANFROST_SUBMIT_BO_READ | \
> +						 PANFROST_SUBMIT_BO_WRITE)
> +#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE	(1 << 2)
> +#define PANFROST_SUBMIT_BO_VALID_FLAGS \
> +	(PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
> +
> +/**
> + * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
> + *
> + * Useful to give detailed information about BOs used by a given job.
> + *
> + * @handle: the GEM handle
> + * @flags: a combination of PANFROST_SUBMIT_BO_*
> + */
> +struct drm_panfrost_submit_bo {
> +	__u32 handle;
> +	__u32 flags;
> +};
> +
>  /**
>   * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
>   * engine.
> @@ -68,6 +90,25 @@ struct drm_panfrost_submit {
>
>  	/** A combination of PANFROST_JD_REQ_* */
>  	__u32 requirements;
> +
> +	/**
> +	 * Pointer to a u32 array of &drm_panfrost_submit_bo_desc objects. This
> +	 * field is meant to replace &drm_panfrost_submit.bo_handles which did
> +	 * not provide enough information to relax synchronization between
> +	 * jobs that only read the BO they share. When both
> +	 * &drm_panfrost_submit.bo_handles and &drm_panfrost_submit.bo_descs
> +	 * are provided, drm_panfrost_submit.bo_handles is ignored.
> +	 */
> +	__u64 bo_descs;
> +
> +	/**
> +	 * Number of BO descriptors passed in (size is that times
> +	 * sizeof(drm_panfrost_submit_bo_desc)).
> +	 */
> +	__u32 bo_desc_count;

We don't really need another count field. bo_handle_count could be
re-used. Indeed this could even be handled with just a flags field with
a new flag specifying that bo_handles no longer points to handles but
to bo_desc objects instead.
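A sketch of what I mean - PANFROST_SUBMIT_HAS_BO_DESCS is a made-up
name and the bit assignment is arbitrary:

#define PANFROST_SUBMIT_HAS_BO_DESCS	(1 << 0)

struct drm_panfrost_submit {
	...
	/**
	 * Array of __u32 handles or, when PANFROST_SUBMIT_HAS_BO_DESCS
	 * is set in flags, of struct drm_panfrost_submit_bo descriptors.
	 */
	__u64 bo_handles;

	/** Number of entries in the bo_handles array. */
	__u32 bo_handle_count;

	/** A combination of PANFROST_SUBMIT_* flags. */
	__u32 flags;
};

Old userspace passes flags == 0 and gets the current behaviour, and we
don't need an extra count or padding field.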
Steve

> +
> +	/** Padding, must be 0. */
> +	__u32 pad;
>  };
>
>  /**

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/dri-devel