The READ/WRITE flags are particularly useful if we want to avoid
serializing jobs that only read from the same BO and never write to it.
The NO_IMPLICIT_FENCE flag might be useful when the user knows the BO is
shared but the jobs use different portions of the buffer.

Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c |  72 +++++++++--
 drivers/gpu/drm/panfrost/panfrost_job.c | 164 +++++++++++++++++++-----
 drivers/gpu/drm/panfrost/panfrost_job.h |  11 +-
 include/uapi/drm/panfrost_drm.h         |  41 ++++++
 4 files changed, 247 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index d74442d71048..08082fd557c3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -119,20 +119,76 @@ panfrost_lookup_bos(struct drm_device *dev,
                     struct drm_panfrost_submit *args,
                     struct panfrost_job *job)
 {
-        job->bo_count = args->bo_handle_count;
+        struct drm_panfrost_submit_bo *bo_descs = NULL;
+        u32 *handles = NULL;
+        u32 i, bo_count;
+        int ret = 0;
 
-        if (!job->bo_count)
+        bo_count = args->bo_desc_count ?
+                   args->bo_desc_count : args->bo_handle_count;
+        if (!bo_count)
                 return 0;
 
-        job->implicit_fences = kvmalloc_array(job->bo_count,
-                                  sizeof(struct dma_fence *),
+        job->bos = kvmalloc_array(bo_count, sizeof(*job->bos),
                                   GFP_KERNEL | __GFP_ZERO);
-        if (!job->implicit_fences)
+        if (!job->bos)
                 return -ENOMEM;
 
-        return drm_gem_objects_lookup(file_priv,
-                        (void __user *)(uintptr_t)args->bo_handles,
-                        job->bo_count, &job->bos);
+        job->bo_count = bo_count;
+        bo_descs = kvmalloc_array(bo_count, sizeof(*bo_descs),
+                                  GFP_KERNEL | __GFP_ZERO);
+        if (!bo_descs) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
+        if (!args->bo_desc_count) {
+                handles = kvmalloc_array(bo_count, sizeof(*handles),
+                                         GFP_KERNEL);
+                if (!handles) {
+                        ret = -ENOMEM;
+                        goto out;
+                }
+
+                if (copy_from_user(handles,
+                                   (void __user *)(uintptr_t)args->bo_handles,
+                                   job->bo_count * sizeof(*handles))) {
+                        ret = -EFAULT;
+                        goto out;
+                }
+
+                for (i = 0; i < job->bo_count; i++) {
+                        bo_descs[i].handle = handles[i];
+                        bo_descs[i].flags = PANFROST_SUBMIT_BO_WRITE |
+                                            PANFROST_SUBMIT_BO_READ;
+                }
+        } else if (copy_from_user(bo_descs,
+                                  (void __user *)(uintptr_t)args->bo_descs,
+                                  job->bo_count * sizeof(*bo_descs))) {
+                ret = -EFAULT;
+                goto out;
+        }
+
+        for (i = 0; i < job->bo_count; i++) {
+                if ((bo_descs[i].flags & ~PANFROST_SUBMIT_BO_VALID_FLAGS) ||
+                    !(bo_descs[i].flags & PANFROST_SUBMIT_BO_RW)) {
+                        ret = -EINVAL;
+                        goto out;
+                }
+
+                job->bos[i].flags = bo_descs[i].flags;
+                job->bos[i].obj = drm_gem_object_lookup(file_priv,
+                                                        bo_descs[i].handle);
+                if (!job->bos[i].obj) {
+                        ret = -ENOENT;
+                        goto out;
+                }
+        }
+
+out:
+        kvfree(handles);
+        kvfree(bo_descs);
+        return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 05c85f45a0de..e4b74fde9339 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -193,24 +193,116 @@ static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
         pm_runtime_put_autosuspend(pfdev->dev);
 }
 
-static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
-                                           int bo_count,
-                                           struct dma_fence **implicit_fences)
+static int panfrost_acquire_object_fences(struct panfrost_job *job)
 {
-        int i;
+        int i, ret;
 
-        for (i = 0; i < bo_count; i++)
-                implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
+        for (i = 0; i < job->bo_count; i++) {
+                struct panfrost_job_bo_desc *bo = &job->bos[i];
+                struct dma_resv *robj = bo->obj->resv;
+
+                if (!(job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)) {
+                        ret = dma_resv_reserve_shared(robj, 1);
+                        if (ret)
+                                return ret;
+                }
+
+                if (bo->flags & PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
+                        continue;
+
+                if (bo->flags & PANFROST_SUBMIT_BO_WRITE) {
+                        ret = dma_resv_get_fences_rcu(robj, &bo->excl,
+                                                      &bo->shared_count,
+                                                      &bo->shared);
+                        if (ret)
+                                return ret;
+                } else {
+                        bo->excl = dma_resv_get_excl_rcu(robj);
+                }
+        }
+
+        return 0;
 }
 
-static void panfrost_attach_object_fences(struct drm_gem_object **bos,
-                                          int bo_count,
-                                          struct dma_fence *fence)
+static void panfrost_attach_object_fences(struct panfrost_job *job)
 {
         int i;
 
-        for (i = 0; i < bo_count; i++)
-                dma_resv_add_excl_fence(bos[i]->resv, fence);
+        for (i = 0; i < job->bo_count; i++) {
+                struct drm_gem_object *obj = job->bos[i].obj;
+
+                if (job->bos[i].flags & PANFROST_SUBMIT_BO_WRITE)
+                        dma_resv_add_excl_fence(obj->resv,
+                                                job->render_done_fence);
+                else
+                        dma_resv_add_shared_fence(obj->resv,
+                                                  job->render_done_fence);
+        }
+}
+
+static int panfrost_job_lock_bos(struct panfrost_job *job,
+                                 struct ww_acquire_ctx *acquire_ctx)
+{
+        int contended = -1;
+        int i, ret;
+
+        ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+        if (contended != -1) {
+                struct drm_gem_object *obj = job->bos[contended].obj;
+
+                ret = ww_mutex_lock_slow_interruptible(&obj->resv->lock,
+                                                       acquire_ctx);
+                if (ret) {
+                        ww_acquire_done(acquire_ctx);
+                        return ret;
+                }
+        }
+
+        for (i = 0; i < job->bo_count; i++) {
+                if (i == contended)
+                        continue;
+
+                ret = ww_mutex_lock_interruptible(&job->bos[i].obj->resv->lock,
+                                                  acquire_ctx);
+                if (ret) {
+                        int j;
+
+                        for (j = 0; j < i; j++)
+                                ww_mutex_unlock(&job->bos[j].obj->resv->lock);
+
+                        if (contended != -1 && contended >= i) {
+                                struct drm_gem_object *contended_obj;
+
+                                contended_obj = job->bos[contended].obj;
+                                ww_mutex_unlock(&contended_obj->resv->lock);
+                        }
+
+                        if (ret == -EDEADLK) {
+                                contended = i;
+                                goto retry;
+                        }
+
+                        ww_acquire_done(acquire_ctx);
+                        return ret;
+                }
+        }
+
+        ww_acquire_done(acquire_ctx);
+
+        return 0;
+}
+
+static void panfrost_job_unlock_bos(struct panfrost_job *job,
+                                    struct ww_acquire_ctx *acquire_ctx)
+{
+        int i;
+
+        for (i = 0; i < job->bo_count; i++)
+                ww_mutex_unlock(&job->bos[i].obj->resv->lock);
+
+        ww_acquire_fini(acquire_ctx);
 }
 
 int panfrost_job_push(struct panfrost_job *job)
@@ -223,8 +315,7 @@ int panfrost_job_push(struct panfrost_job *job)
 
         mutex_lock(&pfdev->sched_lock);
 
-        ret = drm_gem_lock_reservations(job->bos, job->bo_count,
-                                        &acquire_ctx);
+        ret = panfrost_job_lock_bos(job, &acquire_ctx);
         if (ret) {
                 mutex_unlock(&pfdev->sched_lock);
                 return ret;
@@ -240,18 +331,16 @@ int panfrost_job_push(struct panfrost_job *job)
 
         kref_get(&job->refcount); /* put by scheduler job completion */
 
-        panfrost_acquire_object_fences(job->bos, job->bo_count,
-                                       job->implicit_fences);
+        panfrost_acquire_object_fences(job);
 
         drm_sched_entity_push_job(&job->base, entity);
 
         mutex_unlock(&pfdev->sched_lock);
 
-        panfrost_attach_object_fences(job->bos, job->bo_count,
-                                      job->render_done_fence);
+        panfrost_attach_object_fences(job);
 
 unlock:
-        drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
+        panfrost_job_unlock_bos(job, &acquire_ctx);
 
         return ret;
 }
@@ -267,20 +356,22 @@ static void panfrost_job_cleanup(struct kref *ref)
                         dma_fence_put(job->in_fences[i]);
                 kvfree(job->in_fences);
         }
-        if (job->implicit_fences) {
-                for (i = 0; i < job->bo_count; i++)
-                        dma_fence_put(job->implicit_fences[i]);
-                kvfree(job->implicit_fences);
+
+        for (i = 0; i < job->bo_count; i++) {
+                unsigned int j;
+
+                dma_fence_put(job->bos[i].excl);
+                for (j = 0; j < job->bos[i].shared_count; j++)
+                        dma_fence_put(job->bos[i].shared[j]);
         }
+
         dma_fence_put(job->done_fence);
         dma_fence_put(job->render_done_fence);
 
-        if (job->bos) {
-                for (i = 0; i < job->bo_count; i++)
-                        drm_gem_object_put_unlocked(job->bos[i]);
-                kvfree(job->bos);
-        }
+        for (i = 0; i < job->bo_count; i++)
+                drm_gem_object_put_unlocked(job->bos[i].obj);
+        kvfree(job->bos);
 
         kfree(job);
 }
@@ -314,11 +405,22 @@ static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job
                 }
         }
 
-        /* Implicit fences, max. one per BO */
+        /* Implicit fences */
         for (i = 0; i < job->bo_count; i++) {
-                if (job->implicit_fences[i]) {
-                        fence = job->implicit_fences[i];
-                        job->implicit_fences[i] = NULL;
+                unsigned int j;
+
+                if (job->bos[i].excl) {
+                        fence = job->bos[i].excl;
+                        job->bos[i].excl = NULL;
+                        return fence;
+                }
+
+                for (j = 0; j < job->bos[i].shared_count; j++) {
+                        if (!job->bos[i].shared[j])
+                                continue;
+
+                        fence = job->bos[i].shared[j];
+                        job->bos[i].shared[j] = NULL;
                         return fence;
                 }
         }
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.h b/drivers/gpu/drm/panfrost/panfrost_job.h
index 62454128a792..53440640a6e3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.h
+++ b/drivers/gpu/drm/panfrost/panfrost_job.h
@@ -11,6 +11,14 @@ struct panfrost_device;
 struct panfrost_gem_object;
 struct panfrost_file_priv;
 
+struct panfrost_job_bo_desc {
+        struct drm_gem_object *obj;
+        struct dma_fence *excl;
+        struct dma_fence **shared;
+        unsigned int shared_count;
+        u32 flags;
+};
+
 struct panfrost_job {
         struct drm_sched_job base;
 
@@ -31,8 +39,7 @@ struct panfrost_job {
         __u32 flush_id;
 
         /* Exclusive fences we have taken from the BOs to wait for */
-        struct dma_fence **implicit_fences;
-        struct drm_gem_object **bos;
+        struct panfrost_job_bo_desc *bos;
         u32 bo_count;
 
         /* Fence to be signaled by drm-sched once its done with the job */
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index ec19db1eead8..029c6ae1b1f0 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -40,6 +40,28 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_PERFCNT_DUMP DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_PERFCNT_DUMP, struct drm_panfrost_perfcnt_dump)
 
 #define PANFROST_JD_REQ_FS (1 << 0)
+
+#define PANFROST_SUBMIT_BO_READ (1 << 0)
+#define PANFROST_SUBMIT_BO_WRITE (1 << 1)
+#define PANFROST_SUBMIT_BO_RW (PANFROST_SUBMIT_BO_READ | \
+                               PANFROST_SUBMIT_BO_WRITE)
+#define PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE (1 << 2)
+#define PANFROST_SUBMIT_BO_VALID_FLAGS \
+        (PANFROST_SUBMIT_BO_RW | PANFROST_SUBMIT_BO_NO_IMPLICIT_FENCE)
+
+/**
+ * struct drm_panfrost_submit_bo - BO descriptor passed to the submit ioctl.
+ *
+ * Useful to give detailed information about BOs used by a given job.
+ *
+ * @handle: the GEM handle
+ * @flags: a combination of PANFROST_SUBMIT_BO_*
+ */
+struct drm_panfrost_submit_bo {
+        __u32 handle;
+        __u32 flags;
+};
+
 /**
  * struct drm_panfrost_submit - ioctl argument for submitting commands to the 3D
  * engine.
@@ -68,6 +90,25 @@ struct drm_panfrost_submit {
 
         /** A combination of PANFROST_JD_REQ_* */
         __u32 requirements;
+
+        /**
+         * Pointer to an array of &drm_panfrost_submit_bo objects. This
+         * field is meant to replace &drm_panfrost_submit.bo_handles, which
+         * did not provide enough information to relax synchronization
+         * between jobs that only read the BOs they share.
+         * When both &drm_panfrost_submit.bo_handles and
+         * &drm_panfrost_submit.bo_descs are provided,
+         * drm_panfrost_submit.bo_handles is ignored.
+         */
+        __u64 bo_descs;
+
+        /**
+         * Number of BO descriptors passed in (size is that times
+         * sizeof(struct drm_panfrost_submit_bo)).
+         */
+        __u32 bo_desc_count;
+
+        /** Padding, must be 0. */
+        __u32 pad;
 };
 
 /**
-- 
2.21.0
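
Not part of the patch, just for illustration: a rough sketch of how
userspace could fill the new bo_descs array, assuming the usual libdrm
drmIoctl() wrapper. The helper name, file descriptor and BO handles are
made-up placeholders, the include paths depend on where the uapi header
is installed, and the remaining submit fields (job chain address, sync
objects, requirements) are set up exactly as with the existing
bo_handles path.

  #include <stdint.h>
  #include <string.h>
  #include <xf86drm.h>
  #include <drm/panfrost_drm.h>

  /* Hypothetical helper: submit a job that samples 'tex' and renders to
   * 'fb'. Marking 'tex' read-only lets other jobs that also only read
   * the same BO run without serializing on this one.
   */
  static int submit_one_job(int fd, uint32_t tex, uint32_t fb)
  {
          struct drm_panfrost_submit_bo bos[2] = {
                  { .handle = tex, .flags = PANFROST_SUBMIT_BO_READ },
                  { .handle = fb,  .flags = PANFROST_SUBMIT_BO_RW },
          };
          struct drm_panfrost_submit submit;

          memset(&submit, 0, sizeof(submit));
          /* Job chain address, sync objects, requirements, etc. are
           * filled in as before (omitted here).
           */
          submit.bo_descs = (uintptr_t)bos;
          submit.bo_desc_count = 2;
          /* bo_handles/bo_handle_count stay zero and are ignored once
           * bo_descs is used.
           */

          return drmIoctl(fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
  }

Keeping the old bo_handles path as a fallback means existing userspace
keeps working unchanged while new userspace can opt in to per-buffer
flags.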