This should help limit the number of ioctls when submitting multiple
jobs. The new ioctl also supports syncobj timelines and BO access
flags.

Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
---
 drivers/gpu/drm/panfrost/panfrost_drv.c | 349 ++++++++++++++++++++++++
 drivers/gpu/drm/panfrost/panfrost_job.c |   3 +
 include/uapi/drm/panfrost_drm.h         |  84 +++++++
 3 files changed, 436 insertions(+)

diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 6529e5972b47..7ed0773a5c19 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -491,6 +491,354 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data,
 	return panfrost_submitqueue_destroy(priv, id);
 }
 
+/*
+ * Copy @count syncobj references from the user array at @refs (elements
+ * are @ref_stride bytes apart), look up the fence each one designates and
+ * add it to the job dependencies (@job->deps).
+ */
+static int
+panfrost_get_job_in_syncs(struct drm_file *file_priv,
+			  u64 refs, u32 ref_stride,
+			  u32 count, struct panfrost_job *job)
+{
+	const void __user *in = u64_to_user_ptr(refs);
+	unsigned int i;
+	int ret;
+
+	if (!count)
+		return 0;
+
+	for (i = 0; i < count; i++) {
+		struct drm_panfrost_syncobj_ref ref = { };
+		struct dma_fence *fence;
+
+		ret = copy_struct_from_user(&ref, sizeof(ref),
+					    in + (i * ref_stride),
+					    ref_stride);
+		if (ret)
+			return ret;
+
+		if (ref.pad)
+			return -EINVAL;
+
+		ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point,
+					     0, &fence);
+		if (ret)
+			return ret;
+
+		ret = drm_gem_fence_array_add(&job->deps, fence);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Output sync point of a job: the syncobj to update, the timeline point
+ * (0: replace the syncobj fence instead of adding a point) and the chain
+ * node pre-allocated for a non-zero point.
+ */
+struct panfrost_job_out_sync {
+	struct drm_syncobj *syncobj;
+	struct dma_fence_chain *chain;
+	u64 point;
+};
+
+/*
+ * Release the syncobj references and unused chain nodes of @out_syncs, then
+ * free the array. Stops at the first entry without a syncobj, which is how
+ * a partially initialized array is handled. A NULL array is accepted.
+ */
+static void
+panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		if (!out_syncs[i].syncobj)
+			break;
+
+		drm_syncobj_put(out_syncs[i].syncobj);
+		kvfree(out_syncs[i].chain);
+	}
+
+	kvfree(out_syncs);
+}
+
+/*
+ * Build the output sync array from the @count user references at @refs.
+ * Returns NULL when @count is zero, an ERR_PTR() on failure, or an array
+ * to be released with panfrost_put_job_out_syncs().
+ */
+static struct panfrost_job_out_sync *
+panfrost_get_job_out_syncs(struct drm_file *file_priv,
+			   u64 refs, u32 ref_stride,
+			   u32 count)
+{
+	void __user *in = u64_to_user_ptr(refs);
+	struct panfrost_job_out_sync *out_syncs;
+	unsigned int i;
+	int ret;
+
+	if (!count)
+		return NULL;
+
+	out_syncs = kvmalloc_array(count, sizeof(*out_syncs),
+				   GFP_KERNEL | __GFP_ZERO);
+	if (!out_syncs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < count; i++) {
+		struct drm_panfrost_syncobj_ref ref = { };
+
+		ret = copy_struct_from_user(&ref, sizeof(ref),
+					    in + (i * ref_stride),
+					    ref_stride);
+		if (ret)
+			goto err_free_out_syncs;
+
+		if (ref.pad) {
+			ret = -EINVAL;
+			goto err_free_out_syncs;
+		}
+
+		out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle);
+		if (!out_syncs[i].syncobj) {
+			ret = -EINVAL;
+			goto err_free_out_syncs;
+		}
+
+		out_syncs[i].point = ref.point;
+		if (!out_syncs[i].point)
+			continue;
+
+		out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain),
+					     GFP_KERNEL);
+		if (!out_syncs[i].chain) {
+			ret = -ENOMEM;
+			goto err_free_out_syncs;
+		}
+	}
+
+	return out_syncs;
+
+err_free_out_syncs:
+	panfrost_put_job_out_syncs(out_syncs, count);
+	return ERR_PTR(ret);
+}
+
+/*
+ * Attach @fence to every output syncobj: as a new timeline point when a
+ * chain node was allocated, as a plain fence replacement otherwise.
+ */
+static void
+panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs,
+			   unsigned int count, struct dma_fence *fence)
+{
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		if (out_syncs[i].chain) {
+			drm_syncobj_add_point(out_syncs[i].syncobj,
+					      out_syncs[i].chain,
+					      fence, out_syncs[i].point);
+			out_syncs[i].chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(out_syncs[i].syncobj,
+						  fence);
+		}
+	}
+}
+
+#define PANFROST_BO_REF_ALLOWED_FLAGS \
+	(PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP)
+
+/*
+ * Look up the @count BOs referenced by the user array at @refs and store
+ * them, with their access flags, in @job->bos and @job->bo_flags. Nothing
+ * is released here on error; the arrays stay attached to @job.
+ */
+static int
+panfrost_get_job_bos(struct drm_file *file_priv,
+		     u64 refs, u32 ref_stride, u32 count,
+		     struct panfrost_job *job)
+{
+	void __user *in = u64_to_user_ptr(refs);
+	unsigned int i;
+
+	job->bo_count = count;
+
+	if (!count)
+		return 0;
+
+	job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos),
+				  GFP_KERNEL | __GFP_ZERO);
+	job->bo_flags = kvmalloc_array(job->bo_count,
+				       sizeof(*job->bo_flags),
+				       GFP_KERNEL | __GFP_ZERO);
+	if (!job->bos || !job->bo_flags)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		struct drm_panfrost_bo_ref ref = { };
+		int ret;
+
+		ret = copy_struct_from_user(&ref, sizeof(ref),
+					    in + (i * ref_stride),
+					    ref_stride);
+		if (ret)
+			return ret;
+
+		if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS))
+			return -EINVAL;
+
+		job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle);
+		if (!job->bos[i])
+			return -EINVAL;
+
+		job->bo_flags[i] = ref.flags;
+	}
+
+	return 0;
+}
+
+#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS
+
+/*
+ * Create a job from @args and push it to @queue. @bo_stride and
+ * @syncobj_stride are the element strides of the user BO and syncobj
+ * reference arrays. Output syncobjs are released on every exit path.
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int
+panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv,
+		    struct panfrost_submitqueue *queue,
+		    const struct drm_panfrost_job *args,
+		    u32 bo_stride, u32 syncobj_stride)
+{
+	struct panfrost_device *pfdev = dev->dev_private;
+	struct panfrost_job_out_sync *out_syncs;
+	struct panfrost_job *job;
+	int ret;
+
+	if (!args->head)
+		return -EINVAL;
+
+	if (args->requirements & ~PANFROST_JD_ALLOWED_REQS)
+		return -EINVAL;
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	kref_init(&job->refcount);
+
+	job->pfdev = pfdev;
+	job->jc = args->head;
+	job->requirements = args->requirements;
+	job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
+	job->file_priv = file_priv->driver_priv;
+	xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
+
+	ret = panfrost_get_job_in_syncs(file_priv,
+					args->in_syncs,
+					syncobj_stride,
+					args->in_sync_count,
+					job);
+	if (ret)
+		goto err_put_job;
+
+	out_syncs = panfrost_get_job_out_syncs(file_priv,
+					       args->out_syncs,
+					       syncobj_stride,
+					       args->out_sync_count);
+	if (IS_ERR(out_syncs)) {
+		ret = PTR_ERR(out_syncs);
+		goto err_put_job;
+	}
+
+	ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride,
+				   args->bo_count, job);
+	if (ret)
+		goto err_put_out_syncs;
+
+	ret = panfrost_get_job_mappings(file_priv, job);
+	if (ret)
+		goto err_put_out_syncs;
+
+	ret = panfrost_job_push(queue, job);
+	if (ret)
+		goto err_put_out_syncs;
+
+	panfrost_set_job_out_fence(out_syncs, args->out_sync_count,
+				   job->render_done_fence);
+	panfrost_put_job_out_syncs(out_syncs, args->out_sync_count);
+
+	/*
+	 * NOTE(review): the reference taken by kref_init() is not dropped
+	 * on success; confirm panfrost_job_push() consumes it.
+	 */
+	return 0;
+
+err_put_out_syncs:
+	panfrost_put_job_out_syncs(out_syncs, args->out_sync_count);
+err_put_job:
+	panfrost_job_put(job);
+	return ret;
+}
+
+/*
+ * BATCH_SUBMIT ioctl: submit args->job_count jobs to queue args->queue.
+ * Submission stops at the first failing job: its index is stored in
+ * args->fail_idx and its error code is returned. An empty batch succeeds.
+ */
+static int
+panfrost_ioctl_batch_submit(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv)
+{
+	struct drm_panfrost_batch_submit *args = data;
+	void __user *jobs_args = u64_to_user_ptr(args->jobs);
+	struct panfrost_submitqueue *queue;
+	unsigned int i;
+	int ret = 0;
+
+	/* Relax this test if new fields are added to
+	 * drm_panfrost_{bo_ref,syncobj_ref,job}.
+	 */
+	if (args->job_stride < sizeof(struct drm_panfrost_job) ||
+	    args->bo_ref_stride < sizeof(struct drm_panfrost_bo_ref) ||
+	    args->syncobj_ref_stride < sizeof(struct drm_panfrost_syncobj_ref))
+		return -EINVAL;
+
+	queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue);
+	if (IS_ERR(queue))
+		return PTR_ERR(queue);
+
+	for (i = 0; i < args->job_count; i++) {
+		struct drm_panfrost_job job_args = { };
+
+		ret = copy_struct_from_user(&job_args, sizeof(job_args),
+					    jobs_args + (i * args->job_stride),
+					    args->job_stride);
+		if (ret) {
+			args->fail_idx = i;
+			goto out_put_queue;
+		}
+
+		ret = panfrost_submit_job(dev, file_priv, queue, &job_args,
+					  args->bo_ref_stride,
+					  args->syncobj_ref_stride);
+		if (ret) {
+			args->fail_idx = i;
+			goto out_put_queue;
+		}
+	}
+
+out_put_queue:
+	panfrost_submitqueue_put(queue);
+	return ret;
+}
+
 int panfrost_unstable_ioctl_check(void)
 {
 	if (!unstable_ioctls)
@@ -570,6 +918,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = {
 	PANFROST_IOCTL(MADVISE,		madvise,	DRM_RENDER_ALLOW),
 	PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW),
 	PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW),
+	PANFROST_IOCTL(BATCH_SUBMIT,	batch_submit,	DRM_RENDER_ALLOW),
 };
 
 DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index 56ae89272e19..4e1540bce865 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job)
 				return ret;
 		}
 
+		if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP)
+			continue;
+
 		ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i],
 						       exclusive);
 		if (ret)
diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h
index 7ee02fd1ac75..ce0c1b96a58c 100644
--- a/include/uapi/drm/panfrost_drm.h
+++ b/include/uapi/drm/panfrost_drm.h
@@ -23,6 +23,7 @@ extern "C" {
 #define DRM_PANFROST_MADVISE			0x08
 #define DRM_PANFROST_CREATE_SUBMITQUEUE	0x09
 #define DRM_PANFROST_DESTROY_SUBMITQUEUE	0x0a
+#define DRM_PANFROST_BATCH_SUBMIT		0x0b
 
 #define DRM_IOCTL_PANFROST_SUBMIT		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit)
 #define DRM_IOCTL_PANFROST_WAIT_BO		DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo)
@@ -33,6 +34,7 @@ extern "C" {
 #define DRM_IOCTL_PANFROST_MADVISE		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise)
 #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE	DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue)
 #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE	DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32)
+#define DRM_IOCTL_PANFROST_BATCH_SUBMIT		DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit)
 
 /*
  * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module
@@ -241,7 +243,89 @@ struct drm_panfrost_create_submitqueue {
 	__u32 id;	/* out, identifier */
 };
 
+/* Syncobj reference passed at job submission time to encode explicit
+ * input/output fences. The pad field must be zero.
+ */
+struct drm_panfrost_syncobj_ref {
+	__u32 handle;
+	__u32 pad;
+	__u64 point;
+};
+
 #define PANFROST_BO_REF_EXCLUSIVE	0x1
+#define PANFROST_BO_REF_NO_IMPLICIT_DEP	0x2
+
+/* Describes a BO referenced by a job and the type of access. */
+struct drm_panfrost_bo_ref {
+	__u32 handle;
+	__u32 flags;
+};
+
+/* Describes a GPU job and the resources attached to it. */
+struct drm_panfrost_job {
+	/** GPU pointer to the head of the job chain. */
+	__u64 head;
+
+	/**
+	 * Array of drm_panfrost_bo_ref objects describing the BOs referenced
+	 * by this job.
+	 */
+	__u64 bos;
+
+	/**
+	 * Arrays of drm_panfrost_syncobj_ref objects describing the input
+	 * and output fences.
+	 */
+	__u64 in_syncs;
+	__u64 out_syncs;
+
+	/** Syncobj reference array sizes. */
+	__u32 in_sync_count;
+	__u32 out_sync_count;
+
+	/** BO reference array size. */
+	__u32 bo_count;
+
+	/** Combination of PANFROST_JD_REQ_* flags. */
+	__u32 requirements;
+};
+
+/* Used to submit multiple jobs in one call. */
+struct drm_panfrost_batch_submit {
+	/**
+	 * Stride of the jobs array (needed to ease extension of the
+	 * BATCH_SUBMIT ioctl). Should be set to
+	 * sizeof(struct drm_panfrost_job).
+	 */
+	__u32 job_stride;
+
+	/** Number of jobs to submit. */
+	__u32 job_count;
+
+	/** Pointer to a job array. */
+	__u64 jobs;
+
+	/**
+	 * Strides of the BO and syncobj reference arrays (needed to ease
+	 * extension of the BATCH_SUBMIT ioctl). Should be set to the sizes of
+	 * struct drm_panfrost_bo_ref and struct drm_panfrost_syncobj_ref.
+	 */
+	__u32 bo_ref_stride;
+	__u32 syncobj_ref_stride;
+
+	/**
+	 * If the submission fails, this encodes the index of the job
+	 * that failed.
+	 */
+	__u32 fail_idx;
+
+	/**
+	 * ID of the queue to submit those jobs to. 0 is the default
+	 * submit queue and should always exist. If you need a dedicated
+	 * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE.
+	 */
+	__u32 queue;
+};
 
 #if defined(__cplusplus)
 }
-- 
2.31.1