On 02/07/2021 15:32, Boris Brezillon wrote: > This should help limit the number of ioctls when submitting multiple > jobs. The new ioctl also supports syncobj timelines and BO access flags. > > v3: > * Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the > old submit path > > Signed-off-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx> Better, but I was hoping we could mostly delete panfrost_ioctl_submit(), leaving something along the lines of: static int panfrost_ioctl_submit(struct drm_device *dev, void *data, struct drm_file *file) { struct panfrost_submitqueue *queue; struct drm_panfrost_submit *args = data; struct drm_panfrost_job submit_args = { .head = args->jc, .bos = args->bo_handles, .in_syncs = args->in_syncs, .out_syncs = &args->out_sync, // FIXME .in_sync_count = args->in_sync_count, .out_sync_count = args->out_sync > 0 ? 1 : 0, .bo_count = args->bo_handle_count, .requirements = args->requirements }; int ret; queue = panfrost_submitqueue_get(file->driver_priv, 0); ret = panfrost_submit_job(dev, file, queue, &submit_args, sizeof(u32), ...); return ret; } But obviously the out_sync part needs special handling as we can't just pass a kernel pointer in like that ;) I'd prefer the above to the duplication of things like this: > + kref_init(&job->refcount); > + > + job->pfdev = pfdev; > + job->jc = args->head; > + job->requirements = args->requirements; > + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > + job->file_priv = file_priv->driver_priv; > + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); As otherwise someone is going to mess up in the future and this is going to diverge between the two ioctls. 
Steve > --- > drivers/gpu/drm/panfrost/panfrost_drv.c | 366 +++++++++++++++++++----- > drivers/gpu/drm/panfrost/panfrost_job.c | 3 + > include/uapi/drm/panfrost_drm.h | 84 ++++++ > 3 files changed, 375 insertions(+), 78 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c > index 6529e5972b47..e2897de6e77d 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c > @@ -138,111 +138,95 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) > return 0; > } > > -/** > - * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects > - * referenced by the job. > - * @dev: DRM device > - * @file_priv: DRM file for this fd > - * @args: IOCTL args > - * @job: job being set up > - * > - * Resolve handles from userspace to BOs and attach them to job. > - * > - * Note that this function doesn't need to unreference the BOs on > - * failure, because that will happen at panfrost_job_cleanup() time. 
> - */ > +#define PANFROST_BO_REF_ALLOWED_FLAGS \ > + (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP) > + > static int > -panfrost_lookup_bos(struct drm_device *dev, > - struct drm_file *file_priv, > - struct drm_panfrost_submit *args, > - struct panfrost_job *job) > +panfrost_get_job_bos(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, u32 count, > + struct panfrost_job *job) > { > + void __user *in = u64_to_user_ptr(refs); > unsigned int i; > - int ret; > > - job->bo_count = args->bo_handle_count; > + job->bo_count = count; > > - if (!job->bo_count) > + if (!count) > return 0; > > + job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos), > + GFP_KERNEL | __GFP_ZERO); > job->bo_flags = kvmalloc_array(job->bo_count, > sizeof(*job->bo_flags), > GFP_KERNEL | __GFP_ZERO); > - if (!job->bo_flags) > + if (!job->bos || !job->bo_flags) > return -ENOMEM; > > - for (i = 0; i < job->bo_count; i++) > - job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE; > + for (i = 0; i < count; i++) { > + struct drm_panfrost_bo_ref ref = { }; > + int ret; > > - ret = drm_gem_objects_lookup(file_priv, > - (void __user *)(uintptr_t)args->bo_handles, > - job->bo_count, &job->bos); > - if (ret) > - return ret; > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + return ret; > > - return panfrost_get_job_mappings(file_priv, job); > + /* Prior to the BATCH_SUBMIT ioctl all accessed BOs were > + * treated as exclusive. > + */ > + if (ref_stride == sizeof(u32)) > + ref.flags = PANFROST_BO_REF_EXCLUSIVE; > + > + if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS)) > + return -EINVAL; > + > + job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle); > + if (!job->bos[i]) > + return -EINVAL; > + > + job->bo_flags[i] = ref.flags; > + } > + > + return 0; > } > > -/** > - * panfrost_copy_in_sync() - Sets up job->deps with the sync objects > - * referenced by the job. 
> - * @dev: DRM device > - * @file_priv: DRM file for this fd > - * @args: IOCTL args > - * @job: job being set up > - * > - * Resolve syncobjs from userspace to fences and attach them to job. > - * > - * Note that this function doesn't need to unreference the fences on > - * failure, because that will happen at panfrost_job_cleanup() time. > - */ > static int > -panfrost_copy_in_sync(struct drm_device *dev, > - struct drm_file *file_priv, > - struct drm_panfrost_submit *args, > - struct panfrost_job *job) > +panfrost_get_job_in_syncs(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, > + u32 count, struct panfrost_job *job) > { > - u32 *handles; > - int ret = 0; > - int i, in_fence_count; > + const void __user *in = u64_to_user_ptr(refs); > + unsigned int i; > + int ret; > > - in_fence_count = args->in_sync_count; > - > - if (!in_fence_count) > + if (!count) > return 0; > > - handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); > - if (!handles) { > - ret = -ENOMEM; > - DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); > - goto fail; > - } > - > - if (copy_from_user(handles, > - (void __user *)(uintptr_t)args->in_syncs, > - in_fence_count * sizeof(u32))) { > - ret = -EFAULT; > - DRM_DEBUG("Failed to copy in syncobj handles\n"); > - goto fail; > - } > - > - for (i = 0; i < in_fence_count; i++) { > + for (i = 0; i < count; i++) { > + struct drm_panfrost_syncobj_ref ref = { }; > struct dma_fence *fence; > > - ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, > - &fence); > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > if (ret) > - goto fail; > + return ret; > + > + if (ref.pad) > + return -EINVAL; > + > + ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point, > + 0, &fence); > + if (ret) > + return ret; > > ret = drm_gem_fence_array_add(&job->deps, fence); > - > if (ret) > - goto fail; > + return ret; > } > > -fail: > - kvfree(handles); > - return ret; > + return 0; > } > 
> static int panfrost_ioctl_submit(struct drm_device *dev, void *data, > @@ -289,11 +273,17 @@ static int panfrost_ioctl_submit(struct drm_device *dev, void *data, > job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > job->file_priv = file->driver_priv; > > - ret = panfrost_copy_in_sync(dev, file, args, job); > + ret = panfrost_get_job_in_syncs(file, args->in_syncs, sizeof(u32), > + args->in_sync_count, job); > if (ret) > goto fail_job; > > - ret = panfrost_lookup_bos(dev, file, args, job); > + ret = panfrost_get_job_bos(file, args->bo_handles, sizeof(u32), > + args->bo_handle_count, job); > + if (ret) > + goto fail_job; > + > + ret = panfrost_get_job_mappings(file, job); > if (ret) > goto fail_job; > > @@ -491,6 +481,225 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data, > return panfrost_submitqueue_destroy(priv, id); > } > > +struct panfrost_job_out_sync { > + struct drm_syncobj *syncobj; > + struct dma_fence_chain *chain; > + u64 point; > +}; > + > +static void > +panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count) > +{ > + unsigned int i; > + > + for (i = 0; i < count; i++) { > + if (!out_syncs[i].syncobj) > + break; > + > + drm_syncobj_put(out_syncs[i].syncobj); > + kvfree(out_syncs[i].chain); > + } > + > + kvfree(out_syncs); > +} > + > +static struct panfrost_job_out_sync * > +panfrost_get_job_out_syncs(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, > + u32 count) > +{ > + void __user *in = u64_to_user_ptr(refs); > + struct panfrost_job_out_sync *out_syncs; > + unsigned int i; > + int ret; > + > + if (!count) > + return NULL; > + > + out_syncs = kvmalloc_array(count, sizeof(*out_syncs), > + GFP_KERNEL | __GFP_ZERO); > + if (!out_syncs) > + return ERR_PTR(-ENOMEM); > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_syncobj_ref ref = { }; > + > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + goto err_free_out_syncs; > + > 
+ if (ref.pad) { > + ret = -EINVAL; > + goto err_free_out_syncs; > + } > + > + out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle); > + if (!out_syncs[i].syncobj) { > + ret = -EINVAL; > + goto err_free_out_syncs; > + } > + > + out_syncs[i].point = ref.point; > + if (!out_syncs[i].point) > + continue; > + > + out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain), > + GFP_KERNEL); > + if (!out_syncs[i].chain) { > + ret = -ENOMEM; > + goto err_free_out_syncs; > + } > + } > + > + return out_syncs; > + > +err_free_out_syncs: > + panfrost_put_job_out_syncs(out_syncs, count); > + return ERR_PTR(ret); > +} > + > +static void > +panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs, > + unsigned int count, struct dma_fence *fence) > +{ > + unsigned int i; > + > + for (i = 0; i < count; i++) { > + if (out_syncs[i].chain) { > + drm_syncobj_add_point(out_syncs[i].syncobj, > + out_syncs[i].chain, > + fence, out_syncs[i].point); > + out_syncs[i].chain = NULL; > + } else { > + drm_syncobj_replace_fence(out_syncs[i].syncobj, > + fence); > + } > + } > +} > + > +#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS > + > +static int > +panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv, > + struct panfrost_submitqueue *queue, > + const struct drm_panfrost_job *args, > + u32 bo_stride, u32 syncobj_stride) > +{ > + struct panfrost_device *pfdev = dev->dev_private; > + struct panfrost_job_out_sync *out_syncs; > + struct panfrost_job *job; > + int ret; > + > + if (!args->head) > + return -EINVAL; > + > + if (args->requirements & ~PANFROST_JD_ALLOWED_REQS) > + return -EINVAL; > + > + job = kzalloc(sizeof(*job), GFP_KERNEL); > + if (!job) > + return -ENOMEM; > + > + kref_init(&job->refcount); > + > + job->pfdev = pfdev; > + job->jc = args->head; > + job->requirements = args->requirements; > + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > + job->file_priv = file_priv->driver_priv; > + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); > + > 
+ ret = panfrost_get_job_in_syncs(file_priv, > + args->in_syncs, > + syncobj_stride, > + args->in_sync_count, > + job); > + if (ret) > + goto err_put_job; > + > + out_syncs = panfrost_get_job_out_syncs(file_priv, > + args->out_syncs, > + syncobj_stride, > + args->out_sync_count); > + if (IS_ERR(out_syncs)) { > + ret = PTR_ERR(out_syncs); > + goto err_put_job; > + } > + > + ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride, > + args->bo_count, job); > + if (ret) > + goto err_put_job; > + > + ret = panfrost_get_job_mappings(file_priv, job); > + if (ret) > + goto err_put_job; > + > + ret = panfrost_job_push(queue, job); > + if (ret) { > + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); > + goto err_put_job; > + } > + > + panfrost_set_job_out_fence(out_syncs, args->out_sync_count, > + job->render_done_fence); > + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); > + return 0; > + > +err_put_job: > + panfrost_job_put(job); > + return ret; > +} > + > +static int > +panfrost_ioctl_batch_submit(struct drm_device *dev, void *data, > + struct drm_file *file_priv) > +{ > + struct drm_panfrost_batch_submit *args = data; > + void __user *jobs_args = u64_to_user_ptr(args->jobs); > + struct panfrost_submitqueue *queue; > + unsigned int i; > + int ret; > + > + /* Relax this test if new fields are added to > + * drm_panfrost_{bo_ref,syncobj_ref,job}. 
> + */ > + if (args->job_stride < sizeof(struct drm_panfrost_job) || > + args->bo_ref_stride < sizeof(struct drm_panfrost_bo_ref) || > + args->syncobj_ref_stride < sizeof(struct drm_panfrost_syncobj_ref)) > + return -EINVAL; > + > + queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue); > + if (IS_ERR(queue)) > + return PTR_ERR(queue); > + > + for (i = 0; i < args->job_count; i++) { > + struct drm_panfrost_job job_args = { }; > + > + ret = copy_struct_from_user(&job_args, sizeof(job_args), > + jobs_args + (i * args->job_stride), > + args->job_stride); > + if (ret) { > + args->fail_idx = i; > + goto out_put_queue; > + } > + > + ret = panfrost_submit_job(dev, file_priv, queue, &job_args, > + args->bo_ref_stride, > + args->syncobj_ref_stride); > + if (ret) { > + args->fail_idx = i; > + goto out_put_queue; > + } > + } > + > +out_put_queue: > + panfrost_submitqueue_put(queue); > + return 0; > +} > + > int panfrost_unstable_ioctl_check(void) > { > if (!unstable_ioctls) > @@ -570,6 +779,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { > PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), > PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW), > PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW), > + PANFROST_IOCTL(BATCH_SUBMIT, batch_submit, DRM_RENDER_ALLOW), > }; > > DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c > index 56ae89272e19..4e1540bce865 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) > return ret; > } > > + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) > + continue; > + > ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i], > exclusive); > if (ret) > diff --git a/include/uapi/drm/panfrost_drm.h 
b/include/uapi/drm/panfrost_drm.h > index 7ee02fd1ac75..ce0c1b96a58c 100644 > --- a/include/uapi/drm/panfrost_drm.h > +++ b/include/uapi/drm/panfrost_drm.h > @@ -23,6 +23,7 @@ extern "C" { > #define DRM_PANFROST_MADVISE 0x08 > #define DRM_PANFROST_CREATE_SUBMITQUEUE 0x09 > #define DRM_PANFROST_DESTROY_SUBMITQUEUE 0x0a > +#define DRM_PANFROST_BATCH_SUBMIT 0x0b > > #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) > #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) > @@ -33,6 +34,7 @@ extern "C" { > #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) > #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue) > #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32) > +#define DRM_IOCTL_PANFROST_BATCH_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit) > > /* > * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module > @@ -241,7 +243,89 @@ struct drm_panfrost_create_submitqueue { > __u32 id; /* out, identifier */ > }; > > +/* Syncobj reference passed at job submission time to encode explicit > + * input/output fences. > + */ > +struct drm_panfrost_syncobj_ref { > + __u32 handle; > + __u32 pad; > + __u64 point; > +}; > + > #define PANFROST_BO_REF_EXCLUSIVE 0x1 > +#define PANFROST_BO_REF_NO_IMPLICIT_DEP 0x2 > + > +/* Describes a BO referenced by a job and the type of access. */ > +struct drm_panfrost_bo_ref { > + __u32 handle; > + __u32 flags; > +}; > + > +/* Describes a GPU job and the resources attached to it. */ > +struct drm_panfrost_job { > + /** GPU pointer to the head of the job chain. 
*/ > + __u64 head; > + > + /** > + * Array of drm_panfrost_bo_ref objects describing the BOs referenced > + * by this job. > + */ > + __u64 bos; > + > + /** > + * Arrays of drm_panfrost_syncobj_ref objects describing the input > + * and output fences. > + */ > + __u64 in_syncs; > + __u64 out_syncs; > + > + /** Syncobj reference array sizes. */ > + __u32 in_sync_count; > + __u32 out_sync_count; > + > + /** BO reference array size. */ > + __u32 bo_count; > + > + /** Combination of PANFROST_JD_REQ_* flags. */ > + __u32 requirements; > +}; > + > +/* Used to submit multiple jobs in one call */ > +struct drm_panfrost_batch_submit { > + /** > + * Stride of the jobs array (needed to ease extension of the > + * BATCH_SUBMIT ioctl). Should be set to > + * sizeof(struct drm_panfrost_job). > + */ > + __u32 job_stride; > + > + /** Number of jobs to submit. */ > + __u32 job_count; > + > + /* Pointer to a job array. */ > + __u64 jobs; > + > + /** > + * Stride of the BO and syncobj reference arrays (needed to ease > + * extension of the BATCH_SUBMIT ioctl). Should be set to > + * sizeof(struct drm_panfrost_bo_ref). > + */ > + __u32 bo_ref_stride; > + __u32 syncobj_ref_stride; > + > + /** > + * If the submission fails, this encodes the index of the job > + * failed. > + */ > + __u32 fail_idx; > + > + /** > + * ID of the queue to submit those jobs to. 0 is the default > + * submit queue and should always exists. If you need a dedicated > + * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE. > + */ > + __u32 queue; > +}; > > #if defined(__cplusplus) > } >