From: Ketil Johnsen <ketil.johnsen@xxxxxxx>

Allow userspace to mmap() the ring buffer, the doorbell page and the user
I/O pages, so that it can submit work directly to queues.

Signed-off-by: Ketil Johnsen <ketil.johnsen@xxxxxxx>
Co-developed-by: Mihail Atanassov <mihail.atanassov@xxxxxxx>
Signed-off-by: Mihail Atanassov <mihail.atanassov@xxxxxxx>
Co-developed-by: Akash Goel <akash.goel@xxxxxxx>
Signed-off-by: Akash Goel <akash.goel@xxxxxxx>
---
 drivers/gpu/drm/panthor/panthor_device.h |  24 ++++
 drivers/gpu/drm/panthor/panthor_drv.c    |  70 ++++++++++-
 drivers/gpu/drm/panthor/panthor_sched.c  | 151 ++++++++++++++++++-----
 drivers/gpu/drm/panthor/panthor_sched.h  |   4 +-
 4 files changed, 210 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index e388c0472ba7..7c27dbba8270 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -171,6 +171,9 @@ struct panthor_file {
	/** @ptdev: Device attached to this file. */
	struct panthor_device *ptdev;

+	/** @drm_file: Corresponding drm_file. */
+	struct drm_file *drm_file;
+
	/** @vms: VM pool attached to this file. */
	struct panthor_vm_pool *vms;

@@ -353,6 +356,27 @@ static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \
			       pirq);						\
}

+/*
+ * We currently set the maximum number of groups per file to an arbitrarily
+ * low value, but this can be updated if we need more.
+ */
+#define MAX_GROUPS_PER_POOL 128
+
+/*
+ * The maximum page size supported by the driver. When running with a smaller
+ * page size, only the first page at this offset is valid.
+ */
+#define DRM_PANTHOR_MAX_PAGE_SHIFT 16
+
+/* Carve out private MMIO offsets */
+#define PANTHOR_PRIVATE_MMIO_OFFSET	(DRM_PANTHOR_USER_MMIO_OFFSET + (1ull << 42))
+
+/* Give out file offsets for doorbell pages based on the group handle */
+#define PANTHOR_DOORBELL_OFFSET(group)	(PANTHOR_PRIVATE_MMIO_OFFSET + \
+					 ((group) << DRM_PANTHOR_MAX_PAGE_SHIFT))
+#define PANTHOR_DOORBELL_OFFSET_START	PANTHOR_DOORBELL_OFFSET(0)
+#define PANTHOR_DOORBELL_OFFSET_END	PANTHOR_DOORBELL_OFFSET(MAX_GROUPS_PER_POOL)
+
 extern struct workqueue_struct *panthor_cleanup_wq;

 #endif
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index 4f1efe616698..0bd600c464b8 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -67,6 +67,55 @@ panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const v
	return 0;
 }

+/**
+ * panthor_set_uobj_array() - Copy a kernel object array into a user object array.
+ * @out: The user object array to copy to.
+ * @min_stride: Minimum array stride.
+ * @obj_size: Kernel object size.
+ * @in: The kernel object array to copy from.
+ *
+ * Helper automating kernel -> user object copies.
+ *
+ * Don't use this function directly, use PANTHOR_UOBJ_SET_ARRAY() instead.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_set_uobj_array(const struct drm_panthor_obj_array *out, u32 min_stride, u32 obj_size,
+		       const void *in)
+{
+	if (out->stride < min_stride)
+		return -EINVAL;
+
+	if (!out->count)
+		return 0;
+
+	if (obj_size == out->stride) {
+		if (copy_to_user(u64_to_user_ptr(out->array), in,
+				 (unsigned long)obj_size * out->count))
+			return -EFAULT;
+	} else {
+		u32 cpy_elem_size = min_t(u32, out->stride, obj_size);
+		void __user *out_ptr = u64_to_user_ptr(out->array);
+		const void *in_ptr = in;
+
+		for (u32 i = 0; i < out->count; i++) {
+			if (copy_to_user(out_ptr, in_ptr, cpy_elem_size))
+				return -EFAULT;
+
+			if (out->stride > obj_size &&
+			    clear_user(out_ptr + cpy_elem_size, out->stride - obj_size)) {
+				return -EFAULT;
+			}
+
+			out_ptr += out->stride;
+			in_ptr += obj_size;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array.
  * @in: The object array to copy.
@@ -182,6 +231,20 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
					 PANTHOR_UOBJ_MIN_SIZE(_src_obj), \
					 sizeof(_src_obj), &(_src_obj))

+/**
+ * PANTHOR_UOBJ_SET_ARRAY() - Copy a kernel object array to a user object array.
+ * @_dest_drm_panthor_obj_array: The &struct drm_panthor_obj_array containing a
+ * __u64 raw pointer to the destination C array in user space and the size of
+ * each array element in user space (the 'stride').
+ * @_src_array: The source C array object in kernel space.
+ *
+ * Return: Error code. See panthor_set_uobj_array().
+ */
+#define PANTHOR_UOBJ_SET_ARRAY(_dest_drm_panthor_obj_array, _src_array) \
+	panthor_set_uobj_array(_dest_drm_panthor_obj_array, \
+			       PANTHOR_UOBJ_MIN_SIZE((_src_array)[0]), \
+			       sizeof((_src_array)[0]), _src_array)
+
 /**
  * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible
  * object array.
@@ -1012,10 +1075,8 @@ static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
		return ret;

	ret = panthor_group_create(pfile, args, queue_args);
-	if (ret >= 0) {
-		args->group_handle = ret;
-		ret = 0;
-	}
+	if (!ret)
+		ret = PANTHOR_UOBJ_SET_ARRAY(&args->queues, queue_args);

	kvfree(queue_args);
	return ret;
@@ -1262,6 +1323,7 @@ panthor_open(struct drm_device *ddev, struct drm_file *file)
	}

	pfile->ptdev = ptdev;
+	pfile->drm_file = file;

	ret = panthor_vm_pool_create(pfile);
	if (ret)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index b2cf053b3601..ad160a821957 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -384,6 +384,21 @@ struct panthor_queue {
	 */
	u8 doorbell_id;

+	/** @doorbell_offset: File offset userspace can use to map the doorbell page. */
+	u64 doorbell_offset;
+
+	/** @ringbuf_offset: File offset userspace can use to map the ring buffer.
+	 *
+	 * Only valid when the group is created with DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT.
+	 */
+	u64 ringbuf_offset;
+
+	/** @user_io_offset: File offset userspace can use to map the two user I/O pages.
+	 *
+	 * Only valid when the group is created with DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT.
+	 */
+	u64 user_io_offset;
+
	/**
	 * @priority: Priority of the queue inside the group.
	 *
@@ -524,6 +539,12 @@ struct panthor_group {
	/** @ptdev: Device. */
	struct panthor_device *ptdev;

+	/** @pfile: Associated panthor_file. */
+	struct panthor_file *pfile;
+
+	/** @handle: Integer value used by userspace to refer to this group. */
+	u32 handle;
+
	/** @vm: VM bound to the group. */
	struct panthor_vm *vm;

@@ -548,6 +569,9 @@ struct panthor_group {
	/** @priority: Group priority (check panthor_csg_priority). */
	u8 priority;

+	/** @user_submit: True if userspace controls submission. */
+	bool user_submit;
+
	/** @blocked_queues: Bitmask reflecting the blocked queues. */
	u32 blocked_queues;

@@ -708,12 +732,6 @@ struct panthor_group {
		mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay);	\
 } while (0)

-/*
- * We currently set the maximum of groups per file to an arbitrary low value.
- * But this can be updated if we need more.
- */
-#define MAX_GROUPS_PER_POOL 128
-
 /**
  * struct panthor_group_pool - Group pool
  *
@@ -836,6 +854,12 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *

	panthor_queue_put_syncwait_obj(queue);

+	if (queue->ringbuf_offset)
+		drm_vma_node_revoke(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+
+	if (queue->user_io_offset)
+		drm_vma_node_revoke(&queue->iface.mem->obj->vma_node, group->pfile->drm_file);
+
	panthor_kernel_bo_destroy(queue->ringbuf);
	panthor_kernel_bo_destroy(queue->iface.mem);

@@ -1552,7 +1576,7 @@ static void csg_slot_sync_update_locked(struct panthor_device *ptdev,

	lockdep_assert_held(&ptdev->scheduler->lock);

-	if (group)
+	if (group && !group->user_submit)
		group_queue_work(group, sync_upd);

	sched_queue_work(ptdev->scheduler, sync_upd);
@@ -2019,10 +2043,12 @@ group_term_post_processing(struct panthor_group *group)
		}
		spin_unlock(&queue->fence_ctx.lock);

-		/* Manually update the syncobj seqno to unblock waiters. */
-		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
-		syncobj->status = ~0;
-		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+		if (!group->user_submit) {
+			/* Manually update the syncobj seqno to unblock waiters. */
+			syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
+			syncobj->status = ~0;
+			syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+		}
		sched_queue_work(group->ptdev->scheduler, sync_upd);
	}
	dma_fence_end_signalling(cookie);
@@ -2785,6 +2811,9 @@ static void group_sync_upd_work(struct work_struct *work)
	u32 queue_idx;
	bool cookie;

+	if (drm_WARN_ON(&group->ptdev->base, group->user_submit))
+		return;
+
	cookie = dma_fence_begin_signalling();
	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
		struct panthor_queue *queue = group->queues[queue_idx];
@@ -3021,6 +3050,9 @@ group_create_queue(struct panthor_group *group,
	if (args->priority > CSF_MAX_QUEUE_PRIO)
		return ERR_PTR(-EINVAL);

+	if (args->ringbuf_offset || args->user_io_offset || args->doorbell_offset)
+		return ERR_PTR(-EINVAL);
+
	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return ERR_PTR(-ENOMEM);
@@ -3046,6 +3078,20 @@ group_create_queue(struct panthor_group *group,
	if (ret)
		goto err_free_queue;

+	if (group->user_submit) {
+		ret = drm_vma_node_allow(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+		if (ret)
+			goto err_free_queue;
+
+		ret = drm_gem_create_mmap_offset(queue->ringbuf->obj);
+		if (ret) {
+			drm_vma_node_revoke(&queue->ringbuf->obj->vma_node, group->pfile->drm_file);
+			goto err_free_queue;
+		}
+
+		queue->ringbuf_offset = drm_vma_node_offset_addr(&queue->ringbuf->obj->vma_node);
+	}
+
	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
							    &queue->iface.input,
							    &queue->iface.output,
@@ -3056,6 +3102,21 @@
		goto err_free_queue;
	}

+	if (group->user_submit) {
+		ret = drm_vma_node_allow(&queue->iface.mem->obj->vma_node, group->pfile->drm_file);
+		if (ret)
+			goto err_free_queue;
+
+		ret = drm_gem_create_mmap_offset(queue->iface.mem->obj);
+		if (ret) {
+			drm_vma_node_revoke(&queue->iface.mem->obj->vma_node,
+					    group->pfile->drm_file);
+			goto err_free_queue;
+		}
+
+		queue->user_io_offset = drm_vma_node_offset_addr(&queue->iface.mem->obj->vma_node);
+	}
+
	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
			     group->ptdev->scheduler->wq, 1,
			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
@@ -3075,11 +3136,9 @@
	return ERR_PTR(ret);
 }

-#define MAX_GROUPS_PER_POOL 128
-
 int panthor_group_create(struct panthor_file *pfile,
-			 const struct drm_panthor_group_create *group_args,
-			 const struct drm_panthor_queue_create *queue_args)
+			 struct drm_panthor_group_create *group_args,
+			 struct drm_panthor_queue_create *queue_args)
 {
	struct panthor_device *ptdev = pfile->ptdev;
	struct panthor_group_pool *gpool = pfile->groups;
@@ -3115,6 +3174,7 @@ int panthor_group_create(struct panthor_file *pfile,

	group->csg_id = -1;
	group->ptdev = ptdev;
+	group->pfile = pfile;
	group->max_compute_cores = group_args->max_compute_cores;
	group->compute_core_mask = group_args->compute_core_mask;
	group->max_fragment_cores = group_args->max_fragment_cores;
@@ -3130,6 +3190,9 @@ int panthor_group_create(struct panthor_file *pfile,
	INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
	INIT_WORK(&group->release_work, group_release_work);

+	if (group_args->flags & DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT)
+		group->user_submit = true;
+
	group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
	if (!group->vm) {
		ret = -EINVAL;
@@ -3152,25 +3215,27 @@ int panthor_group_create(struct panthor_file *pfile,
		goto err_put_group;
	}

-	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
-						   group_args->queues.count *
-						   sizeof(struct panthor_syncobj_64b),
-						   DRM_PANTHOR_BO_NO_MMAP,
-						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
-						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
-						   PANTHOR_VM_KERNEL_AUTO_VA);
-	if (IS_ERR(group->syncobjs)) {
-		ret = PTR_ERR(group->syncobjs);
-		goto err_put_group;
+	if (!group->user_submit) {
+		group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
+							   group_args->queues.count *
+							   sizeof(struct panthor_syncobj_64b),
+							   DRM_PANTHOR_BO_NO_MMAP,
+							   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+							   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+							   PANTHOR_VM_KERNEL_AUTO_VA);
+		if (IS_ERR(group->syncobjs)) {
+			ret = PTR_ERR(group->syncobjs);
+			goto err_put_group;
+		}
+
+		ret = panthor_kernel_bo_vmap(group->syncobjs);
+		if (ret)
+			goto err_put_group;
+
+		memset(group->syncobjs->kmap, 0,
+		       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
	}

-	ret = panthor_kernel_bo_vmap(group->syncobjs);
-	if (ret)
-		goto err_put_group;
-
-	memset(group->syncobjs->kmap, 0,
-	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
-
	for (i = 0; i < group_args->queues.count; i++) {
		group->queues[i] = group_create_queue(group, &queue_args[i]);
		if (IS_ERR(group->queues[i])) {
@@ -3188,6 +3253,21 @@ int panthor_group_create(struct panthor_file *pfile,
	if (ret)
		goto err_put_group;

+	group->handle = gid;
+	group_args->group_handle = gid;
+
+	if (group->user_submit) {
+		for (i = 0; i < group_args->queues.count; i++) {
+			/* All queues in a group use the same HW doorbell */
+			group->queues[i]->doorbell_offset = PANTHOR_DOORBELL_OFFSET(gid - 1);
+
+			/* Copy to queue_args so these values can be returned to userspace */
+			queue_args[i].doorbell_offset = group->queues[i]->doorbell_offset;
+			queue_args[i].ringbuf_offset = group->queues[i]->ringbuf_offset;
+			queue_args[i].user_io_offset = group->queues[i]->user_io_offset;
+		}
+	}
+
	mutex_lock(&sched->reset.lock);
	if (atomic_read(&sched->reset.in_progress)) {
		panthor_group_stop(group);
@@ -3199,7 +3279,7 @@
	}
	mutex_unlock(&sched->reset.lock);

-	return gid;
+	return 0;

 err_put_group:
	group_put(group);
@@ -3390,6 +3470,11 @@ panthor_job_create(struct panthor_file *pfile,
		goto err_put_job;
	}

+	if (job->group->user_submit) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
	if (job->queue_idx >= job->group->queue_count ||
	    !job->group->queues[job->queue_idx]) {
		ret = -EINVAL;
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 3a30d2328b30..55b6534fa390 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -19,8 +19,8 @@ struct panthor_group_pool;
 struct panthor_job;

 int panthor_group_create(struct panthor_file *pfile,
-			 const struct drm_panthor_group_create *group_args,
-			 const struct drm_panthor_queue_create *queue_args);
+			 struct drm_panthor_group_create *group_args,
+			 struct drm_panthor_queue_create *queue_args);
 int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
 int panthor_group_get_state(struct panthor_file *pfile,
			    struct drm_panthor_group_get_state *get_state);
-- 
2.45.0
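
For readers following along, the intended userspace flow looks roughly like
the sketch below. This is an illustrative sketch only, not part of the patch:
it assumes the uAPI additions made elsewhere in this series (the
DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT group flag, and the ringbuf_offset,
user_io_offset and doorbell_offset fields of struct drm_panthor_queue_create
that panthor_group_create() fills in above). Error handling and the group
core mask/priority setup are omitted for brevity.

#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>

#include <drm/panthor_drm.h>

int setup_user_queue(int drm_fd, uint32_t vm_id)
{
	long page_size = sysconf(_SC_PAGESIZE);
	struct drm_panthor_queue_create qc = {
		.priority = 0,
		.ringbuf_size = 1 << 16,
	};
	struct drm_panthor_group_create gc = {
		/* Flag assumed from the uAPI patch in this series. */
		.flags = DRM_PANTHOR_GROUP_CREATE_USER_SUBMIT,
		.vm_id = vm_id,
		.queues = {
			.stride = sizeof(qc),
			.count = 1,
			.array = (uint64_t)(uintptr_t)&qc,
		},
	};
	void *ringbuf, *user_io, *doorbell;

	if (ioctl(drm_fd, DRM_IOCTL_PANTHOR_GROUP_CREATE, &gc))
		return -1;

	/*
	 * The kernel copied each queue's mmap offsets back into our array
	 * through PANTHOR_UOBJ_SET_ARRAY(&args->queues, queue_args).
	 */
	ringbuf = mmap(NULL, qc.ringbuf_size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, drm_fd, qc.ringbuf_offset);
	user_io = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, drm_fd, qc.user_io_offset);
	doorbell = mmap(NULL, page_size, PROT_WRITE,
			MAP_SHARED, drm_fd, qc.doorbell_offset);
	if (ringbuf == MAP_FAILED || user_io == MAP_FAILED ||
	    doorbell == MAP_FAILED)
		return -1;

	/*
	 * Submission itself (not shown) would write CS instructions into
	 * ringbuf, publish the new insert pointer through the user I/O
	 * pages, and ring the doorbell to notify the firmware.
	 */
	return 0;
}

Note that the doorbell file offsets handed out by PANTHOR_DOORBELL_OFFSET()
are spaced 1 << DRM_PANTHOR_MAX_PAGE_SHIFT (64 KiB) apart inside the private
MMIO carve-out, so the same offset scheme works for every supported kernel
page size; with smaller pages, only the first page of each 64 KiB window is
valid, as the header comment notes.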