From: Ketil Johnsen <ketil.johnsen@xxxxxxx>

Synchronisation between GPU queues in groups from the same VM is achieved
using either 32b or 64b synchronisation objects (see panthor_syncobj.(c|h)).
They are also the mechanism used to sync between a queue and the kernel
(or the user application).

To allow for the latter case, introduce cross-group sync (XGS) queues.
An XGS queue is a DRM scheduler/entity pair, associated with a VM, which
runs XGS jobs -- WAITs or SETs/ADDs on those HW syncobjs -- so that a
userspace driver can do CPU-to-GPU (and vice versa) synchronisation and
link it up with DRM sync primitives.

Signed-off-by: Ketil Johnsen <ketil.johnsen@xxxxxxx>
Co-developed-by: Akash Goel <akash.goel@xxxxxxx>
Signed-off-by: Akash Goel <akash.goel@xxxxxxx>
Signed-off-by: Mihail Atanassov <mihail.atanassov@xxxxxxx>
---
 drivers/gpu/drm/panthor/Makefile         |   3 +-
 drivers/gpu/drm/panthor/panthor_device.h |   4 +
 drivers/gpu/drm/panthor/panthor_drv.c    | 123 ++++-
 drivers/gpu/drm/panthor/panthor_sched.c  |  25 +-
 drivers/gpu/drm/panthor/panthor_sched.h  |   1 +
 drivers/gpu/drm/panthor/panthor_xgs.c    | 638 +++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_xgs.h    |  42 ++
 7 files changed, 832 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/panthor/panthor_xgs.c
 create mode 100644 drivers/gpu/drm/panthor/panthor_xgs.h

diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile
index 0af27f33bfe2..7637bae47e26 100644
--- a/drivers/gpu/drm/panthor/Makefile
+++ b/drivers/gpu/drm/panthor/Makefile
@@ -10,6 +10,7 @@ panthor-y := \
 	panthor_heap.o \
 	panthor_mmu.o \
 	panthor_sched.o \
-	panthor_syncobj.o
+	panthor_syncobj.o \
+	panthor_xgs.o
 
 obj-$(CONFIG_DRM_PANTHOR) += panthor.o
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index 87cce384e36a..1e98d2a856b7 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -17,6 +17,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/panthor_drm.h>
 
+struct panthor_xgs_queue_pool;
 struct panthor_csf;
 struct panthor_csf_ctx;
 struct panthor_device;
@@ -182,6 +183,9 @@ struct panthor_file {
 
 	/** @groups: Scheduling group pool attached to this file. */
 	struct panthor_group_pool *groups;
+
+	/** @xgs_queues: XGS queues attached to this file. */
+	struct panthor_xgs_queue_pool *xgs_queues;
 };
 
 int panthor_device_init(struct panthor_device *ptdev);
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
index ce2fdcd3fb42..681ac09b6343 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -19,6 +19,7 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/panthor_drm.h>
 
+#include "panthor_xgs.h"
 #include "panthor_device.h"
 #include "panthor_fw.h"
 #include "panthor_gem.h"
@@ -215,7 +216,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
 	PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
 	PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
 	PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
-	PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
+	PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
+	PANTHOR_UOBJ_DECL(struct drm_panthor_xgs_op, pad))
 
 /**
  * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
@@ -1316,6 +1318,114 @@ static int panthor_ioctl_group_kick(struct drm_device *ddev, void *data,
 	return 0;
 }
 
+static int panthor_ioctl_xgs_queue_create(struct drm_device *ddev, void *data,
+					  struct drm_file *file)
+{
+	struct drm_panthor_xgs_queue_create *args = data;
+	struct panthor_file *pfile = file->driver_priv;
+
+	if (args->pad)
+		return -EINVAL;
+
+	return panthor_xgs_queue_create(pfile, args->vm_id,
+					args->eventfd_sync_update, &args->handle);
+}
+
+static int panthor_ioctl_xgs_queue_destroy(struct drm_device *ddev, void *data,
+					   struct drm_file *file)
+{
+	struct drm_panthor_xgs_queue_destroy *args = data;
+	struct panthor_file *pfile = file->driver_priv;
+
+	if (args->pad)
+		return -EINVAL;
+
+	return panthor_xgs_queue_destroy(pfile, args->handle);
+}
+
+#define XGS_QUEUE_SUBMIT_FLAGS (DRM_PANTHOR_XGS_QUEUE_SUBMIT_ERROR_BARRIER_PRE | \
+				DRM_PANTHOR_XGS_QUEUE_SUBMIT_ERROR_BARRIER_POST)
+
+static int panthor_ioctl_xgs_queue_submit(struct drm_device *ddev, void *data,
+					  struct drm_file *file)
+{
+	struct drm_panthor_xgs_queue_submit *args = data;
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_xgs_op *ops_args;
+	struct panthor_xgs_queue *queue;
+	struct panthor_submit_ctx ctx;
+	struct drm_sched_job *job;
+	struct panthor_vm *vm;
+	int ret;
+
+	if (args->flags & ~XGS_QUEUE_SUBMIT_FLAGS)
+		return -EINVAL;
+
+	if (args->ops.count) {
+		ret = PANTHOR_UOBJ_GET_ARRAY(ops_args, &args->ops);
+		if (ret)
+			return ret;
+	} else {
+		ops_args = NULL;
+	}
+
+	ret = -EINVAL;
+	queue = panthor_xgs_queue_pool_get_xgs_queue(pfile->xgs_queues, args->handle);
+	if (!queue)
+		goto out_free_ops_args;
+
+	ret = panthor_submit_ctx_init(&ctx, file, 1);
+	if (ret)
+		goto out_put_queue;
+
+	/* Create job object */
+	job = panthor_xgs_job_create(queue, ops_args, args->ops.count);
+	if (IS_ERR(job)) {
+		ret = PTR_ERR(job);
+		goto out_cleanup_submit_ctx;
+	}
+
+	/* handed over to the job object */
+	ops_args = NULL;
+
+	/* attach sync operations */
+	ret = panthor_submit_ctx_add_job(&ctx, 0, job, &args->syncs);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* Collect signal operations on all jobs */
+	ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* The queue already holds a VM ref, so we don't need to take an extra one */
+	vm = panthor_xgs_queue_vm(queue);
+
+	/* We acquire/prepare resvs on the job */
+	drm_exec_until_all_locked(&ctx.exec) {
+		ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm, 1);
+	}
+
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* Add deps, arm job fence and register the job fence to signal array */
+	ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* Nothing can fail after that point */
+	panthor_submit_ctx_push_jobs(&ctx, panthor_xgs_job_update_resvs);
+
+out_cleanup_submit_ctx:
+	panthor_submit_ctx_cleanup(&ctx, panthor_xgs_job_put);
+out_put_queue:
+	panthor_xgs_queue_put(queue);
+out_free_ops_args:
+	kvfree(ops_args);
+
+	return ret;
+}
+
 static int
 panthor_open(struct drm_device *ddev, struct drm_file *file)
 {
@@ -1343,9 +1453,16 @@ panthor_open(struct drm_device *ddev, struct drm_file *file)
 	if (ret)
 		goto err_destroy_vm_pool;
 
+	ret = panthor_xgs_queue_pool_create(pfile);
+	if (ret)
+		goto err_destroy_group_pool;
+
 	file->driver_priv = pfile;
 	return 0;
 
+err_destroy_group_pool:
+	panthor_group_pool_destroy(pfile);
+
 err_destroy_vm_pool:
 	panthor_vm_pool_destroy(pfile);
 
@@ -1363,6 +1480,7 @@ panthor_postclose(struct drm_device *ddev, struct drm_file *file)
 	struct panthor_file *pfile = file->driver_priv;
 
 	panthor_group_pool_destroy(pfile);
+	panthor_xgs_queue_pool_destroy(pfile);
 	panthor_vm_pool_destroy(pfile);
 
 	kfree(pfile);
@@ -1387,6 +1505,9 @@ static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
 	PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW),
 	PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
 	PANTHOR_IOCTL(GROUP_KICK, group_kick, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(XGS_QUEUE_CREATE, xgs_queue_create, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(XGS_QUEUE_DESTROY, xgs_queue_destroy, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(XGS_QUEUE_SUBMIT, xgs_queue_submit, DRM_RENDER_ALLOW),
 };
 
 static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index f272aeee8a8f..92172b2c6253 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -22,6 +22,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
+#include "panthor_xgs.h"
 #include "panthor_devfreq.h"
 #include "panthor_device.h"
 #include "panthor_fw.h"
@@ -1496,8 +1497,13 @@ static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
 
 	lockdep_assert_held(&ptdev->scheduler->lock);
 
-	if (group && !group->user_submit)
-		group_queue_work(group, sync_upd);
+	if (group) {
+		/* Rerun XGS jobs immediately, as this can potentially unblock the group */
+		panthor_xgs_queue_pool_recheck(group->pfile);
+
+		if (!group->user_submit)
+			group_queue_work(group, sync_upd);
+	}
 
 	sched_queue_work(ptdev->scheduler, sync_upd);
 }
@@ -1691,9 +1697,15 @@ static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
 	return "queue-fence";
 }
 
+static void job_fence_free(struct dma_fence *fence)
+{
+	dma_fence_free(fence);
+}
+
 static const struct dma_fence_ops panthor_queue_fence_ops = {
 	.get_driver_name = fence_get_driver_name,
 	.get_timeline_name = queue_fence_get_timeline_name,
+	.release = job_fence_free,
 };
 
 struct panthor_csg_slots_upd_ctx {
@@ -2431,6 +2443,10 @@ static void sync_upd_work(struct work_struct *work)
 		if (unblocked_queues) {
 			group->blocked_queues &= ~unblocked_queues;
 
+			/* Sync updates from an XGS queue can happen when we are not ticking */
+			if (sched->resched_target == U64_MAX)
+				immediate_tick = true;
+
 			if (group->csg_id < 0) {
 				list_move(&group->run_node,
 					  &sched->groups.runnable[group->priority]);
@@ -2788,6 +2804,11 @@ void panthor_sched_kick(struct panthor_file *pfile, u32 group_handle, u32 queue_
 	group_put(group);
 }
 
+void panthor_sched_sync_update(struct panthor_device *ptdev)
+{
+	sched_queue_work(ptdev->scheduler, sync_upd);
+}
+
 static void group_sync_upd_work(struct work_struct *work)
 {
 	struct panthor_group *group =
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
index 18fb7ad0952e..2cb58c66b8ac 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.h
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -49,5 +49,6 @@ void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);
 u8 panthor_sched_doorbell_id(struct panthor_file *pfile, u32 group_handle);
 
 void panthor_sched_kick(struct panthor_file *pfile, u32 group_handle, u32 queue_mask);
+void panthor_sched_sync_update(struct panthor_device *ptdev);
 
 #endif
diff --git a/drivers/gpu/drm/panthor/panthor_xgs.c b/drivers/gpu/drm/panthor/panthor_xgs.c
new file mode 100644
index 000000000000..a900badb9224
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_xgs.c
@@ -0,0 +1,638 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2024 ARM Limited. All rights reserved. */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panthor_drm.h>
+
+#include <linux/iosys-map.h>
+
+#include "panthor_xgs.h"
+#include "panthor_device.h"
+#include "panthor_gem.h"
+#include "panthor_mmu.h"
+#include "panthor_sched.h"
+#include "panthor_syncobj.h"
+
+#define JOB_TIMEOUT_MS 5000
+
+/**
+ * struct panthor_xgs_queue - Cross-group sync (XGS) queue object
+ */
+struct panthor_xgs_queue {
+	/** @refcount: Reference count */
+	struct kref refcount;
+
+	/** @lock: Lock to synchronize access to this queue */
+	struct mutex lock;
+
+	/** @handle: Integer value used by userspace to refer to this queue */
+	u32 handle;
+
+	/** @ptdev: Panthor device for which this queue was created */
+	struct panthor_device *ptdev;
+
+	/** @vm: VM associated with this XGS queue */
+	struct panthor_vm *vm;
+
+	/** @eventfd_sync_update_ctx: eventfd context to signal on XGS set ops */
+	struct eventfd_ctx *eventfd_sync_update_ctx;
+
+	/** @scheduler: Scheduler instance used to execute XGS jobs */
+	struct drm_gpu_scheduler scheduler;
+
+	/** @entity: Scheduler entity with XGS jobs */
+	struct drm_sched_entity entity;
+
+	/** @fence_ctx: Fence context fields. */
+	struct {
+		/** @lock: Used to protect access to all fences allocated by this context. */
+		spinlock_t lock;
+
+		/**
+		 * @id: Fence context ID.
+		 *
+		 * Allocated with dma_fence_context_alloc().
+		 */
+		u64 id;
+
+		/** @seqno: Sequence number of the last initialized fence. */
+		atomic64_t seqno;
+
+		/**
+		 * @in_flight_jobs: List containing all in-flight jobs.
+		 *
+		 * Used to keep track of jobs and signal panthor_xgs_job::done_fence
+		 * when the synchronization objects they wait on are updated.
+		 */
+		struct list_head in_flight_jobs;
+	} fence_ctx;
+
+	/** @destroyed: True if queue is marked for destruction and should not be used */
+	bool destroyed;
+
+	/**
+	 * @release_work: Work used to release XGS queue resources.
+	 *
+	 * We need to postpone the queue release to avoid a deadlock,
+	 * otherwise "free_job" could end up calling back into DRM sched.
+	 */
+	struct work_struct release_work;
+};
+
+/*
+ * The maximum number of XGS queues per file is currently set to an
+ * arbitrarily low value. This can be raised if we ever need more.
+ */
+#define MAX_XGS_QUEUES_PER_POOL 128
+
+/**
+ * struct panthor_xgs_queue_pool - XGS queue pool
+ *
+ * Each file gets assigned an XGS queue pool.
+ */
+struct panthor_xgs_queue_pool {
+	/** @xa: Xarray used to manage XGS queue handles. */
+	struct xarray xa;
+};
+
+/**
+ * struct panthor_xgs_job - Used to manage an XGS job
+ */
+struct panthor_xgs_job {
+	/** @base: Inherit from drm_sched_job. */
+	struct drm_sched_job base;
+
+	/** @refcount: Reference count. */
+	struct kref refcount;
+
+	/** @queue: XGS queue this job will be pushed to. */
+	struct panthor_xgs_queue *queue;
+
+	/** @ops: List of XGS operations to execute */
+	struct drm_panthor_xgs_op *ops;
+
+	/** @ops_count: Number of operations in the ops array */
+	u32 ops_count;
+
+	/** @done_fence: Fence signaled when the job is finished or cancelled. */
+	struct dma_fence *done_fence;
+
+	/** @node: Node used to insert job into in_flight_jobs list of queue */
+	struct list_head node;
+
+};
+
+static int panthor_xgs_try_run_job(struct panthor_xgs_job *job);
+
+static const char *xgs_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "panthor";
+}
+
+static const char *xgs_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "xgs-fence";
+}
+
+static void xgs_fence_free(struct dma_fence *fence)
+{
+	dma_fence_free(fence);
+}
+
+static const struct dma_fence_ops panthor_xgs_fence_ops = {
+	.get_driver_name = xgs_fence_get_driver_name,
+	.get_timeline_name = xgs_fence_get_timeline_name,
+	.release = xgs_fence_free,
+};
+
+static void xgs_queue_release_work(struct work_struct *work)
+{
+	struct panthor_xgs_queue *queue = container_of(work, struct panthor_xgs_queue,
+						       release_work);
+
+	if (queue->entity.fence_context)
+		drm_sched_entity_destroy(&queue->entity);
+
+	if (queue->scheduler.ops)
+		drm_sched_fini(&queue->scheduler);
+
+	panthor_vm_put(queue->vm);
+
+	if (queue->eventfd_sync_update_ctx)
+		eventfd_ctx_put(queue->eventfd_sync_update_ctx);
+
+	kfree(queue);
+}
+
+static void xgs_queue_release(struct kref *kref)
+{
+	struct panthor_xgs_queue *queue = container_of(kref, struct panthor_xgs_queue, refcount);
+	struct panthor_device *ptdev = queue->ptdev;
+
+	drm_WARN_ON(&ptdev->base, !list_empty(&queue->fence_ctx.in_flight_jobs));
+
+	queue_work(panthor_cleanup_wq, &queue->release_work);
+}
+
+static struct panthor_xgs_queue *xgs_queue_get(struct panthor_xgs_queue *queue)
+{
+	if (queue)
+		kref_get(&queue->refcount);
+
+	return queue;
+}
+
+static void xgs_queue_recheck(struct panthor_xgs_queue *queue)
+{
+	struct panthor_xgs_job *job, *tmp;
+	int ret;
+
+	mutex_lock(&queue->lock);
+
+	list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
+		ret = panthor_xgs_try_run_job(job);
+
+		if (ret != -EBUSY) {
+			/* completed or failed */
+			list_del_init(&job->node);
+
+			if (ret)
+				dma_fence_set_error(job->done_fence, -ETIMEDOUT);
+
+			dma_fence_signal(job->done_fence);
+
+			/* Ditch ref we took when adding it to the in_flight_jobs */
+			panthor_xgs_job_put(&job->base);
+		}
+	}
+
+	mutex_unlock(&queue->lock);
+}
+
+/* Internal flag to mark operations as completed. Avoid clashes with drm_panthor_xgs_op_flags */
+#define DRM_PANTHOR_XGS_OP_COMPLETED (1 << 15)
+
+static int panthor_xgs_wait(struct panthor_xgs_queue *queue, struct drm_panthor_xgs_op *xgs_op)
+{
+	struct panthor_device *ptdev = queue->ptdev;
+	struct panthor_syncobj *syncobj;
+	int ret;
+	u64 value;
+
+	if (xgs_op->flags & DRM_PANTHOR_XGS_OP_COMPLETED)
+		return 0;
+
+	syncobj = panthor_syncobj_create(ptdev, queue->vm, xgs_op->addr,
+					 xgs_op->format == DRM_PANTHOR_XGS_OP_FORMAT_U64);
+	if (IS_ERR_OR_NULL(syncobj))
+		return PTR_ERR(syncobj);
+
+	value = panthor_syncobj_get_value(syncobj);
+
+	ret = -EBUSY;
+
+	if (xgs_op->op == DRM_PANTHOR_XGS_OP_WAIT_LE) {
+		if (value <= xgs_op->value) {
+			ret = 0;
+			xgs_op->flags |= DRM_PANTHOR_XGS_OP_COMPLETED;
+		}
+	} else if (xgs_op->op == DRM_PANTHOR_XGS_OP_WAIT_GT) {
+		if (value > xgs_op->value) {
+			ret = 0;
+			xgs_op->flags |= DRM_PANTHOR_XGS_OP_COMPLETED;
+		}
+	}
+
+	panthor_syncobj_release(syncobj);
+
+	return ret;
+}
+
+static void
+panthor_xgs_signal(struct panthor_xgs_queue *queue, struct drm_panthor_xgs_op *xgs_op, u32 error)
+{
+	struct panthor_device *ptdev = queue->ptdev;
+	struct panthor_syncobj *syncobj;
+	u64 value;
+
+	if (xgs_op->flags & DRM_PANTHOR_XGS_OP_COMPLETED)
+		return;
+
+	syncobj = panthor_syncobj_create(ptdev, queue->vm, xgs_op->addr,
+					 xgs_op->format == DRM_PANTHOR_XGS_OP_FORMAT_U64);
+	if (IS_ERR_OR_NULL(syncobj))
+		return;
+
+	value = panthor_syncobj_get_value(syncobj);
+
+	if (xgs_op->op == DRM_PANTHOR_XGS_OP_SIGNAL_SET)
+		value = xgs_op->value;
+	else if (xgs_op->op == DRM_PANTHOR_XGS_OP_SIGNAL_ADD)
+		value += xgs_op->value;
+
+	if (!error)
+		panthor_syncobj_signal(syncobj, value);
+	else
+		panthor_syncobj_signal_with_error(syncobj, value, error);
+
+	panthor_sched_sync_update(ptdev);
+
+	if (queue->eventfd_sync_update_ctx)
+		eventfd_signal(queue->eventfd_sync_update_ctx);
+
+	xgs_op->flags |= DRM_PANTHOR_XGS_OP_COMPLETED;
+
+	panthor_syncobj_release(syncobj);
+}
+
+static int panthor_xgs_try_run_job(struct panthor_xgs_job *job)
+{
+	int i;
+	int err_wait = 0;
+	struct drm_panthor_xgs_op *xgs_op;
+
+	lockdep_assert_held(&job->queue->lock);
+
+	for (i = 0; i < job->ops_count; i++) {
+		xgs_op = &job->ops[i];
+
+		switch (xgs_op->op & ~DRM_PANTHOR_XGS_OP_COMPLETED) {
+		case DRM_PANTHOR_XGS_OP_WAIT_LE:
+		case DRM_PANTHOR_XGS_OP_WAIT_GT:
+			if (!err_wait)
+				err_wait = panthor_xgs_wait(job->queue, &job->ops[i]);
+			if (err_wait == -EBUSY)
+				return err_wait;
+			break;
+		case DRM_PANTHOR_XGS_OP_SIGNAL_SET:
+		case DRM_PANTHOR_XGS_OP_SIGNAL_ADD:
+			panthor_xgs_signal(job->queue, &job->ops[i], err_wait);
+			break;
+		default:
+			/* unknown operation, assume this could be a critical error */
+			err_wait = -EINVAL;
+			break;
+		}
+	}
+
+	return err_wait;
+}
+
+static struct dma_fence *panthor_xgs_run_job(struct drm_sched_job *sched_job)
+{
+	struct panthor_xgs_job *job = container_of(sched_job, struct panthor_xgs_job, base);
+	struct panthor_xgs_queue *queue = job->queue;
+	struct dma_fence *done_fence;
+	int ret;
+
+	mutex_lock(&queue->lock);
+
+	ret = panthor_xgs_try_run_job(job);
+	if (ret == -EBUSY) {
+		dma_fence_init(job->done_fence,
+			       &panthor_xgs_fence_ops,
+			       &queue->fence_ctx.lock,
+			       queue->fence_ctx.id,
+			       atomic64_inc_return(&queue->fence_ctx.seqno));
+
+		done_fence = dma_fence_get(job->done_fence);
+		panthor_xgs_job_get(&job->base);
+
+		list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
+
+	} else if (ret) {
+		done_fence = ERR_PTR(ret);
+	} else {
+		/* job completed immediately, no need to return fence */
+		done_fence = NULL;
+	}
+
+	mutex_unlock(&queue->lock);
+
+	return done_fence;
+}
+
+static enum drm_gpu_sched_stat
+panthor_xgs_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct panthor_xgs_job *job = container_of(sched_job, struct panthor_xgs_job, base);
+	struct panthor_xgs_queue *queue = job->queue;
+	int ret;
+
+	mutex_lock(&queue->lock);
+
+	list_del_init(&job->node);
+
+	/* Ditch ref we took when adding it to the in_flight_jobs */
+	panthor_xgs_job_put(&job->base);
+
+	ret = panthor_xgs_try_run_job(job);
+	if (ret)
+		dma_fence_set_error(job->done_fence, -ETIMEDOUT);
+
+	mutex_unlock(&queue->lock);
+
+	dma_fence_signal(job->done_fence);
+
+	panthor_xgs_job_put(sched_job);
+
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static const struct drm_sched_backend_ops panthor_xgs_sched_ops = {
+	.run_job = panthor_xgs_run_job,
+	.timedout_job = panthor_xgs_job_timedout,
+	.free_job = panthor_xgs_job_put,
+};
+
+int panthor_xgs_queue_create(struct panthor_file *pfile, u32 vm_id,
+			     int eventfd_sync_update, u32 *handle)
+{
+	struct panthor_device *ptdev = pfile->ptdev;
+	struct panthor_xgs_queue_pool *xgs_queue_pool = pfile->xgs_queues;
+	struct panthor_xgs_queue *queue;
+	struct drm_gpu_scheduler *drm_sched;
+	int ret;
+	u32 qid;
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return -ENOMEM;
+
+	kref_init(&queue->refcount);
+	INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
+	INIT_WORK(&queue->release_work, xgs_queue_release_work);
+	queue->ptdev = ptdev;
+
+	ret = drmm_mutex_init(&ptdev->base, &queue->lock);
+	if (ret)
+		goto err_put_queue;
+
+	if (eventfd_sync_update >= 0) {
+		queue->eventfd_sync_update_ctx = eventfd_ctx_fdget(eventfd_sync_update);
+		ret = PTR_ERR_OR_ZERO(queue->eventfd_sync_update_ctx);
+		if (ret)
+			goto err_put_queue;
+	}
+
+	queue->vm = panthor_vm_pool_get_vm(pfile->vms, vm_id);
+	if (!queue->vm) {
+		ret = -EINVAL;
+		goto err_put_queue;
+	}
+
+	ret = drm_sched_init(&queue->scheduler, &panthor_xgs_sched_ops,
+			     NULL, 1, 1, 0,
+			     msecs_to_jiffies(JOB_TIMEOUT_MS),
+			     NULL, NULL,
+			     "panthor_xgs",
+			     ptdev->base.dev);
+	if (ret)
+		goto err_put_queue;
+
+	drm_sched = &queue->scheduler;
+	ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
+	if (ret)
+		goto err_put_queue;
+
+	queue->fence_ctx.id = dma_fence_context_alloc(1);
+	spin_lock_init(&queue->fence_ctx.lock);
+
+	ret = xa_alloc(&xgs_queue_pool->xa, &qid, queue,
+		       XA_LIMIT(1, MAX_XGS_QUEUES_PER_POOL), GFP_KERNEL);
+	if (ret)
+		goto err_put_queue;
+
+	queue->handle = qid;
+	*handle = qid;
+
+	return 0;
+
+err_put_queue:
+	panthor_xgs_queue_put(queue);
+	return ret;
+}
+
+int panthor_xgs_queue_destroy(struct panthor_file *pfile, u32 handle)
+{
+	struct panthor_xgs_queue_pool *pool = pfile->xgs_queues;
+	struct panthor_xgs_queue *queue;
+	struct panthor_xgs_job *job, *tmp;
+	LIST_HEAD(faulty_jobs);
+	int err = -ECANCELED;
+
+	queue = xa_erase(&pool->xa, handle);
+	if (!queue)
+		return -EINVAL;
+
+	queue->destroyed = true;
+
+	mutex_lock(&queue->lock);
+
+	list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
+		list_move_tail(&job->node, &faulty_jobs);
+		dma_fence_set_error(job->done_fence, err);
+		dma_fence_signal(job->done_fence);
+	}
+
+	mutex_unlock(&queue->lock);
+
+	list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
+		list_del_init(&job->node);
+		/* Ditch ref we took when adding it to the in_flight_jobs */
+		panthor_xgs_job_put(&job->base);
+	}
+
+	panthor_xgs_queue_put(queue);
+
+	return 0;
+}
+
+void panthor_xgs_queue_put(struct panthor_xgs_queue *queue)
+{
+	if (queue)
+		kref_put(&queue->refcount, xgs_queue_release);
+}
+
+struct panthor_vm *panthor_xgs_queue_vm(struct panthor_xgs_queue *queue)
+{
+	return queue->vm;
+}
+
+int panthor_xgs_queue_pool_create(struct panthor_file *pfile)
+{
+	struct panthor_xgs_queue_pool *pool;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return -ENOMEM;
+
+	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
+	pfile->xgs_queues = pool;
+	return 0;
+}
+
+void panthor_xgs_queue_pool_destroy(struct panthor_file *pfile)
+{
+	struct panthor_xgs_queue_pool *pool = pfile->xgs_queues;
+	struct panthor_xgs_queue *queue;
+	unsigned long i;
+
+	if (IS_ERR_OR_NULL(pool))
+		return;
+
+	xa_for_each(&pool->xa, i, queue)
+		panthor_xgs_queue_destroy(pfile, i);
+
+	xa_destroy(&pool->xa);
+	kfree(pool);
+	pfile->xgs_queues = NULL;
+}
+
+struct panthor_xgs_queue *panthor_xgs_queue_pool_get_xgs_queue(struct panthor_xgs_queue_pool *pool,
+							       u32 handle)
+{
+	struct panthor_xgs_queue *queue;
+
+	queue = xgs_queue_get(xa_load(&pool->xa, handle));
+
+	return queue;
+}
+
+void panthor_xgs_queue_pool_recheck(struct panthor_file *ptfile)
+{
+	unsigned long i;
+	struct panthor_xgs_queue *queue;
+
+	xa_for_each(&ptfile->xgs_queues->xa, i, queue)
+		xgs_queue_recheck(queue);
+}
+
+struct drm_sched_job *panthor_xgs_job_create(struct panthor_xgs_queue *queue,
+					     struct drm_panthor_xgs_op *ops, u32 ops_count)
+{
+	struct panthor_xgs_job *job;
+	int ret;
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&job->refcount);
+	INIT_LIST_HEAD(&job->node);
+
+	job->queue = xgs_queue_get(queue);
+	if (!job->queue) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
+	job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+	if (!job->done_fence) {
+		ret = -ENOMEM;
+		goto err_put_job;
+	}
+
+	ret = drm_sched_job_init(&job->base, &queue->entity, 1, queue);
+	if (ret)
+		goto err_put_job;
+
+	/* take ownership of ops array */
+	job->ops = ops;
+	job->ops_count = ops_count;
+
+	return &job->base;
+
+err_put_job:
+	panthor_xgs_job_put(&job->base);
+	return ERR_PTR(ret);
+}
+
+static void xgs_job_release(struct kref *ref)
+{
+	struct panthor_xgs_job *job = container_of(ref, struct panthor_xgs_job, refcount);
+
+	drm_WARN_ON(&job->queue->ptdev->base, !list_empty(&job->node));
+
+	if (job->base.s_fence)
+		drm_sched_job_cleanup(&job->base);
+
+	if (job->done_fence && job->done_fence->ops)
+		dma_fence_put(job->done_fence);
+	else
+		dma_fence_free(job->done_fence);
+
+	panthor_xgs_queue_put(job->queue);
+	kvfree(job->ops);
+	kfree(job);
+}
+
+struct drm_sched_job *panthor_xgs_job_get(struct drm_sched_job *sched_job)
+{
+	if (sched_job) {
+		struct panthor_xgs_job *job = container_of(sched_job, struct panthor_xgs_job, base);
+
+		kref_get(&job->refcount);
+	}
+
+	return sched_job;
+}
+
+void panthor_xgs_job_put(struct drm_sched_job *sched_job)
+{
+	struct panthor_xgs_job *job = container_of(sched_job, struct panthor_xgs_job, base);
+
+	if (sched_job)
+		kref_put(&job->refcount, xgs_job_release);
+}
+
+void panthor_xgs_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
+{
+	struct panthor_xgs_job *job = container_of(sched_job, struct panthor_xgs_job, base);
+
+	panthor_vm_update_resvs(job->queue->vm, exec, &sched_job->s_fence->finished,
+				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+}
diff --git a/drivers/gpu/drm/panthor/panthor_xgs.h b/drivers/gpu/drm/panthor/panthor_xgs.h
new file mode 100644
index 000000000000..fa7dd5e5ef83
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_xgs.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2024 ARM Limited. All rights reserved. */
+
+#ifndef __PANTHOR_XGS_H__
+#define __PANTHOR_XGS_H__
+
+struct drm_exec;
+struct drm_panthor_xgs_op;
+struct drm_panthor_xgs_queue_create;
+struct drm_sched_job;
+struct panthor_xgs_queue;
+struct panthor_xgs_queue_pool;
+struct panthor_file;
+struct panthor_vm;
+
+int panthor_xgs_queue_create(struct panthor_file *pfile, u32 vm_id,
+			     int eventfd_sync_update, u32 *handle);
+int panthor_xgs_queue_destroy(struct panthor_file *pfile, u32 handle);
+
+void panthor_xgs_queue_put(struct panthor_xgs_queue *queue);
+
+struct panthor_vm *panthor_xgs_queue_vm(struct panthor_xgs_queue *queue);
+
+int panthor_xgs_queue_pool_create(struct panthor_file *pfile);
+void panthor_xgs_queue_pool_destroy(struct panthor_file *pfile);
+
+struct panthor_xgs_queue *
+panthor_xgs_queue_pool_get_xgs_queue(struct panthor_xgs_queue_pool *pool, u32 handle);
+
+void panthor_xgs_queue_pool_recheck(struct panthor_file *ptfile);
+
+struct drm_sched_job *
+panthor_xgs_job_create(struct panthor_xgs_queue *queue,
+		       struct drm_panthor_xgs_op *ops,
+		       u32 ops_count);
+
+void panthor_xgs_job_put(struct drm_sched_job *sched_job);
+struct drm_sched_job *panthor_xgs_job_get(struct drm_sched_job *sched_job);
+
+void panthor_xgs_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job);
+
+#endif
-- 
2.45.0
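
For reviewers, a rough sketch of the intended userspace usage follows. It is
illustrative only and not part of the patch: the drm_panthor_xgs_* uAPI
structs, the DRM_PANTHOR_XGS_OP_* values and the DRM_IOCTL_PANTHOR_XGS_QUEUE_*
request numbers are defined by the uAPI patch of this series and are assumed
here; error handling is omitted.

/*
 * Hypothetical example: make the CPU consume a GPU-side HW syncobj update
 * through an XGS WAIT job, using an eventfd for completion notification.
 */
#include <sys/eventfd.h>
#include <sys/ioctl.h>

#include <drm/panthor_drm.h>

static void xgs_example(int drm_fd, __u32 vm_id, __u64 syncobj_gpu_va)
{
	int evfd = eventfd(0, EFD_CLOEXEC);

	/* Create an XGS queue on the VM, with an eventfd for sync updates. */
	struct drm_panthor_xgs_queue_create create = {
		.vm_id = vm_id,
		.eventfd_sync_update = evfd,
	};
	ioctl(drm_fd, DRM_IOCTL_PANTHOR_XGS_QUEUE_CREATE, &create);

	/* One WAIT_GT op: the job completes once the 64-bit syncobj exceeds 41. */
	struct drm_panthor_xgs_op op = {
		.op = DRM_PANTHOR_XGS_OP_WAIT_GT,
		.format = DRM_PANTHOR_XGS_OP_FORMAT_U64,
		.addr = syncobj_gpu_va,
		.value = 41,
	};
	struct drm_panthor_xgs_queue_submit submit = {
		.handle = create.handle,
		.ops = DRM_PANTHOR_OBJ_ARRAY(1, &op),
		/*
		 * .syncs would carry drm_panthor_sync_op entries to expose the
		 * job's completion as a DRM syncobj the CPU can wait on.
		 */
	};
	ioctl(drm_fd, DRM_IOCTL_PANTHOR_XGS_QUEUE_SUBMIT, &submit);

	struct drm_panthor_xgs_queue_destroy destroy = { .handle = create.handle };
	ioctl(drm_fd, DRM_IOCTL_PANTHOR_XGS_QUEUE_DESTROY, &destroy);
}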