Am 20.04.23 um 16:47 schrieb Arunpravin Paneer Selvam:
Developed a userqueue fence driver for the userqueue process shared
BO synchronization.
Create a dma fence having write pointer as the seqno and allocate a
seq64 memory for each user queue process and feed this memory address
into the firmware/hardware, thus the firmware writes the read pointer
into the given address when the process completes it execution.
Compare wptr and rptr, if rptr >= wptr, signal the fences for the waiting
process to consume the buffers.
v2: Worked on review comments from Christian for the following
modifications
- Add wptr as sequence number into the fence
- Add a reference count for the fence driver
- Add dma_fence_put below the list_del as it might frees the userq fence.
- Trim unnecessary code in interrupt handler.
- Check dma fence signaled state in dma fence creation function for a
potential problem of hardware completing the job processing beforehand.
- Add necessary locks.
- Create a list and process all the unsignaled fences.
- clean up fences in destroy function.
- implement .signaled callback function
v3: Worked on review comments from Christian
- Modify naming convention for reference counted objects
- Fix fence driver reference drop issue
- Drop amdgpu_userq_fence_driver_process() function return value
That looks really good, just two more comments below.
Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@xxxxxxx>
---
build.sh | 11 +
drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 +
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 253 ++++++++++++++++++
.../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 64 +++++
drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 19 ++
.../gpu/drm/amd/include/amdgpu_userqueue.h | 1 +
7 files changed, 355 insertions(+), 1 deletion(-)
create mode 100755 build.sh
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
diff --git a/build.sh b/build.sh
new file mode 100755
index 000000000000..fddaff75dd2a
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,11 @@
+#make -j16 modules M=drivers/gpu/drm/
+#make -j16 modules M=drivers/gpu/drm/selftests/
+make -j16 modules M=drivers/gpu/drm/amd/amdgpu/
+#make -j16 modules M=drivers/gpu/drm/ttm/
+#cp drivers/gpu/drm/drm.ko /lib/modules/$(uname -r)/kernel/drivers/gpu/drm/
+#cp drivers/gpu/drm/selftests/test-drm_buddy.ko /lib/modules/$(uname -r)/kernel/drivers/gpu/drm/selftests/
+#cp drivers/gpu/drm/amd/amdgpu/amdgpu.ko /lib/modules/$(uname -r)/kernel/drivers/gpu/drm/amd/amdgpu/
+#cp drivers/gpu/drm/ttm/ttm.ko /lib/modules/$(uname -r)/kernel/drivers/gpu/drm/ttm/
+#update-initramfs -c -k $(uname -r)
+#reboot
+
I strongly assume you didn't intentionally committed that, did you?
Anyway, please remove :)
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index d39504e65db1..8ed9be0d4818 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
- amdgpu_ring_mux.o amdgpu_seq64.o
+ amdgpu_ring_mux.o amdgpu_seq64.o amdgpu_userq_fence.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 229976a2d0e7..e9c5047087d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -53,6 +53,7 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_reset.h"
#include "amdgpu_userqueue.h"
+#include "amdgpu_userq_fence.h"
/*
* KMS wrapper.
@@ -2826,6 +2827,10 @@ static int __init amdgpu_init(void)
if (r)
goto error_fence;
+ r = amdgpu_userq_fence_slab_init();
+ if (r)
+ goto error_fence;
+
DRM_INFO("amdgpu kernel modesetting enabled.\n");
amdgpu_register_atpx_handler();
amdgpu_acpi_detect();
@@ -2850,6 +2855,7 @@ static void __exit amdgpu_exit(void)
amdgpu_unregister_atpx_handler();
amdgpu_sync_fini();
amdgpu_fence_slab_fini();
+ amdgpu_userq_fence_slab_fini();
mmu_notifier_synchronize();
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..a03d12f83147
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+int amdgpu_userq_fence_slab_init(void)
+{
+ amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
+ sizeof(struct amdgpu_userq_fence),
+ 0,
+ SLAB_HWCACHE_ALIGN,
+ NULL);
+ if (!amdgpu_userq_fence_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void amdgpu_userq_fence_slab_fini(void)
+{
+ rcu_barrier();
+ kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *__f = container_of(f, struct amdgpu_userq_fence, base);
+
+ if (!__f)
+ return NULL;
+
+ if (__f->base.ops == &amdgpu_userq_fence_ops)
+ return __f;
+
+ return NULL;
+}
+
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ return le64_to_cpu(*fence_drv->cpu_addr);
+}
+
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev,
+ struct amdgpu_usermode_queue *userq)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ int r;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ /* Acquire seq64 memory */
+ r = amdgpu_seq64_get(adev, &fence_drv->gpu_addr,
+ &fence_drv->cpu_addr);
+ if (r)
+ return -ENOMEM;
+
+ kref_init(&fence_drv->refcount);
+ INIT_LIST_HEAD(&fence_drv->fences);
+ spin_lock_init(&fence_drv->fence_list_lock);
+
+ fence_drv->adev = adev;
+ fence_drv->context = dma_fence_context_alloc(1);
+
+ get_task_comm(fence_drv->timeline_name, current);
+
+ return 0;
+}
+
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ struct amdgpu_userq_fence *userq_fence, *tmp;
+ struct dma_fence *fence;
+
+ if (!fence_drv)
+ return;
+
+ spin_lock(&fence_drv->fence_list_lock);
+ list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+ fence = &userq_fence->base;
+
+ if (amdgpu_userq_fence_read(fence_drv) >= fence->seqno) {
+ dma_fence_signal(fence);
+ list_del(&userq_fence->link);
+
+ dma_fence_put(fence);
+ } else {
+ break;
+ }
+ }
+ spin_unlock(&fence_drv->fence_list_lock);
+}
+
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+ struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
+ struct amdgpu_userq_fence_driver,
+ refcount);
+ struct amdgpu_device *adev = fence_drv->adev;
+ struct amdgpu_userq_fence *fence, *tmp;
+ struct dma_fence *f;
+
+ spin_lock(&fence_drv->fence_list_lock);
+ list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
+ f = &fence->base;
+
+ if (!dma_fence_is_signaled(f)) {
+ dma_fence_set_error(f, -ECANCELED);
+ dma_fence_signal(f);
+ }
+
+ list_del(&fence->link);
+ dma_fence_put(f);
+ }
+
+ WARN_ON_ONCE(!list_empty(&fence_drv->fences));
+ spin_unlock(&fence_drv->fence_list_lock);
+
+ /* Free seq64 memory */
+ amdgpu_seq64_free(adev, fence_drv->gpu_addr);
+ kfree(fence_drv);
+}
+
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_get(&fence_drv->refcount);
+}
+
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+ kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ u64 seq, struct dma_fence **f)
+{
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_userq_fence *userq_fence;
+ struct dma_fence *fence;
+
+ fence_drv = userq->fence_drv;
+ if (!fence_drv)
+ return -EINVAL;
+
+ userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+ if (!userq_fence)
+ return -ENOMEM;
+
+ spin_lock_init(&userq_fence->lock);
+ INIT_LIST_HEAD(&userq_fence->link);
+ fence = &userq_fence->base;
+ userq_fence->fence_drv = fence_drv;
+
+ dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+ fence_drv->context, seq);
+
+ amdgpu_userq_fence_driver_get(fence_drv);
+ dma_fence_get(fence);
+
+ spin_lock(&fence_drv->fence_list_lock);
+ /* Check if hardware has already processed the job */
+ if (!dma_fence_is_signaled(fence)) {
+ list_add_tail(&userq_fence->link, &fence_drv->fences);
+ } else {
+ dma_fence_put(fence);
+ }
+ spin_unlock(&fence_drv->fence_list_lock);
+
+ *f = fence;
+
+ return 0;
+}
+
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_userqueue_fence";
+}
+
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+ return fence->fence_drv->timeline_name;
+}
+
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+ struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+ struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+ u64 rptr, wptr;
+
+ rptr = amdgpu_userq_fence_read(fence_drv);
+ wptr = fence->base.seqno;
+
+ if (rptr >= wptr)
+ return true;
+
+ return false;
+}
+
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+ struct dma_fence *fence = container_of(rcu, struct dma_fence, rcu);
+ struct amdgpu_userq_fence *userq_fence = to_amdgpu_userq_fence(fence);
+ struct amdgpu_userq_fence_driver *fence_drv = userq_fence->fence_drv;
+
+ /* Release the fence driver reference */
+ amdgpu_userq_fence_driver_put(fence_drv);
+ kmem_cache_free(amdgpu_userq_fence_slab, userq_fence);
+}
+
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+ call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+ .use_64bit_seqno = true,
+ .get_driver_name = amdgpu_userq_fence_get_driver_name,
+ .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+ .signaled = amdgpu_userq_fence_signaled,
+ .release = amdgpu_userq_fence_release,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..7329b4e5dd30
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_userqueue.h"
+
+struct amdgpu_userq_fence {
+ struct dma_fence base;
+ /* userq fence lock */
+ spinlock_t lock;
This one.
+ struct list_head link;
+ struct amdgpu_userq_fence_driver *fence_drv;
+};
+
+struct amdgpu_userq_fence_driver {
+ struct kref refcount;
+ u64 gpu_addr;
+ u64 *cpu_addr;
+ u64 context;
+ /* fence list lock */
+ spinlock_t fence_list_lock;
And that one here should have better documentation.
It's obvious what they are, so the existing doc doesn't help at all.
What you need to document is why they are separate.
+ struct list_head fences;
+ struct amdgpu_device *adev;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+ u64 seq, struct dma_fence **f);
+void amdgpu_userq_fence_driver_get(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_driver_alloc(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq);
+void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index fd4a2ca3302d..8918b176fdcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -24,6 +24,7 @@
#include "amdgpu.h"
#include "amdgpu_vm.h"
#include "amdgpu_userqueue.h"
+#include "amdgpu_userq_fence.h"
static inline int
amdgpu_userqueue_index(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
@@ -158,6 +159,8 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
+ struct amdgpu_userq_fence_driver *fence_drv;
+ struct amdgpu_device *adev = uq_mgr->adev;
uint64_t index;
int r;
@@ -173,6 +176,12 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
return -ENOMEM;
}
+ fence_drv = kzalloc(sizeof(struct amdgpu_userq_fence_driver), GFP_KERNEL);
+ if (!fence_drv) {
+ DRM_ERROR("Failed to allocate memory for fence driver\n");
+ return -ENOMEM;
+ }
+
Why don't we have this kzalloc() inside amdgpu_userq_fence_driver_alloc() ?
Christian.
mutex_lock(&uq_mgr->userq_mutex);
queue->userq_prop.wptr_gpu_addr = mqd_in->wptr_va;
queue->userq_prop.rptr_gpu_addr = mqd_in->rptr_va;
@@ -188,6 +197,13 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
goto free_queue;
}
+ queue->fence_drv = fence_drv;
+ r = amdgpu_userq_fence_driver_alloc(adev, queue);
+ if (r) {
+ DRM_ERROR("Failed to alloc fence driver\n");
+ goto free_fence_drv;
+ }
+
queue->userq_prop.doorbell_index = index;
queue->shadow_ctx_gpu_addr = mqd_in->shadow_va;
queue->queue_type = mqd_in->ip_type;
@@ -217,6 +233,8 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
mutex_unlock(&uq_mgr->userq_mutex);
return 0;
+free_fence_drv:
+ amdgpu_userq_fence_driver_put(queue->fence_drv);
free_queue:
mutex_unlock(&uq_mgr->userq_mutex);
kfree(queue);
@@ -238,6 +256,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
mutex_lock(&uq_mgr->userq_mutex);
uq_mgr->userq_funcs[queue->queue_type]->mqd_destroy(uq_mgr, queue);
amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
+ amdgpu_userq_fence_driver_put(queue->fence_drv);
mutex_unlock(&uq_mgr->userq_mutex);
kfree(queue);
}
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index eaab7cf5fff6..eaea88539007 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -50,6 +50,7 @@ struct amdgpu_usermode_queue {
struct amdgpu_mqd_prop userq_prop;
struct amdgpu_userq_ctx_space mqd;
struct amdgpu_userq_ctx_space fw_space;
+ struct amdgpu_userq_fence_driver *fence_drv;
};
struct amdgpu_userq_funcs {