Re: [PATCH 2/6] drm/amdgpu: Implement a new userqueue fence driver

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Christian,

On 2/27/2023 6:12 PM, Christian König wrote:
Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
Developed a userqueue fence driver for the userqueue process shared
BO synchronization.

Create a dma fence having write pointer as the seqno and allocate a
seq64 memory for each user queue process and feed this memory address
into the firmware/hardware, thus the firmware writes the read pointer
into the given address when the process completes it execution.
Compare wptr and rptr, if rptr >= wptr, signal the fences for the waiting
process to consume the buffers.

v2: Worked on review comments from Christian for the following
     modifications

     - Add wptr as sequence number into the fence
     - Add a reference count for the fence driver
     - Add dma_fence_put below the list_del as it might frees the userq fence.
     - Trim unnecessary code in interrupt handler.
     - Check dma fence signaled state in dma fence creation function for a        potential problem of hardware completing the job processing beforehand.
     - Add necessary locks.
     - Create a list and process all the unsignaled fences.
     - clean up fences in destroy function.
     - implement .enabled callback function

A few more nit picks below, but from the technical side that looks mostly clean.


Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/Makefile           |   2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |   6 +
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c   | 251 ++++++++++++++++++
  .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h   |  61 +++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  20 ++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |   2 +
  6 files changed, 341 insertions(+), 1 deletion(-)
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index a239533a895f..ea09273b585f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
      amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
      amdgpu_fw_attestation.o amdgpu_securedisplay.o \
      amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
-    amdgpu_ring_mux.o amdgpu_seq64.o
+    amdgpu_ring_mux.o amdgpu_seq64.o amdgpu_userq_fence.o
    amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index bd3462d0da5f..6b7ac1ebd04c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -53,6 +53,7 @@
  #include "amdgpu_xgmi.h"
  #include "amdgpu_reset.h"
  #include "amdgpu_userqueue.h"
+#include "amdgpu_userq_fence.h"
    /*
   * KMS wrapper.
@@ -2827,6 +2828,10 @@ static int __init amdgpu_init(void)
      if (r)
          goto error_fence;
  +    r = amdgpu_userq_fence_slab_init();
+    if (r)
+        goto error_fence;
+
      DRM_INFO("amdgpu kernel modesetting enabled.\n");
      amdgpu_register_atpx_handler();
      amdgpu_acpi_detect();
@@ -2851,6 +2856,7 @@ static void __exit amdgpu_exit(void)
      amdgpu_unregister_atpx_handler();
      amdgpu_sync_fini();
      amdgpu_fence_slab_fini();
+    amdgpu_userq_fence_slab_fini();
      mmu_notifier_synchronize();
  }
  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
new file mode 100644
index 000000000000..609a7328e9a6
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/kref.h>
+#include <linux/slab.h>
+
+#include <drm/drm_syncobj.h>
+
+#include "amdgpu.h"
+#include "amdgpu_userq_fence.h"
+#include "amdgpu_userqueue.h"
+
+static struct kmem_cache *amdgpu_userq_fence_slab;
+
+int amdgpu_userq_fence_slab_init(void)
+{
+    amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
+                            sizeof(struct amdgpu_userq_fence),
+                            0,
+                            SLAB_HWCACHE_ALIGN,
+                            NULL);
+    if (!amdgpu_userq_fence_slab)
+        return -ENOMEM;
+
+    return 0;
+}
+
+void amdgpu_userq_fence_slab_fini(void)
+{
+    rcu_barrier();
+    kmem_cache_destroy(amdgpu_userq_fence_slab);
+}
+
+static inline struct amdgpu_userq_fence *to_amdgpu_userq_fence(struct dma_fence *f)
+{
+    struct amdgpu_userq_fence *__f = container_of(f, struct amdgpu_userq_fence, base);
+
+    if (!__f)
+        return NULL;
+
+    if (__f->base.ops == &amdgpu_userq_fence_ops)
+        return __f;
+
+    return NULL;
+}
+
+static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver *fence_drv)
+{
+    return le64_to_cpu(*fence_drv->cpu_addr);
+}
+
+int amdgpu_userq_fence_driver_get(struct amdgpu_device *adev,
+                  struct amdgpu_usermode_queue *userq)

Looks like you misunderstood me a bit.

The usual naming convention in Linux for reference counted objects is like that:

_alloc() or similar for a function creating the object (the one which has the kref_init).
_get() for the function grabbing a reference.
_put() for the function releasing a reference.
_free() or _destroy() or similar for the function which is called by _put() to cleanup.

+{
+    struct amdgpu_userq_fence_driver *fence_drv;
+    int r;
+
+    fence_drv = userq->fence_drv;
+    if (!fence_drv)
+        return -EINVAL;
+
+    /* Acquire seq64 memory */
+    r = amdgpu_seq64_get(adev, &fence_drv->gpu_addr,
+                 &fence_drv->cpu_addr);
+    if (r)
+        return -ENOMEM;
+
+    kref_init(&fence_drv->refcount);
+    INIT_LIST_HEAD(&fence_drv->fences);
+    spin_lock_init(&fence_drv->fence_list_lock);
+
+    fence_drv->adev = adev;
+    fence_drv->context = dma_fence_context_alloc(1);
+
+    get_task_comm(fence_drv->timeline_name, current);
+
+    return 0;
+}
+
+void amdgpu_userq_fence_driver_destroy(struct kref *ref)
+{
+    struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
+                     struct amdgpu_userq_fence_driver,
+                     refcount);
+    struct amdgpu_device *adev = fence_drv->adev;
+    struct amdgpu_userq_fence *fence, *tmp;
+    struct dma_fence *f;
+
+    spin_lock(&fence_drv->fence_list_lock);
+    list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
+        f = &fence->base;
+
+        if (!dma_fence_is_signaled(f)) {
+            dma_fence_set_error(f, -ECANCELED);
+            dma_fence_signal(f);
+        }
+
+        list_del(&fence->link);
+        dma_fence_put(f);
+    }
+
+    WARN_ON_ONCE(!list_empty(&fence_drv->fences));
+    spin_unlock(&fence_drv->fence_list_lock);
+
+    /* Free seq64 memory */
+    amdgpu_seq64_free(adev, fence_drv->gpu_addr);
+    kfree(fence_drv);
+}
+
+void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv)
+{
+    kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
+}
+
+int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+                  u64 seq, struct dma_fence **f)
+{
+    struct amdgpu_userq_fence_driver *fence_drv;
+    struct amdgpu_userq_fence *userq_fence;
+    struct dma_fence *fence;
+
+    fence_drv = userq->fence_drv;
+    if (!fence_drv)
+        return -EINVAL;
+
+    userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab, GFP_ATOMIC);
+    if (!userq_fence)
+        return -ENOMEM;
+
+    spin_lock_init(&userq_fence->lock);
+    INIT_LIST_HEAD(&userq_fence->link);
+    fence = &userq_fence->base;
+    userq_fence->fence_drv = fence_drv;
+
+    dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
+               fence_drv->context, seq);
+
+    kref_get(&fence_drv->refcount);
+
+    spin_lock(&fence_drv->fence_list_lock);
+    /* Check if hardware has already processed the job */
+    if (!dma_fence_is_signaled(fence)) {
+        dma_fence_get(fence);
+        list_add_tail(&userq_fence->link, &fence_drv->fences);
+        dma_fence_put(fence);
+    }
+    spin_unlock(&fence_drv->fence_list_lock);

+    /* Release the fence driver reference */
+    amdgpu_userq_fence_driver_put(fence_drv);

Hui? That doesn't make much sense. We should keep the reference as long as the fence exists or at least as long as it isn't signaled (the later is probably better, but tricky to get right).

+
+    *f = fence;
+
+    return 0;
+}
+
+bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver *fence_drv)

Maybe name that function amdgpu_userq_fence_driver_process() and move that up a bit to group together the functions dealing with the fence_driver and the functions dealing with the fence.

+{
+    struct amdgpu_userq_fence *userq_fence, *tmp;
+    struct dma_fence *fence;
+    u64 rptr;
+
+    if (!fence_drv)
+        return false;
+
+    rptr = amdgpu_userq_fence_read(fence_drv);
+
+    spin_lock(&fence_drv->fence_list_lock);
+    list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) {
+        fence = &userq_fence->base;
+
+        if (rptr >= fence->seqno) {
+            dma_fence_signal(fence);
+            list_del(&userq_fence->link);
+
+            dma_fence_put(fence);
+        } else {
+            break;
+        }
+    }
+    spin_unlock(&fence_drv->fence_list_lock);
+
+    return true;

BTW: What's the return value good for? Could we drop that?

Working on v3, will include all the review comments.

Thanks,
Arun

Regards,
Christian.

+}
+
+static const char *amdgpu_userq_fence_get_driver_name(struct dma_fence *f)
+{
+    return "amdgpu_userqueue_fence";
+}
+
+static const char *amdgpu_userq_fence_get_timeline_name(struct dma_fence *f)
+{
+    struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+
+    return fence->fence_drv->timeline_name;
+}
+
+static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
+{
+    struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
+    struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
+    u64 rptr, wptr;
+
+    rptr = amdgpu_userq_fence_read(fence_drv);
+    wptr = fence->base.seqno;
+
+    if (rptr >= wptr)
+        return true;
+
+    return false;
+}
+
+static void amdgpu_userq_fence_free(struct rcu_head *rcu)
+{
+    struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
+
+    kmem_cache_free(amdgpu_userq_fence_slab, to_amdgpu_userq_fence(f));
+}
+
+static void amdgpu_userq_fence_release(struct dma_fence *f)
+{
+    call_rcu(&f->rcu, amdgpu_userq_fence_free);
+}
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops = {
+    .use_64bit_seqno = true,
+    .get_driver_name = amdgpu_userq_fence_get_driver_name,
+    .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
+    .signaled = amdgpu_userq_fence_signaled,
+    .release = amdgpu_userq_fence_release,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
new file mode 100644
index 000000000000..230dd54b4cd3
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_USERQ_FENCE_H__
+#define __AMDGPU_USERQ_FENCE_H__
+
+#include <linux/types.h>
+
+struct amdgpu_userq_fence {
+    struct dma_fence base;
+    /* userq fence lock */
+    spinlock_t lock;
+    struct list_head link;
+    struct amdgpu_userq_fence_driver *fence_drv;
+};
+
+struct amdgpu_userq_fence_driver {
+    struct kref refcount;
+    u64 gpu_addr;
+    u64 *cpu_addr;
+    u64 context;
+    /* fence list lock */
+    spinlock_t fence_list_lock;
+    struct list_head fences;
+    struct amdgpu_device *adev;
+    char timeline_name[TASK_COMM_LEN];
+};
+
+static const struct dma_fence_ops amdgpu_userq_fence_ops;
+
+int amdgpu_userq_fence_slab_init(void);
+void amdgpu_userq_fence_slab_fini(void);
+int amdgpu_userq_fence_driver_get(struct amdgpu_device *adev, struct amdgpu_usermode_queue *userq); +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver *fence_drv);
+int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
+                  u64 seq, struct dma_fence **f);
+bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver *fence_drv);
+void amdgpu_userq_fence_driver_destroy(struct kref *ref);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 2f1aba1e9792..d4317b0f6487 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -23,6 +23,7 @@
    #include "amdgpu.h"
  #include "amdgpu_vm.h"
+#include "amdgpu_userq_fence.h"
    #define AMDGPU_USERQ_DOORBELL_INDEX (AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START + 4)   @@ -264,6 +265,8 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
      struct amdgpu_vm *vm = &fpriv->vm;
      struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
      struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
+    struct amdgpu_userq_fence_driver *fence_drv;
+    struct amdgpu_device *adev = uq_mgr->adev;
        pasid = vm->pasid;
      if (vm->pasid < 0) {
@@ -280,6 +283,19 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
          return -ENOMEM;
      }
  +    fence_drv = kzalloc(sizeof(struct amdgpu_userq_fence_driver), GFP_KERNEL);
+    if (!fence_drv) {
+        DRM_ERROR("Failed to allocate memory for fence driver\n");
+        return -ENOMEM;
+    }
+
+    queue->fence_drv = fence_drv;
+    r = amdgpu_userq_fence_driver_get(adev, queue);
+    if (r) {
+        DRM_ERROR("Failed to get fence driver\n");
+        goto free_fence_drv;
+    }
+
      queue->vm = vm;
      queue->pasid = pasid;
      queue->wptr_gpu_addr = mqd_in->wptr_va;
@@ -339,6 +355,9 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
  free_qid:
      amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  +free_fence_drv:
+    amdgpu_userq_fence_driver_put(queue->fence_drv);
+
  free_queue:
      mutex_unlock(&uq_mgr->userq_mutex);
      kfree(queue);
@@ -363,6 +382,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
      amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
      amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
      list_del(&queue->userq_node);
+    amdgpu_userq_fence_driver_put(queue->fence_drv);
      mutex_unlock(&uq_mgr->userq_mutex);
      kfree(queue);
  }
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index bda27583b58c..cf7264df8c46 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -73,6 +73,8 @@ struct amdgpu_usermode_queue {
      struct amdgpu_vm        *vm;
      struct amdgpu_userq_mgr *userq_mgr;
      struct list_head     userq_node;
+
+    struct amdgpu_userq_fence_driver *fence_drv;
  };
    int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux