Re: [PATCH 5/8] drm/amdgpu: Create context for usermode queue

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 07/02/2023 08:55, Christian König wrote:
Am 07.02.23 um 08:51 schrieb Shashank Sharma:

On 07/02/2023 08:14, Christian König wrote:
Am 03.02.23 um 22:54 schrieb Shashank Sharma:
The FW expects us to allocate atleast one page as context space to
process gang, process, shadow, GDS and FW_space related work. This
patch creates some object for the same, and adds an IP specific
functions to do this.

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  32 +++++
  .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 ++++++++++++++++++
  .../gpu/drm/amd/include/amdgpu_userqueue.h    |  18 +++
  3 files changed, 171 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
index 9f3490a91776..18281b3a51f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
@@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
      return idr_find(&uq_mgr->userq_idr, qid);
  }
  +static void
+amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+                                   struct amdgpu_usermode_queue *queue)
+{
+    uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
+}
+
+static int
+amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
+                                  struct amdgpu_usermode_queue *queue)
+{
+    int r;
+
+    r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
+    if (r) {
+        DRM_ERROR("Failed to create context space for queue\n");
+        return r;
+    }
+
+    return 0;
+}
+
  static int
  amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_usermode_queue *queue)
  {
@@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
          goto free_qid;
      }
  +    r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
+    if (r) {
+        DRM_ERROR("Failed to create context space\n");
+        goto free_mqd;
+    }
+
      list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
      args->out.q_id = queue->queue_id;
      args->out.flags = 0;
      mutex_unlock(&uq_mgr->userq_mutex);
      return 0;
  +free_mqd:
+    amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
+
  free_qid:
      amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
  @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
      }
        mutex_lock(&uq_mgr->userq_mutex);
+    amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
      amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
      amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
      list_del(&queue->userq_node);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
index 57889729d635..687f90a587e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
@@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_
    }
  +static int amdgpu_userq_gfx_v11_ctx_create(struct amdgpu_userq_mgr *uq_mgr, +                                           struct amdgpu_usermode_queue *queue)
+{
+    int r;
+    struct amdgpu_device *adev = uq_mgr->adev;
+    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
+    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
+    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
+
+    /*
+     * The FW expects atleast one page space allocated for
+     * process context related work, and one for gang context.
+     */
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &pctx->obj,
+                                &pctx->gpu_addr,
+                                &pctx->cpu_ptr);

Again, don't use amdgpu_bo_create_kernel() for any of this.
Noted,

+    if (r) {
+        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
+        return r;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &gctx->obj,
+                                &gctx->gpu_addr,
+                                &gctx->cpu_ptr);
+    if (r) {
+        DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", r);
+        goto err_gangctx;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &gdsctx->obj,
+                                &gdsctx->gpu_addr,
+                                &gdsctx->cpu_ptr);
+    if (r) {
+        DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
+        goto err_gdsctx;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &fwctx->obj,
+                                &fwctx->gpu_addr,
+                                &fwctx->cpu_ptr);
+    if (r) {
+        DRM_ERROR("Failed to allocate FW bo for userqueue (%d)", r);
+        goto err_fwctx;
+    }
+
+    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, PAGE_SIZE,
+                                AMDGPU_GEM_DOMAIN_VRAM,
+                                &sctx->obj,
+                                &sctx->gpu_addr,
+                                &sctx->cpu_ptr);

Why the heck should we allocate so many different BOs for that? Can't we put all of this into one?
If you mean why don't we create one object of 5 * PAGE_SIZE and give 1-page spaced offsets for all of this, yes, that would further simplify things.

The reason why we kept it separate is that these objects could be of different sizes on different IPs/platforms, so we thought defining a separate

size macro and object for each of these will make it easier to understand how many FW page objects we are creating for this GEN IP. It can be

either ways.

But this is completely uninteresting for common code, isn't it?

I strongly think we should just create a single BO for each queue and put all the data (MQD, gang, GDS, FW, shadow) in it at different offsets.

This handling here is just overkill and rather error prone (BTW you used AMDGPU_USERQ_FW_CTX_SZ) twice.


Agree, we will fix this.

- Shashank

Christian.


- Shashank


Christian.

+    if (r) {
+        DRM_ERROR("Failed to allocate shadow bo for userqueue (%d)", r);
+        goto err_sctx;
+    }
+
+    return 0;
+
+err_sctx:
+    amdgpu_bo_free_kernel(&fwctx->obj,
+                          &fwctx->gpu_addr,
+                          &fwctx->cpu_ptr);
+
+err_fwctx:
+    amdgpu_bo_free_kernel(&gdsctx->obj,
+                          &gdsctx->gpu_addr,
+                          &gdsctx->cpu_ptr);
+
+err_gdsctx:
+    amdgpu_bo_free_kernel(&gctx->obj,
+                          &gctx->gpu_addr,
+                          &gctx->cpu_ptr);
+
+err_gangctx:
+    amdgpu_bo_free_kernel(&pctx->obj,
+                          &pctx->gpu_addr,
+                          &pctx->cpu_ptr);
+    return r;
+}
+
+static void amdgpu_userq_gfx_v11_ctx_destroy(struct amdgpu_userq_mgr *uq_mgr, +                                            struct amdgpu_usermode_queue *queue)
+{
+    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
+    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
+    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
+    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
+    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
+
+    amdgpu_bo_free_kernel(&sctx->obj,
+                          &sctx->gpu_addr,
+                          &sctx->cpu_ptr);
+
+    amdgpu_bo_free_kernel(&fwctx->obj,
+                          &fwctx->gpu_addr,
+                          &fwctx->cpu_ptr);
+
+    amdgpu_bo_free_kernel(&gdsctx->obj,
+                          &gdsctx->gpu_addr,
+                          &gdsctx->cpu_ptr);
+
+    amdgpu_bo_free_kernel(&gctx->obj,
+                          &gctx->gpu_addr,
+                          &gctx->cpu_ptr);
+
+    amdgpu_bo_free_kernel(&pctx->obj,
+                          &pctx->gpu_addr,
+                          &pctx->cpu_ptr);
+}
+
  static int amdgpu_userq_gfx_v11_mqd_size(struct amdgpu_userq_mgr *uq_mgr)
  {
      return sizeof(struct v11_gfx_mqd);
@@ -129,4 +248,6 @@ const struct amdgpu_userq_mqd_funcs userq_gfx_v11_mqd_funcs = {
      .mqd_size = amdgpu_userq_gfx_v11_mqd_size,
      .mqd_create = amdgpu_userq_gfx_v11_mqd_create,
      .mqd_destroy = amdgpu_userq_gfx_v11_mqd_destroy,
+    .ctx_create = amdgpu_userq_gfx_v11_ctx_create,
+    .ctx_destroy = amdgpu_userq_gfx_v11_ctx_destroy,
  };
diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
index a6abdfd5cb74..3adcd31618f7 100644
--- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
+++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
@@ -25,9 +25,19 @@
  #define AMDGPU_USERQUEUE_H_
    #define AMDGPU_MAX_USERQ 512
+#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_GDS_CTX_SZ PAGE_SIZE
+#define AMDGPU_USERQ_FW_CTX_SZ PAGE_SIZE
    struct amdgpu_userq_mqd_funcs;
  +struct amdgpu_userq_ctx {
+    struct amdgpu_bo *obj;
+    uint64_t gpu_addr;
+    void    *cpu_ptr;
+};
+
  struct amdgpu_userq_mgr {
      struct idr userq_idr;
      struct mutex userq_mutex;
@@ -52,6 +62,12 @@ struct amdgpu_usermode_queue {
      uint64_t    mqd_gpu_addr;
      void         *mqd_cpu_ptr;
  +    struct amdgpu_userq_ctx    proc_ctx;
+    struct amdgpu_userq_ctx    gang_ctx;
+    struct amdgpu_userq_ctx    gds_ctx;
+    struct amdgpu_userq_ctx    fw_ctx;
+    struct amdgpu_userq_ctx    shadow_ctx;
+
      struct amdgpu_bo    *mqd_obj;
      struct amdgpu_vm        *vm;
      struct amdgpu_userq_mgr *userq_mgr;
@@ -64,6 +80,8 @@ struct amdgpu_userq_mqd_funcs {
      int (*mqd_size)(struct amdgpu_userq_mgr *);
      int (*mqd_create)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *);       void (*mqd_destroy)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *); +    int (*ctx_create)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *); +    void (*ctx_destroy)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *);
  };
    int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev);





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux