Re: [PATCH v2 08/12] drm/amdgpu: use doorbell manager for kfd kernel doorbells

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hey Christian,

On 13/04/2023 13:02, Christian König wrote:
Am 12.04.23 um 18:25 schrieb Shashank Sharma:
This patch:
- adds a doorbell bo in kfd device structure.
- creates doorbell page for kfd kernel usages.
- updates the get_kernel_doorbell and free_kernel_doorbell functions
   accordingly

V2: Do not use wrapper API, use direct amdgpu_create_kernel(Alex)

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>
---
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 110 ++++++----------------
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h     |   6 ++
  3 files changed, 36 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index b8936340742b..49e3c4e021af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -435,8 +435,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
      atomic_set(&kfd->compute_profile, 0);
        mutex_init(&kfd->doorbell_mutex);
-    memset(&kfd->doorbell_available_index, 0,
-        sizeof(kfd->doorbell_available_index));
        atomic_set(&kfd->sram_ecc_flag, 0);
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index cd4e61bf0493..82b4a56b0afc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -61,81 +61,39 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
  /* Doorbell calculations for device init. */
  int kfd_doorbell_init(struct kfd_dev *kfd)
  {
-    size_t doorbell_start_offset;
-    size_t doorbell_aperture_size;
-    size_t doorbell_process_limit;
+    int r;
+    int size = PAGE_SIZE;

It's usually good practice to declare variables like "r" and "i" last. Some upstream maintainers even require reverse xmas tree here.
Noted, will change it.

  -    /*
-     * With MES enabled, just set the doorbell base as it is needed
-     * to calculate doorbell physical address.
-     */
-    if (kfd->shared_resources.enable_mes) {
-        kfd->doorbell_base =
-            kfd->shared_resources.doorbell_physical_address;
-        return 0;
-    }
-
-    /*
-     * We start with calculations in bytes because the input data might
-     * only be byte-aligned.
-     * Only after we have done the rounding can we assume any alignment.
-     */
-
-    doorbell_start_offset =
- roundup(kfd->shared_resources.doorbell_start_offset,
-                    kfd_doorbell_process_slice(kfd));
-
-    doorbell_aperture_size =
- rounddown(kfd->shared_resources.doorbell_aperture_size,
-                    kfd_doorbell_process_slice(kfd));
-
-    if (doorbell_aperture_size > doorbell_start_offset)
-        doorbell_process_limit =
-            (doorbell_aperture_size - doorbell_start_offset) /
-                        kfd_doorbell_process_slice(kfd);
-    else
-        return -ENOSPC;
-
-    if (!kfd->max_doorbell_slices ||
-        doorbell_process_limit < kfd->max_doorbell_slices)
-        kfd->max_doorbell_slices = doorbell_process_limit;
-
-    kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
-                doorbell_start_offset;
-
-    kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
-
-    kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
-                       kfd_doorbell_process_slice(kfd));
-
-    if (!kfd->doorbell_kernel_ptr)
+    /* Bitmap to dynamically allocate doorbells from kernel page */
+    kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
+    if (!kfd->doorbell_bitmap) {
+        DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
          return -ENOMEM;
+    }
  -    pr_debug("Doorbell initialization:\n");
-    pr_debug("doorbell base           == 0x%08lX\n",
-            (uintptr_t)kfd->doorbell_base);
-
-    pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
-            kfd->doorbell_base_dw_offset);
-
-    pr_debug("doorbell_process_limit  == 0x%08lX\n",
-            doorbell_process_limit);
-
-    pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
-            (uintptr_t)kfd->doorbell_base);
-
-    pr_debug("doorbell aperture size  == 0x%08lX\n",
-            kfd->shared_resources.doorbell_aperture_size);
-
-    pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+    /* Alloc a doorbell page for KFD kernel usages */
+    r = amdgpu_bo_create_kernel(kfd->adev,
+                    size,
+                    PAGE_SIZE,
+                    AMDGPU_GEM_DOMAIN_DOORBELL,
+                    &kfd->doorbells,
+                    NULL,
+                    (void **)&kfd->doorbell_kernel_ptr);
+    if (r) {
+        pr_err("failed to allocate kernel doorbells\n");
+        bitmap_free(kfd->doorbell_bitmap);
+        return r;
+    }
  +    pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
      return 0;
  }
    void kfd_doorbell_fini(struct kfd_dev *kfd)
  {
-    if (kfd->doorbell_kernel_ptr)
-        iounmap(kfd->doorbell_kernel_ptr);
+    bitmap_free(kfd->doorbell_bitmap);
+    amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
+                 (void **)&kfd->doorbell_kernel_ptr);
  }
    int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, @@ -188,22 +146,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
      u32 inx;
        mutex_lock(&kfd->doorbell_mutex);
-    inx = find_first_zero_bit(kfd->doorbell_available_index,
-                    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+    inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
  -    __set_bit(inx, kfd->doorbell_available_index);
+    __set_bit(inx, kfd->doorbell_bitmap);
      mutex_unlock(&kfd->doorbell_mutex);
        if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
          return NULL;
  -    inx *= kfd->device_info.doorbell_size / sizeof(u32);
-
-    /*
-     * Calculating the kernel doorbell offset using the first
-     * doorbell page.
-     */
-    *doorbell_off = kfd->doorbell_base_dw_offset + inx;
+    *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
        pr_debug("Get kernel queue doorbell\n"
              "     doorbell offset   == 0x%08X\n"
@@ -217,11 +168,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
  {
      unsigned int inx;
  -    inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
-        * sizeof(u32) / kfd->device_info.doorbell_size;
+    inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
        mutex_lock(&kfd->doorbell_mutex);
-    __clear_bit(inx, kfd->doorbell_available_index);
+    __clear_bit(inx, kfd->doorbell_bitmap);
      mutex_unlock(&kfd->doorbell_mutex);
  }
  @@ -280,7 +230,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
      if (!pdd->doorbell_index) {
          int r = kfd_alloc_process_doorbells(pdd->dev,
                              &pdd->doorbell_index);
-        if (r)
+        if (r < 0)
              return 0;
      }
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 552c3ac85a13..0b199eb6dc88 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -346,6 +346,12 @@ struct kfd_dev {
        /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
      struct dev_pagemap pgmap;
+
+    /* Kernel doorbells for KFD device */
+    struct amdgpu_bo *doorbells;
+
+    /* bitmap for dynamic doorbell allocation from this object */
+    unsigned long *doorbell_bitmap;

When doorbell_available_index is now not longer used you should probably remove it as well.

Actually, KFD used two levels of bitmaps to allocate doorbell:

- first level of bitmap to assign a pool of 1024 doorbells to a client from big doorbell pool,

- second level of bitmap to allocate one doorbell to the queue from the 1024 doorbells of the clients.

In the new design, we are allocating and saving a page of doorbells for KFD kernel, but we still need one bitmap to dynamically allocate and assign single doorbell from the page, both at process and kernel level. So doorbell_bitmap is replacement for doorbell_available_index in kfd, to make code symmetrical everywhere (kfd kernel, mes kernel and kfd process).

- Shashank


  };
    enum kfd_mempool {




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux