Re: [PATCH v3 08/12] drm/amdgpu: use doorbell manager for kfd kernel doorbells

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2023-06-20 13:16, Shashank Sharma wrote:
This patch:
- adds a doorbell BO to the KFD device structure.
- creates a doorbell page for KFD kernel usage.
- updates the get_kernel_doorbell and free_kernel_doorbell functions
   accordingly

V2: Do not use wrapper API, use amdgpu_bo_create_kernel() directly (Alex)
V3:
  - Move single variable declaration below (Christian)
  - Add a to-do item to reuse the KGD kernel level doorbells for
    KFD for non-MES cases, instead of reserving one page (Felix)

Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: Christian Koenig <christian.koenig@xxxxxxx>
Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx>

Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>


---
  drivers/gpu/drm/amd/amdkfd/kfd_device.c   |   2 -
  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 109 +++++++---------------
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h     |   6 ++
  3 files changed, 39 insertions(+), 78 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 00f528eb9812..36fbe9c840ee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -437,8 +437,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
  	atomic_set(&kfd->compute_profile, 0);
mutex_init(&kfd->doorbell_mutex);
-	memset(&kfd->doorbell_available_index, 0,
-		sizeof(kfd->doorbell_available_index));
atomic_set(&kfd->sram_ecc_flag, 0);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 38c9e1ca6691..f7d45057ed32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
  /* Doorbell calculations for device init. */
  int kfd_doorbell_init(struct kfd_dev *kfd)
  {
-	size_t doorbell_start_offset;
-	size_t doorbell_aperture_size;
-	size_t doorbell_process_limit;
+	int size = PAGE_SIZE;
+	int r;
/*
-	 * With MES enabled, just set the doorbell base as it is needed
-	 * to calculate doorbell physical address.
-	 */
-	if (kfd->shared_resources.enable_mes) {
-		kfd->doorbell_base =
-			kfd->shared_resources.doorbell_physical_address;
-		return 0;
-	}
-
-	/*
-	 * We start with calculations in bytes because the input data might
-	 * only be byte-aligned.
-	 * Only after we have done the rounding can we assume any alignment.
+	 * Todo: KFD kernel level operations need only one doorbell for
+	 * ring test/HWS. So instead of reserving a whole page here for
+	 * kernel, reserve and consume a doorbell from existing KGD kernel
+	 * doorbell page.
  	 */
- doorbell_start_offset =
-			roundup(kfd->shared_resources.doorbell_start_offset,
-					kfd_doorbell_process_slice(kfd));
-
-	doorbell_aperture_size =
-			rounddown(kfd->shared_resources.doorbell_aperture_size,
-					kfd_doorbell_process_slice(kfd));
-
-	if (doorbell_aperture_size > doorbell_start_offset)
-		doorbell_process_limit =
-			(doorbell_aperture_size - doorbell_start_offset) /
-						kfd_doorbell_process_slice(kfd);
-	else
-		return -ENOSPC;
-
-	if (!kfd->max_doorbell_slices ||
-	    doorbell_process_limit < kfd->max_doorbell_slices)
-		kfd->max_doorbell_slices = doorbell_process_limit;
-
-	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
-				doorbell_start_offset;
-
-	kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
-
-	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
-					   kfd_doorbell_process_slice(kfd));
-
-	if (!kfd->doorbell_kernel_ptr)
+	/* Bitmap to dynamically allocate doorbells from kernel page */
+	kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
+	if (!kfd->doorbell_bitmap) {
+		DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
  		return -ENOMEM;
+	}
- pr_debug("Doorbell initialization:\n");
-	pr_debug("doorbell base           == 0x%08lX\n",
-			(uintptr_t)kfd->doorbell_base);
-
-	pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
-			kfd->doorbell_base_dw_offset);
-
-	pr_debug("doorbell_process_limit  == 0x%08lX\n",
-			doorbell_process_limit);
-
-	pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
-			(uintptr_t)kfd->doorbell_base);
-
-	pr_debug("doorbell aperture size  == 0x%08lX\n",
-			kfd->shared_resources.doorbell_aperture_size);
-
-	pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+	/* Alloc a doorbell page for KFD kernel usages */
+	r = amdgpu_bo_create_kernel(kfd->adev,
+				    size,
+				    PAGE_SIZE,
+				    AMDGPU_GEM_DOMAIN_DOORBELL,
+				    &kfd->doorbells,
+				    NULL,
+				    (void **)&kfd->doorbell_kernel_ptr);
+	if (r) {
+		pr_err("failed to allocate kernel doorbells\n");
+		bitmap_free(kfd->doorbell_bitmap);
+		return r;
+	}
+ pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
  	return 0;
  }
void kfd_doorbell_fini(struct kfd_dev *kfd)
  {
-	if (kfd->doorbell_kernel_ptr)
-		iounmap(kfd->doorbell_kernel_ptr);
+	bitmap_free(kfd->doorbell_bitmap);
+	amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
+			     (void **)&kfd->doorbell_kernel_ptr);
  }
int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
@@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
  	u32 inx;
mutex_lock(&kfd->doorbell_mutex);
-	inx = find_first_zero_bit(kfd->doorbell_available_index,
-					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+	inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
- __set_bit(inx, kfd->doorbell_available_index);
+	__set_bit(inx, kfd->doorbell_bitmap);
  	mutex_unlock(&kfd->doorbell_mutex);
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
  		return NULL;
- inx *= kfd->device_info.doorbell_size / sizeof(u32);
-
-	/*
-	 * Calculating the kernel doorbell offset using the first
-	 * doorbell page.
-	 */
-	*doorbell_off = kfd->doorbell_base_dw_offset + inx;
+	*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
pr_debug("Get kernel queue doorbell\n"
  			"     doorbell offset   == 0x%08X\n"
@@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
  {
  	unsigned int inx;
- inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
-		* sizeof(u32) / kfd->device_info.doorbell_size;
+	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
mutex_lock(&kfd->doorbell_mutex);
-	__clear_bit(inx, kfd->doorbell_available_index);
+	__clear_bit(inx, kfd->doorbell_bitmap);
  	mutex_unlock(&kfd->doorbell_mutex);
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 94a438956868..42c215782d72 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -349,6 +349,12 @@ struct kfd_dev {
/* HMM page migration MEMORY_DEVICE_PRIVATE mapping */
  	struct dev_pagemap pgmap;
+
+	/* Kernel doorbells for KFD device */
+	struct amdgpu_bo *doorbells;
+
+	/* bitmap for dynamic doorbell allocation from this object */
+	unsigned long *doorbell_bitmap;
  };
enum kfd_mempool {



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux