This patch: - adds a doorbell bo in kfd device structure. - creates doorbell page for kfd kernel usages. - updates the get_kernel_doorbell and free_kernel_doorbell functions accordingly V2: Do not use wrapper API, use direct amdgpu_create_kernel(Alex) V3: - Move single variable declaration below (Christian) - Add a to-do item to reuse the KGD kernel level doorbells for KFD for non-MES cases, instead of reserving one page (Felix) Cc: Alex Deucher <alexander.deucher@xxxxxxx> Cc: Christian Koenig <christian.koenig@xxxxxxx> Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx> Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 - drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 109 +++++++--------------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++ 3 files changed, 39 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 00f528eb9812..36fbe9c840ee 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -437,8 +437,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) atomic_set(&kfd->compute_profile, 0); mutex_init(&kfd->doorbell_mutex); - memset(&kfd->doorbell_available_index, 0, - sizeof(kfd->doorbell_available_index)); atomic_set(&kfd->sram_ecc_flag, 0); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 38c9e1ca6691..f7d45057ed32 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) /* Doorbell calculations for device init. */ int kfd_doorbell_init(struct kfd_dev *kfd) { - size_t doorbell_start_offset; - size_t doorbell_aperture_size; - size_t doorbell_process_limit; + int size = PAGE_SIZE; + int r; /* - * With MES enabled, just set the doorbell base as it is needed - * to calculate doorbell physical address. - */ - if (kfd->shared_resources.enable_mes) { - kfd->doorbell_base = - kfd->shared_resources.doorbell_physical_address; - return 0; - } - - /* - * We start with calculations in bytes because the input data might - * only be byte-aligned. - * Only after we have done the rounding can we assume any alignment. + * Todo: KFD kernel level operations need only one doorbell for + * ring test/HWS. So instead of reserving a whole page here for + * kernel, reserve and consume a doorbell from existing KGD kernel + * doorbell page. */ - doorbell_start_offset = - roundup(kfd->shared_resources.doorbell_start_offset, - kfd_doorbell_process_slice(kfd)); - - doorbell_aperture_size = - rounddown(kfd->shared_resources.doorbell_aperture_size, - kfd_doorbell_process_slice(kfd)); - - if (doorbell_aperture_size > doorbell_start_offset) - doorbell_process_limit = - (doorbell_aperture_size - doorbell_start_offset) / - kfd_doorbell_process_slice(kfd); - else - return -ENOSPC; - - if (!kfd->max_doorbell_slices || - doorbell_process_limit < kfd->max_doorbell_slices) - kfd->max_doorbell_slices = doorbell_process_limit; - - kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + - doorbell_start_offset; - - kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); - - kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, - kfd_doorbell_process_slice(kfd)); - - if (!kfd->doorbell_kernel_ptr) + /* Bitmap to dynamically allocate doorbells from kernel page */ + kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL); + if (!kfd->doorbell_bitmap) { + DRM_ERROR("Failed to allocate kernel doorbell bitmap\n"); return -ENOMEM; + } - pr_debug("Doorbell initialization:\n"); - pr_debug("doorbell base == 0x%08lX\n", - (uintptr_t)kfd->doorbell_base); - - pr_debug("doorbell_base_dw_offset == 0x%08lX\n", - kfd->doorbell_base_dw_offset); - - pr_debug("doorbell_process_limit == 0x%08lX\n", - doorbell_process_limit); - - pr_debug("doorbell_kernel_offset == 0x%08lX\n", - (uintptr_t)kfd->doorbell_base); - - pr_debug("doorbell aperture size == 0x%08lX\n", - kfd->shared_resources.doorbell_aperture_size); - - pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); + /* Alloc a doorbell page for KFD kernel usages */ + r = amdgpu_bo_create_kernel(kfd->adev, + size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_DOORBELL, + &kfd->doorbells, + NULL, + (void **)&kfd->doorbell_kernel_ptr); + if (r) { + pr_err("failed to allocate kernel doorbells\n"); + bitmap_free(kfd->doorbell_bitmap); + return r; + } + pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); return 0; } void kfd_doorbell_fini(struct kfd_dev *kfd) { - if (kfd->doorbell_kernel_ptr) - iounmap(kfd->doorbell_kernel_ptr); + bitmap_free(kfd->doorbell_bitmap); + amdgpu_bo_free_kernel(&kfd->doorbells, NULL, + (void **)&kfd->doorbell_kernel_ptr); } int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, @@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, u32 inx; mutex_lock(&kfd->doorbell_mutex); - inx = find_first_zero_bit(kfd->doorbell_available_index, - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32)); - __set_bit(inx, kfd->doorbell_available_index); + __set_bit(inx, kfd->doorbell_bitmap); mutex_unlock(&kfd->doorbell_mutex); if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; - inx *= kfd->device_info.doorbell_size / sizeof(u32); - - /* - * Calculating the kernel doorbell offset using the first - * doorbell page. - */ - *doorbell_off = kfd->doorbell_base_dw_offset + inx; + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx); pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" @@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) { unsigned int inx; - inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) - * sizeof(u32) / kfd->device_info.doorbell_size; + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); mutex_lock(&kfd->doorbell_mutex); - __clear_bit(inx, kfd->doorbell_available_index); + __clear_bit(inx, kfd->doorbell_bitmap); mutex_unlock(&kfd->doorbell_mutex); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 94a438956868..42c215782d72 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -349,6 +349,12 @@ struct kfd_dev { /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Kernel doorbells for KFD device */ + struct amdgpu_bo *doorbells; + + /* bitmap for dynamic doorbell allocation from this object */ + unsigned long *doorbell_bitmap; }; enum kfd_mempool { -- 2.40.1