On Wed, Mar 29, 2023 at 11:48 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote: > > This patch: > - adds a doorbell manager structure in kfd device structure. > - plugs-in doorbell manager APIs for KFD kernel doorbell allocations > and free functions. > - removes the doorbell bitmap, uses the one in the doorbell manager > structure for all the allocations. > - updates the get_kernel_doorbell and free_kernel_doorbell functions > accordingly > > Cc: Alex Deucher <alexander.deucher@xxxxxxx> > Cc: Christian Koenig <christian.koenig@xxxxxxx> > Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx> > --- > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +- > drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 109 ++++++---------------- > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + > 3 files changed, 35 insertions(+), 81 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index b8936340742b..a2e4cbddba26 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -435,8 +435,8 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) > atomic_set(&kfd->compute_profile, 0); > > mutex_init(&kfd->doorbell_mutex); > - memset(&kfd->doorbell_available_index, 0, > - sizeof(kfd->doorbell_available_index)); > + memset(kfd->kernel_doorbells.doorbell_bitmap, 0, > + kfd->kernel_doorbells.size / BITS_PER_LONG); > > atomic_set(&kfd->sram_ecc_flag, 0); > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > index cd4e61bf0493..df259f2cc58a 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c > @@ -61,81 +61,37 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) > /* Doorbell calculations for device init. 
*/ > int kfd_doorbell_init(struct kfd_dev *kfd) > { > - size_t doorbell_start_offset; > - size_t doorbell_aperture_size; > - size_t doorbell_process_limit; > + int r; > + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells; > > - /* > - * With MES enabled, just set the doorbell base as it is needed > - * to calculate doorbell physical address. > - */ > - if (kfd->shared_resources.enable_mes) { > - kfd->doorbell_base = > - kfd->shared_resources.doorbell_physical_address; > - return 0; > - } > - > - /* > - * We start with calculations in bytes because the input data might > - * only be byte-aligned. > - * Only after we have done the rounding can we assume any alignment. > - */ > - > - doorbell_start_offset = > - roundup(kfd->shared_resources.doorbell_start_offset, > - kfd_doorbell_process_slice(kfd)); > - > - doorbell_aperture_size = > - rounddown(kfd->shared_resources.doorbell_aperture_size, > - kfd_doorbell_process_slice(kfd)); > - > - if (doorbell_aperture_size > doorbell_start_offset) > - doorbell_process_limit = > - (doorbell_aperture_size - doorbell_start_offset) / > - kfd_doorbell_process_slice(kfd); > - else > - return -ENOSPC; > - > - if (!kfd->max_doorbell_slices || > - doorbell_process_limit < kfd->max_doorbell_slices) > - kfd->max_doorbell_slices = doorbell_process_limit; > - > - kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + > - doorbell_start_offset; > - > - kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); > - > - kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, > - kfd_doorbell_process_slice(kfd)); > - > - if (!kfd->doorbell_kernel_ptr) > + /* Bitmap to dynamically allocate doorbells from kernel page */ > + kernel_doorbells->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE, GFP_KERNEL); > + if (!kernel_doorbells->doorbell_bitmap) { > + DRM_ERROR("Failed to allocate kernel doorbell bitmap\n"); > return -ENOMEM; > + } > > - pr_debug("Doorbell initialization:\n"); > - pr_debug("doorbell base == 
0x%08lX\n", > - (uintptr_t)kfd->doorbell_base); > - > - pr_debug("doorbell_base_dw_offset == 0x%08lX\n", > - kfd->doorbell_base_dw_offset); > - > - pr_debug("doorbell_process_limit == 0x%08lX\n", > - doorbell_process_limit); > - > - pr_debug("doorbell_kernel_offset == 0x%08lX\n", > - (uintptr_t)kfd->doorbell_base); > - > - pr_debug("doorbell aperture size == 0x%08lX\n", > - kfd->shared_resources.doorbell_aperture_size); > + /* Alloc and reserve doorbells for KFD kernel usages */ > + kernel_doorbells->size = PAGE_SIZE; > + r = amdgpu_doorbell_alloc_page(kfd->adev, kernel_doorbells); Just do something like: r = amdgpu_bo_create_kernel(kfd->adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_DOORBELL, &kfd->doorbell_kernel_bo, NULL, &kfd->doorbell_kernel_ptr); Then you have your KFD pointer to its doorbell memory and no need to track the ranges. Alex > + if (r) { > + pr_err("failed to allocate kernel doorbells\n"); > + bitmap_free(kernel_doorbells->doorbell_bitmap); > + return r; > + } > > - pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); > + kfd->doorbell_kernel_ptr = kernel_doorbells->cpu_addr; > + pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); > > return 0; > } > > void kfd_doorbell_fini(struct kfd_dev *kfd) > { > - if (kfd->doorbell_kernel_ptr) > - iounmap(kfd->doorbell_kernel_ptr); > + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells; > + > + bitmap_free(kernel_doorbells->doorbell_bitmap); > + amdgpu_doorbell_free_page(kfd->adev, kernel_doorbells); > } > > int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, > @@ -186,24 +142,19 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, > unsigned int *doorbell_off) > { > u32 inx; > + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells; > > mutex_lock(&kfd->doorbell_mutex); > - inx = find_first_zero_bit(kfd->doorbell_available_index, > - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); > + inx = 
find_first_zero_bit(kernel_doorbells->doorbell_bitmap, > + kernel_doorbells->size); > > - __set_bit(inx, kfd->doorbell_available_index); > + __set_bit(inx, kernel_doorbells->doorbell_bitmap); > mutex_unlock(&kfd->doorbell_mutex); > > if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) > return NULL; > > - inx *= kfd->device_info.doorbell_size / sizeof(u32); > - > - /* > - * Calculating the kernel doorbell offset using the first > - * doorbell page. > - */ > - *doorbell_off = kfd->doorbell_base_dw_offset + inx; > + *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kernel_doorbells->bo, inx); > > pr_debug("Get kernel queue doorbell\n" > " doorbell offset == 0x%08X\n" > @@ -216,12 +167,12 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, > void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) > { > unsigned int inx; > + struct amdgpu_doorbell_obj *kernel_doorbells = &kfd->kernel_doorbells; > > - inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) > - * sizeof(u32) / kfd->device_info.doorbell_size; > + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); > > mutex_lock(&kfd->doorbell_mutex); > - __clear_bit(inx, kfd->doorbell_available_index); > + __clear_bit(inx, kernel_doorbells->doorbell_bitmap); > mutex_unlock(&kfd->doorbell_mutex); > } > > @@ -280,7 +231,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) > if (!pdd->doorbell_index) { > int r = kfd_alloc_process_doorbells(pdd->dev, > &pdd->doorbell_index); > - if (r) > + if (r < 0) > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 552c3ac85a13..0ed33416c35f 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -346,6 +346,9 @@ struct kfd_dev { > > /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ > struct dev_pagemap pgmap; > + > + /* Kernel doorbells for KFD device */ > + struct amdgpu_doorbell_obj kernel_doorbells; > }; > > 
enum kfd_mempool { > -- > 2.40.0 >