On Wed, Mar 29, 2023 at 11:48 AM Shashank Sharma <shashank.sharma@xxxxxxx> wrote: > > This patch: > - adds a doorbell object in MES structure, to manage the MES > doorbell requirements in kernel. > - Removes the doorbell management code, and its variables from > the doorbell_init function, it will be done in doorbell manager > now. > - creates doorbell pages for MES kernel level needs (doorbells > for MES self tests) > - current MES code was allocating MES doorbells in MES process context, > but those were rung using kernel doorbell calls. This patch allocates > MES kernel doorbells instead for this in add_hw_queue. > > Cc: Alex Deucher <alexander.deucher@xxxxxxx> > Cc: Christian Koenig <christian.koenig@xxxxxxx> > Signed-off-by: Shashank Sharma <shashank.sharma@xxxxxxx> > Signed-off-by: Arvind Yadav <arvind.yadav@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 105 ++++++++++++------------ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 5 +- > 2 files changed, 56 insertions(+), 54 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index 0c546245793b..423cd642647c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -65,91 +65,89 @@ unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( > doorbell_id * 2); > } > > -static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev, > +static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, > struct amdgpu_mes_process *process, > int ip_type, uint64_t *doorbell_index) > { > unsigned int offset, found; > + struct amdgpu_doorbell_obj *doorbells = &adev->mes.kernel_doorbells; > > - if (ip_type == AMDGPU_RING_TYPE_SDMA) { > + if (ip_type == AMDGPU_RING_TYPE_SDMA) > offset = adev->doorbell_index.sdma_engine[0]; > - found = find_next_zero_bit(process->doorbell_bitmap, > - AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, > - offset); > - } else { > - found = 
find_first_zero_bit(process->doorbell_bitmap, > - AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS); > - } > + else > + offset = 0; > > + found = find_next_zero_bit(doorbells->doorbell_bitmap, > + AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, > + offset); > if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) { > DRM_WARN("No doorbell available\n"); > return -ENOSPC; > } > > - set_bit(found, process->doorbell_bitmap); > + set_bit(found, doorbells->doorbell_bitmap); > > - *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev, > - process->doorbell_index, found); > + *doorbell_index = amdgpu_doorbell_index_on_bar(adev, doorbells->bo, found); > > return 0; > } > > -static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev, > +static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, > struct amdgpu_mes_process *process, > uint32_t doorbell_index) > { > unsigned int old, doorbell_id; > + struct amdgpu_doorbell_obj *doorbells = &adev->mes.kernel_doorbells; > > - doorbell_id = doorbell_index - > - (process->doorbell_index * > - amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32); > + /* Find the relative index of the doorbell in this object */ > + doorbell_id = doorbell_index - doorbells->start; > doorbell_id /= 2; > > - old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap); > + old = test_and_clear_bit(doorbell_id, doorbells->doorbell_bitmap); > WARN_ON(!old); > } > > static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) > { > - size_t doorbell_start_offset; > - size_t doorbell_aperture_size; > - size_t doorbell_process_limit; > - size_t aggregated_doorbell_start; > - int i; > - > - aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); > - aggregated_doorbell_start = > - roundup(aggregated_doorbell_start, PAGE_SIZE); > - > - doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE; > - doorbell_start_offset = > - roundup(doorbell_start_offset, > - 
amdgpu_mes_doorbell_process_slice(adev)); > - > - doorbell_aperture_size = adev->doorbell.size; > - doorbell_aperture_size = > - rounddown(doorbell_aperture_size, > - amdgpu_mes_doorbell_process_slice(adev)); > - > - if (doorbell_aperture_size > doorbell_start_offset) > - doorbell_process_limit = > - (doorbell_aperture_size - doorbell_start_offset) / > - amdgpu_mes_doorbell_process_slice(adev); > - else > - return -ENOSPC; > + int i, r; > + u32 agg_db_start_index, nbits; > + struct amdgpu_doorbell_obj *mes_doorbells = &adev->mes.kernel_doorbells; > > - adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); > - adev->mes.max_doorbell_slices = doorbell_process_limit; > + /* Allocated one page doorbells for MES kernel usages */ > + mes_doorbells->size = PAGE_SIZE; > > - /* allocate Qword range for aggregated doorbell */ > - for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) > - adev->mes.aggregated_doorbells[i] = > - aggregated_doorbell_start / sizeof(u32) + i * 2; > + nbits = DIV_ROUND_UP(mes_doorbells->size, sizeof(u32)); > + mes_doorbells->doorbell_bitmap = bitmap_zalloc(nbits, GFP_KERNEL); > + if (!mes_doorbells->doorbell_bitmap) { > + DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); > + return -ENOMEM; > + } > + > + r = amdgpu_doorbell_alloc_page(adev, mes_doorbells); Rather than allocating a separate page here, just allocate two pages in the earlier patch where you allocate the KGD doorbell and then just use the second page here. 
Alex > + if (r) { > + DRM_ERROR("Failed to create MES doorbell object\n, err=%d", r); > + bitmap_free(mes_doorbells->doorbell_bitmap); > + return r; > + } > + > + /* Get the absolute doorbell index for aggregated doorbells */ > + agg_db_start_index = mes_doorbells->start; > + for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { > + adev->mes.aggregated_doorbells[i] = agg_db_start_index + i; > + set_bit(agg_db_start_index + i, mes_doorbells->doorbell_bitmap); > + } > > - DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit); > return 0; > } > > +static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) > +{ > + struct amdgpu_doorbell_obj *mes_doorbells = &adev->mes.kernel_doorbells; > + > + bitmap_free(mes_doorbells->doorbell_bitmap); > + amdgpu_doorbell_free_page(adev, mes_doorbells); > +} > + > int amdgpu_mes_init(struct amdgpu_device *adev) > { > int i, r; > @@ -248,6 +246,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) > amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); > amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); > amdgpu_device_wb_free(adev, adev->mes.read_val_offs); > + amdgpu_mes_doorbell_free(adev); > > idr_destroy(&adev->mes.pasid_idr); > idr_destroy(&adev->mes.gang_id_idr); > @@ -677,7 +676,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, > *queue_id = queue->queue_id = r; > > /* allocate a doorbell index for the queue */ > - r = amdgpu_mes_queue_doorbell_get(adev, gang->process, > + r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, > qprops->queue_type, > &qprops->doorbell_off); > if (r) > @@ -735,7 +734,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, > return 0; > > clean_up_doorbell: > - amdgpu_mes_queue_doorbell_free(adev, gang->process, > + amdgpu_mes_kernel_doorbell_free(adev, gang->process, > qprops->doorbell_off); > clean_up_queue_id: > spin_lock_irqsave(&adev->mes.queue_id_lock, flags); > @@ -790,7 +789,7 @@ int amdgpu_mes_remove_hw_queue(struct 
amdgpu_device *adev, int queue_id) > queue_id); > > list_del(&queue->list); > - amdgpu_mes_queue_doorbell_free(adev, gang->process, > + amdgpu_mes_kernel_doorbell_free(adev, gang->process, > queue->doorbell_off); > amdgpu_mes_unlock(&adev->mes); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index 97c05d08a551..e7e9dfe44c99 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -27,6 +27,7 @@ > #include "amdgpu_irq.h" > #include "kgd_kfd_interface.h" > #include "amdgpu_gfx.h" > +#include "amdgpu_doorbell.h" > #include <linux/sched/mm.h> > > #define AMDGPU_MES_MAX_COMPUTE_PIPES 8 > @@ -76,7 +77,6 @@ struct amdgpu_mes { > uint32_t kiq_version; > > uint32_t total_max_queue; > - uint32_t doorbell_id_offset; > uint32_t max_doorbell_slices; > > uint64_t default_process_quantum; > @@ -128,6 +128,9 @@ struct amdgpu_mes { > int (*kiq_hw_init)(struct amdgpu_device *adev); > int (*kiq_hw_fini)(struct amdgpu_device *adev); > > + /* MES Kernel doorbells */ > + struct amdgpu_doorbell_obj kernel_doorbells; > + > /* ip specific functions */ > const struct amdgpu_mes_funcs *funcs; > }; > -- > 2.40.0 >