/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
* on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
*/
@@ -410,7 +404,6 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
if (wptr_bo)
amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
err_wptr_map_gart:
-err_alloc_doorbells:
err_bind_process:
err_pdd:
mutex_unlock(&p->mutex);
@@ -2163,12 +2156,6 @@ static int criu_restore_devices(struct kfd_process *p,
ret = PTR_ERR(pdd);
goto exit;
}
-
- if (!pdd->doorbell_index &&
- kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
- ret = -ENOMEM;
- goto exit;
- }
}
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ecb4c3abc629..855bf8ce3f16 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -371,7 +371,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
unsigned int found;
found = find_first_zero_bit(qpd->doorbell_bitmap,
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
pr_debug("No doorbells available");
return -EBUSY;
@@ -381,9 +381,9 @@ static int allocate_doorbell(struct qcm_process_device *qpd,
}
}
- q->properties.doorbell_off =
- kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
- q->doorbell_id);
+ q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
+ qpd->proc_doorbells,
+ q->doorbell_id);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 82b4a56b0afc..718cfe9cb4f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -227,46 +227,47 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
- if (!pdd->doorbell_index) {
- int r = kfd_alloc_process_doorbells(pdd->dev,
- &pdd->doorbell_index);
- if (r < 0)
- return 0;
- }
+ struct amdgpu_device *adev = pdd->dev->adev;
+ uint32_t first_db_index;
- return pdd->dev->doorbell_base +
- pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
+ first_db_index = amdgpu_doorbell_index_on_bar(adev, pdd->qpd.proc_doorbells, 0);
+ return adev->doorbell.base + first_db_index * sizeof(uint32_t);
}
-int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
- int r = 0;
-
- if (!kfd->shared_resources.enable_mes)
- r = ida_simple_get(&kfd->doorbell_ida, 1,
- kfd->max_doorbell_slices, GFP_KERNEL);
- else
- r = amdgpu_mes_alloc_process_doorbells(
- (struct amdgpu_device *)kfd->adev,
- doorbell_index);
+ int r;
+ struct qcm_process_device *qpd = &pdd->qpd;
- if (r > 0)
- *doorbell_index = r;
+ /* Allocate bitmap for dynamic doorbell allocation */
+ qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ GFP_KERNEL);
+ if (!qpd->doorbell_bitmap) {
+ DRM_ERROR("Failed to allocate process doorbell bitmap\n");
+ return -ENOMEM;
+ }
- if (r < 0)
- pr_err("Failed to allocate process doorbells\n");
+ /* Allocate doorbells for this process */
+ r = amdgpu_bo_create_kernel(kfd->adev,
+ kfd_doorbell_process_slice(kfd),
+ PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_DOORBELL,
+ &qpd->proc_doorbells,
+ NULL,
+ NULL);
+ if (r) {
+ DRM_ERROR("Failed to allocate process doorbells\n");
+ bitmap_free(qpd->doorbell_bitmap);
+ return r;
+ }
- return r;
+ return 0;
}
-void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
+void kfd_free_process_doorbells(struct kfd_dev *kfd, struct kfd_process_device *pdd)
{
- if (doorbell_index) {
- if (!kfd->shared_resources.enable_mes)
- ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
- else
- amdgpu_mes_free_process_doorbells(
- (struct amdgpu_device *)kfd->adev,
- doorbell_index);
- }
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ bitmap_free(qpd->doorbell_bitmap);
+ amdgpu_bo_free_kernel(&qpd->proc_doorbells, NULL, NULL);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0b199eb6dc88..dfff77379acb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -661,7 +661,10 @@ struct qcm_process_device {
uint64_t ib_base;
void *ib_kaddr;
- /* doorbell resources per process per device */
+ /* doorbells for kfd process */
+ struct amdgpu_bo *proc_doorbells;
+
+ /* bitmap for dynamic doorbell allocation from the bo */
unsigned long *doorbell_bitmap;
};
@@ -1009,9 +1012,9 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
- unsigned int *doorbell_index);
+ struct kfd_process_device *pdd);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
- unsigned int doorbell_index);
+ struct kfd_process_device *pdd);
/* GTT Sub-Allocator */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 51b1683ac5c1..217ff72a97b0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1037,10 +1037,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
- bitmap_free(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
- kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
+ kfd_free_process_doorbells(pdd->dev, pdd);
if (pdd->dev->shared_resources.enable_mes)
amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
@@ -1453,11 +1452,6 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
if (!KFD_IS_SOC15(dev))
return 0;
- qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
- GFP_KERNEL);
- if (!qpd->doorbell_bitmap)
- return -ENOMEM;
-
/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
pr_debug("reserved doorbell 0x%03x - 0x%03x\n", range_start, range_end);
pr_debug("reserved doorbell 0x%03x - 0x%03x\n",
@@ -1499,9 +1493,15 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
+ retval = kfd_alloc_process_doorbells(dev, pdd);
+ if (retval) {
+ pr_err("failed to allocate process doorbells\n");
+ goto err_free_pdd;
+ }
+
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
pr_err("Failed to init doorbell for process\n");
- goto err_free_pdd;
+ goto err_free_db;
}
pdd->dev = dev;
@@ -1529,7 +1529,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
false);
if (retval) {
pr_err("failed to allocate process context bo\n");
- goto err_free_pdd;
+ goto err_free_db;
}
memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
}
@@ -1541,6 +1541,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
return pdd;
+err_free_db:
+ kfd_free_process_doorbells(pdd->dev, pdd);
+
err_free_pdd:
kfree(pdd);
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5137476ec18e..6c95586f08a6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -344,17 +344,19 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
- if (q && p_doorbell_offset_in_process)
+ if (q && p_doorbell_offset_in_process) {
/* Return the doorbell offset within the doorbell page
* to the caller so it can be passed up to user mode
* (in bytes).
- * There are always 1024 doorbells per process, so in case
- * of 8-byte doorbells, there are two doorbell pages per
- * process.
+ * relative doorbell index = Absolute doorbell index -
+ * absolute index of first doorbell in the page.
*/
- *p_doorbell_offset_in_process =
- (q->properties.doorbell_off * sizeof(uint32_t)) &
- (kfd_doorbell_process_slice(dev) - 1);
+ uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
+ pdd->qpd.proc_doorbells, 0);
+
+ *p_doorbell_offset_in_process = (q->properties.doorbell_off
+ - first_db_index) * sizeof(uint32_t);
+ }
pr_debug("PQM After DQM create queue\n");
@@ -858,12 +860,6 @@ int kfd_criu_restore_queue(struct kfd_process *p,
goto exit;
}
- if (!pdd->doorbell_index &&
- kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
- ret = -ENOMEM;
- goto exit;
- }
-
/* data stored in this order: mqd, ctl_stack */
mqd = q_extra_data;
ctl_stack = mqd + q_data->mqd_size;