+ /* Do not reuse SDMA0 for any subsequent SDMA queue
+ * requests on Aldebaran. If SDMA0's queues are all
+ * full, then this process should never use SDMA0
+ * for any further requests
+ */
+ if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN &&
+ engine == 0)
+ qpd->sdma_engine_bitmap &= ~(1ULL << engine);
+
+ available_queue_bitmap = sdma_engine_mask(engine, num_engines);
+ available_queue_bitmap &= dqm->sdma_bitmap;
+
+ if (!available_queue_bitmap)
+ continue;
+ /* Take the selected engine off the list so we will not
+ * allocate two queues onto the same engine
+ */
+ qpd->sdma_engine_bitmap &= ~(1ULL << engine);
+ found_sdma = true;
+
+ bit = __ffs64(available_queue_bitmap);
+ dqm->sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ q->properties.sdma_engine_id = q->sdma_id % num_engines;
+ q->properties.sdma_queue_id = q->sdma_id / num_engines;
+ break;
+ }
+ if (!found_sdma) {
pr_err("No more SDMA queue to allocate\n");
return -ENOMEM;
}
-
- bit = __ffs64(dqm->sdma_bitmap);
- dqm->sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
- q->properties.sdma_engine_id = q->sdma_id %
- get_num_sdma_engines(dqm);
- q->properties.sdma_queue_id = q->sdma_id /
- get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
- if (dqm->xgmi_sdma_bitmap == 0) {
+ num_engines = get_num_xgmi_sdma_engines(dqm);
+ for_each_set_bit(engine, &(qpd->xgmi_sdma_engine_bitmap), num_engines) {
+ available_queue_bitmap = sdma_engine_mask(engine, num_engines);
+ available_queue_bitmap &= dqm->xgmi_sdma_bitmap;
+
+ if (!available_queue_bitmap)
+ continue;
+ /* Take the selected engine off the list so we will not
+ * allocate two queues onto the same engine
+ */
+ qpd->xgmi_sdma_engine_bitmap &= ~(1ULL << engine);
+ found_sdma = true;
+
+ bit = __ffs64(available_queue_bitmap);
+ dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
+ q->sdma_id = bit;
+ /* sdma_engine_id is a global engine ID that counts
+ * both PCIe-optimized SDMAs and XGMI-optimized
+ * SDMAs. The calculation below assumes the first N
+ * engines are always the PCIe-optimized ones
+ */
+ q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
+ q->sdma_id % get_num_xgmi_sdma_engines(dqm);
+ q->properties.sdma_queue_id = q->sdma_id /
+ get_num_xgmi_sdma_engines(dqm);
+ break;
+ }
+ if (!found_sdma) {
pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
}
- bit = __ffs64(dqm->xgmi_sdma_bitmap);
- dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
- q->sdma_id = bit;
- /* sdma_engine_id is sdma id including
- * both PCIe-optimized SDMAs and XGMI-
- * optimized SDMAs. The calculation below
- * assumes the first N engines are always
- * PCIe-optimized ones
- */
- q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
- q->sdma_id % get_num_xgmi_sdma_engines(dqm);
- q->properties.sdma_queue_id = q->sdma_id /
- get_num_xgmi_sdma_engines(dqm);
}
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -1084,16 +1137,32 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,
}
static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd,
struct queue *q)
{
+ uint32_t engine = q->properties.sdma_engine_id;
+
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
if (q->sdma_id >= get_num_sdma_queues(dqm))
return;
dqm->sdma_bitmap |= (1ULL << q->sdma_id);
+ /* Don't give SDMA0 back to be reallocated on Aldebaran.
+ * It is only OK to use this engine for the first allocation
+ * within a process.
+ */
+ if (!(dqm->dev->device_info->asic_family == CHIP_ALDEBARAN &&
+ engine == 0))
+ qpd->sdma_engine_bitmap |= (1ULL << engine);
+
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
return;
dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
+ /* The engine ID in the queue properties is the global engine
+ * ID; the XGMI engine bitmap excludes the PCIe-optimized
+ * engines.
+ */
+ engine -= get_num_sdma_engines(dqm);
+ qpd->xgmi_sdma_engine_bitmap |= (1ULL << engine);
}
}
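
To make the translation concrete: at allocation time an XGMI queue receives the global id get_num_sdma_engines() + sdma_id % get_num_xgmi_sdma_engines(), and the subtraction above recovers the XGMI-local index used in the per-process bitmap. A standalone round-trip check, with hypothetical engine counts chosen purely for illustration:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint32_t num_sdma = 2, num_xgmi = 3;	/* hypothetical counts */
	uint32_t sdma_id = 7;		/* bit index in xgmi_sdma_bitmap */

	/* allocation: the global engine id skips the PCIe-optimized engines */
	uint32_t engine_id = num_sdma + sdma_id % num_xgmi;	/* 2 + 7 % 3 = 3 */

	/* deallocation: recover the XGMI-local engine index */
	uint32_t local = engine_id - num_sdma;			/* 1 */

	assert(local == sdma_id % num_xgmi);
	return 0;
}
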
@@ -1303,7 +1372,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm_lock(dqm);
- retval = allocate_sdma_queue(dqm, q);
+ retval = allocate_sdma_queue(dqm, qpd, q);
dqm_unlock(dqm);
if (retval)
goto out;
@@ -1365,7 +1434,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
dqm_lock(dqm);
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue(dqm, qpd, q);
dqm_unlock(dqm);
}
out:
@@ -1536,7 +1605,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue(dqm, qpd, q);
pdd->sdma_past_activity_counter += sdma_val;
}
@@ -1751,9 +1820,9 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
/* Clear all user mode queues */
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue(dqm, qpd, q);
else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
- deallocate_sdma_queue(dqm, q);
+ deallocate_sdma_queue(dqm, qpd, q);
if (q->properties.is_active) {
decrement_queue_count(dqm, q->properties.type);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ab83b0de6b22..c38eebc9db4d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -576,6 +576,8 @@ struct qcm_process_device {
struct list_head priv_queue_list;
unsigned int queue_count;
+ unsigned long sdma_engine_bitmap;
+ unsigned long xgmi_sdma_engine_bitmap;
unsigned int vmid;
bool is_debug;
unsigned int evicted; /* eviction counter, 0=active */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 21ec8a18cad2..13c85624bf7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1422,6 +1422,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p)
{
struct kfd_process_device *pdd = NULL;
+ const struct kfd_device_info *dev_info = dev->dqm->dev->device_info;
if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
return NULL;
@@ -1446,6 +1447,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->qpd.pqm = &p->pqm;
pdd->qpd.evicted = 0;
pdd->qpd.mapped_gws_queue = false;
+ pdd->qpd.sdma_engine_bitmap = BIT_ULL(dev_info->num_sdma_engines) - 1;
+ pdd->qpd.xgmi_sdma_engine_bitmap = BIT_ULL(dev_info->num_xgmi_sdma_engines) - 1;
pdd->process = p;
pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false;
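
A note on the initial values: BIT_ULL(n) - 1 yields a mask with the low n bits set, so each new process starts with every engine available. With num_sdma_engines == 5, for example, sdma_engine_bitmap is initialized to BIT_ULL(5) - 1 == 0x1f, i.e. engines 0 through 4 all free; on Aldebaran, the allocation path above then permanently clears bit 0 the first time SDMA0 is considered.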