Re: [PATCH] drm/amd/amdkfd: add/remove kfd queues on stop/start KFD scheduling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2024-10-16 21:44, shaoyunl wrote:
Add back the kfd queues on start scheduling that were originally
removed on stop scheduling.

Signed-off-by: shaoyunl <shaoyun.liu@xxxxxxx>

---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 40 +++++++++++++++++--
  1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b2b16a812e73..437ed0ae6e76 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -204,6 +204,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (!down_read_trylock(&adev->reset_domain->sem))
  		return -EIO;
+	if (!dqm->sched_running || dqm->sched_halt)
+		return 0;
memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
  	queue_input.process_id = qpd->pqm->process->pasid;
@@ -272,6 +274,8 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
if (!down_read_trylock(&adev->reset_domain->sem))
  		return -EIO;
+	if (!dqm->sched_running || dqm->sched_halt)
+		return 0;
memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
  	queue_input.doorbell_offset = q->properties.doorbell_off;
@@ -292,7 +296,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
  	return r;
  }
-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
  {
  	struct device_process_node *cur;
  	struct device *dev = dqm->dev->adev->dev;
@@ -319,6 +323,33 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
  	return retval;
  }
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+	struct device_process_node *cur;
+	struct device *dev = dqm->dev->adev->dev;
+	struct qcm_process_device *qpd;
+	struct queue *q;
+	int retval = 0;
+
+	list_for_each_entry(cur, &dqm->queues, list) {
+		qpd = cur->qpd;
+		list_for_each_entry(q, &qpd->queues_list, list) {
+			if (q->properties.is_active) {

You could reduce the level of indentation below by changing this condition to

	if (!q->properties.is_active)
		continue;

Other than that, the patch looks reasonable to me. I assume there is no other way to tell the MES to stop scheduling user mode queues.

Regards,
  Felix


+				retval = add_queue_mes(dqm, q, qpd);
+				if (retval) {
+					dev_err(dev, "%s: Failed to add queue %d for dev %d",
+						__func__,
+						q->properties.queue_id,
+						dqm->dev->id);
+					return retval;
+				}
+			}
+		}
+	}
+
+	return retval;
+}
+
  static int suspend_all_queues_mes(struct device_queue_manager *dqm)
  {
  	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
@@ -1742,7 +1773,7 @@ static int halt_cpsch(struct device_queue_manager *dqm)
  						 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
  				USE_DEFAULT_GRACE_PERIOD, false);
  		else
-			ret = remove_all_queues_mes(dqm);
+			ret = remove_all_kfd_queues_mes(dqm);
  	}
  	dqm->sched_halt = true;
  	dqm_unlock(dqm);
@@ -1768,6 +1799,9 @@ static int unhalt_cpsch(struct device_queue_manager *dqm)
  		ret = execute_queues_cpsch(dqm,
  					   KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
  			0, USE_DEFAULT_GRACE_PERIOD);
+	else
+		ret = add_all_kfd_queues_mes(dqm);
+
  	dqm_unlock(dqm);
return ret;
@@ -1867,7 +1901,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
  	if (!dqm->dev->kfd->shared_resources.enable_mes)
  		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
  	else
-		remove_all_queues_mes(dqm);
+		remove_all_kfd_queues_mes(dqm);
dqm->sched_running = false;



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux