RE: [PATCH] drm/amd/amdkfd: add/remove kfd queues on start/stop KFD scheduling

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - AMD Internal Distribution Only]


Good catch. Thanks. I will send out another review for that.

 

Regards

Shaoyun.liu

 

From: Yang, Philip <Philip.Yang@xxxxxxx>
Sent: Thursday, October 17, 2024 3:47 PM
To: Liu, Shaoyun <Shaoyun.Liu@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Subject: Re: [PATCH] drm/amd/amdkfd: add/remove kfd queues on start/stop KFD scheduling

 

 

On 2024-10-17 12:12, Shaoyun Liu wrote:

From: shaoyunl <shaoyun.liu@xxxxxxx>
 
Add back, on start scheduling, the kfd queues that were originally
removed on stop scheduling.
 
Signed-off-by: Shaoyun Liu <shaoyun.liu@xxxxxxx>
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 40 +++++++++++++++++--
 1 file changed, 37 insertions(+), 3 deletions(-)
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b2b16a812e73..542363b4712e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -204,6 +204,8 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 
  if (!down_read_trylock(&adev->reset_domain->sem))
         return -EIO;
+ if (!dqm->sched_running || dqm->sched_halt) {

                    up_read(&adev->reset_domain->sem);

 
+        return 0;

            }

 
 
  memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
  queue_input.process_id = qpd->pqm->process->pasid;
@@ -272,6 +274,8 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
 
  if (!down_read_trylock(&adev->reset_domain->sem))
         return -EIO;
+ if (!dqm->sched_running || dqm->sched_halt) {

                     up_read(&adev->reset_domain->sem);

 
+        return 0;

              }

It would be simpler to move the sched_halt/sched_running check outside the reset sem lock, but I am not sure whether that is safe.

Regards,

Philip 

 
 
  memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
  queue_input.doorbell_offset = q->properties.doorbell_off;
@@ -292,7 +296,7 @@ static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
  return r;
 }
 
-static int remove_all_queues_mes(struct device_queue_manager *dqm)
+static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
 {
  struct device_process_node *cur;
  struct device *dev = dqm->dev->adev->dev;
@@ -319,6 +323,33 @@ static int remove_all_queues_mes(struct device_queue_manager *dqm)
  return retval;
 }
 
+static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
+{
+ struct device_process_node *cur;
+ struct device *dev = dqm->dev->adev->dev;
+ struct qcm_process_device *qpd;
+ struct queue *q;
+ int retval = 0;
+
+ list_for_each_entry(cur, &dqm->queues, list) {
+        qpd = cur->qpd;
+        list_for_each_entry(q, &qpd->queues_list, list) {
+                if (!q->properties.is_active)
+                        continue;
+                retval = add_queue_mes(dqm, q, qpd);
+                if (retval) {
+                        dev_err(dev, "%s: Failed to add queue %d for dev %d",
+                               __func__,
+                          q->properties.queue_id,
+                          dqm->dev->id);
+                        return retval;
+                }
+        }
+ }
+
+ return retval;
+}
+
 static int suspend_all_queues_mes(struct device_queue_manager *dqm)
 {
  struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
@@ -1742,7 +1773,7 @@ static int halt_cpsch(struct device_queue_manager *dqm)
                                         KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
                   USE_DEFAULT_GRACE_PERIOD, false);
         else
-                ret = remove_all_queues_mes(dqm);
+                ret = remove_all_kfd_queues_mes(dqm);
  }
  dqm->sched_halt = true;
  dqm_unlock(dqm);
@@ -1768,6 +1799,9 @@ static int unhalt_cpsch(struct device_queue_manager *dqm)
         ret = execute_queues_cpsch(dqm,
                                   KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
                 0, USE_DEFAULT_GRACE_PERIOD);
+ else
+        ret = add_all_kfd_queues_mes(dqm);
+
  dqm_unlock(dqm);
 
  return ret;
@@ -1867,7 +1901,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
  if (!dqm->dev->kfd->shared_resources.enable_mes)
         unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
  else
-        remove_all_queues_mes(dqm);
+        remove_all_kfd_queues_mes(dqm);
 
  dqm->sched_running = false;
 

[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux