Re: [PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I'm not sure about this one. Looks like the interface is getting needlessly more complicated. Now the caller has to keep track of the runlist IB address and size just to pass those to another function. I could understand this if there was a use case that needs to separate the allocation of the runlist and sending it to the HW. But I don't see that.

Some background for why I think the interface is the way it is: The runlist IB is continuously executed by the HWS firmware. If the runlist is oversubscribed, the HWS firmware will loop through it. So the IB must remain allocated until pm_send_unmap_queue is called. Currently pm_send_runlist creates the runlist IB and sends it to the HWS. You're separating that into creation and sending. Do you see a case where you need to send the same runlist multiple times? Or do something else between creating the runlist and sending it to the HWS?

pm_release_ib releases the runlist IB, assuming that he HWS is no longer using it. Maybe this could be combined with pm_send_unmap_queue. I'm not 100% sure because there are some filter parameters that may leave some queues mapped. If the two can be combined, I'd suggest the following name and interface changes to reflect how I think this is being used today:

 * pm_send_runlist -> pm_create_and_send_runlist
 * pm_send_unmap_queue + pm_release_ib -> pm_preempt_and_free_runlist

I see you're doing a lot of cleanup and refactoring in this area of the code. Is there some bigger picture here, some idea of the end-state you're trying to get to? Knowing where you're going with this may make it easier to review the code.

Regards,
  Felix

On 2019-11-21 4:26 p.m., Yong Zhao wrote:
This is consistent with the calling sequence in unmap_queues_cpsch().

Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
Signed-off-by: Yong Zhao <Yong.Zhao@xxxxxxx>
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++++++++++++++--
  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +++++--------------
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  7 ++++++-
  3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 510f2d1bb8bb..fd7d90136b94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm)
  static int map_queues_cpsch(struct device_queue_manager *dqm)
  {
  	int retval;
+	uint64_t rl_ib_gpu_addr;
+	size_t rl_ib_size;
if (!dqm->sched_running)
  		return 0;
@@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)
  	if (dqm->active_runlist)
  		return 0;
- retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+	retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
+				&rl_ib_gpu_addr, &rl_ib_size);
+	if (retval)
+		goto fail_create_runlist_ib;
+
+	pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
+
+	retval = pm_send_runlist(&dqm->packets, &dqm->queues,
+			rl_ib_gpu_addr, rl_ib_size);
  	pr_debug("%s sent runlist\n", __func__);
  	if (retval) {
  		pr_err("failed to execute runlist\n");
-		return retval;
+		goto fail_create_runlist_ib;
  	}
  	dqm->active_runlist = true;
return retval;
+
+fail_create_runlist_ib:
+	pm_destroy_runlist_ib(&dqm->packets);
+	return retval;
  }
/* dqm->lock mutex has to be locked before calling this function */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 4a9433257428..6ec54e9f9392 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
  	return retval;
  }
-static int pm_create_runlist_ib(struct packet_manager *pm,
+int pm_create_runlist_ib(struct packet_manager *pm,
  				struct list_head *queues,
  				uint64_t *rl_gpu_addr,
  				size_t *rl_size_bytes)
@@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
  		/* build map process packet */
  		if (proccesses_mapped >= pm->dqm->processes_count) {
  			pr_debug("Not enough space left in runlist IB\n");
-			pm_destroy_runlist_ib(pm);
  			return -ENOMEM;
  		}
@@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
  	return retval;
  }
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+			uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
  {
-	uint64_t rl_gpu_ib_addr;
  	uint32_t *rl_buffer;
-	size_t rl_ib_size, packet_size_dwords;
+	size_t packet_size_dwords;
  	int retval;
- retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
-					&rl_ib_size);
-	if (retval)
-		goto fail_create_runlist_ib;
-
-	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
-
  	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
  	mutex_lock(&pm->lock);
@@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
  	if (retval)
  		goto fail_acquire_packet_buffer;
- retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
+	retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
  					rl_ib_size / sizeof(uint32_t), false);
  	if (retval)
  		goto fail_create_runlist;
@@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
  	kq_rollback_packet(pm->priv_queue);
  fail_acquire_packet_buffer:
  	mutex_unlock(&pm->lock);
-fail_create_runlist_ib:
-	pm_destroy_runlist_ib(pm);
  	return retval;
  }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 389cda7c8f1a..6accb605b9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
  void pm_uninit(struct packet_manager *pm);
  int pm_send_set_resources(struct packet_manager *pm,
  				struct scheduling_resources *res);
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+		uint64_t rl_ib_gpu_addr, size_t rl_ib_size);
  int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
  				uint32_t fence_value);
@@ -989,6 +990,10 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
  			uint32_t filter_param, bool reset,
  			unsigned int sdma_engine);
+int pm_create_runlist_ib(struct packet_manager *pm,
+				struct list_head *queues,
+				uint64_t *rl_gpu_addr,
+				size_t *rl_size_bytes);
  void pm_destroy_runlist_ib(struct packet_manager *pm);
/* Following PM funcs can be shared among VI and AI */
_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux