On 2023-07-07 11:56, Kim, Jonathan wrote:
[Public]
-----Original Message-----
From: Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>
Sent: Friday, July 7, 2023 11:46 AM
To: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Subject: Re: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
On 2023-07-07 10:59, Kim, Jonathan wrote:
[Public]
-----Original Message-----
From: Huang, JinHuiEric <JinHuiEric.Huang@xxxxxxx>
Sent: Thursday, July 6, 2023 2:19 PM
To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Kim, Jonathan <Jonathan.Kim@xxxxxxx>; Huang, JinHuiEric
<JinHuiEric.Huang@xxxxxxx>
Subject: [PATCH 4/6] drm/amdkfd: enable grace period for xcc instance
each xcc instance needs to get iq wait time and set
grace period accordingly.
Signed-off-by: Eric Huang <jinhuieric.huang@xxxxxxx>
---
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 9 ++++--
.../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +-
.../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 32 +++++++++++-------
-
.../drm/amd/amdkfd/kfd_packet_manager_v9.c | 9 +++---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
5 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a2bff3f01359..0f12c1989e14 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1606,6 +1606,8 @@ static int set_sched_resources(struct
device_queue_manager *dqm)
static int initialize_cpsch(struct device_queue_manager *dqm)
{
+ uint32_t xcc_id, xcc_mask = dqm->dev->xcc_mask;
+
pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
mutex_init(&dqm->lock_hidden);
@@ -1620,8 +1622,11 @@ static int initialize_cpsch(struct
device_queue_manager *dqm)
init_sdma_bitmaps(dqm);
if (dqm->dev->kfd2kgd->get_iq_wait_times)
- dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
- &dqm->wait_times, 0);
+ for_each_inst(xcc_id, xcc_mask)
+ dqm->dev->kfd2kgd->get_iq_wait_times(
+ dqm->dev->adev,
+ &dqm->wait_times[xcc_id],
+ xcc_id);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 7dd4b177219d..62a6dc8d3032 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -262,7 +262,7 @@ struct device_queue_manager {
/* used for GFX 9.4.3 only */
uint32_t current_logical_xcc_start;
- uint32_t wait_times;
+ uint32_t wait_times[32];
I think wait_times[16] should be sufficient. We only get the hamming
weight of 16 bits for NUM_XCC and I believe the xcc_mask is declared as a
uint16_t in the KGD portion anyway. We may as well align to that.
wait_queue_head_t destroy_wait;
};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 401096c103b2..f37ab4b6d88c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -374,27 +374,31 @@ int pm_update_grace_period(struct
packet_manager *pm, uint32_t grace_period)
{
int retval = 0;
uint32_t *buffer, size;
+ uint32_t xcc_id, xcc_mask = pm->dqm->dev->xcc_mask;
size = pm->pmf->set_grace_period_size;
mutex_lock(&pm->lock);
if (size) {
- kq_acquire_packet_buffer(pm->priv_queue,
- size / sizeof(uint32_t),
- (unsigned int **)&buffer);
-
- if (!buffer) {
- pr_err("Failed to allocate buffer on kernel queue\n");
- retval = -ENOMEM;
- goto out;
- }
+ for_each_inst(xcc_id, xcc_mask) {
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
- retval = pm->pmf->set_grace_period(pm, buffer,
grace_period);
- if (!retval)
- kq_submit_packet(pm->priv_queue);
- else
- kq_rollback_packet(pm->priv_queue);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel
queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+
+ retval = pm->pmf->set_grace_period(pm, buffer,
+ grace_period, xcc_id);
+ if (!retval)
+ kq_submit_packet(pm->priv_queue);
+ else
+ kq_rollback_packet(pm->priv_queue);
In the event of partial success do we need to roll back (i.e. resubmit default
grace period) on failure?
The function pm_set_grace_period_v9 always return 0, and it is not
complicate operation, it should be always successful. Partial success
will not be the case we should care about at this moment.
There's a roll back logic already built-in prior to this that's now only partial with this patch.
Either way, after a side discussion with Mukul, it looks like the CP register to set the default grace period is done by the master XCC per partition so we'd can just stick to the current way of setting the grace period as long as we reference the start_xcc_id correctly on read and write.
The start_xcc_id is not defined in amd_staging_drm_next. If we assume 0
is always the first instance number, the instance parameter in function
get_iq_wait_times and build_grace_period_packet_info will be not needed.
Regards,
Eric
Plus the suspend call will set the grace period per partition, so the wait_times array wasn't required in the first place.
Thanks,
Jon
Regards,
Eric
I believe the default grace period is put in place for better CWSR
performance in normal mode, so leaving fast preemption settings on failure
could impact performance.
Thanks,
Jon
+ }
}
out:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 8fda16e6fee6..a9443d661957 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -287,7 +287,8 @@ static int pm_map_queues_v9(struct
packet_manager
*pm, uint32_t *buffer,
static int pm_set_grace_period_v9(struct packet_manager *pm,
uint32_t *buffer,
- uint32_t grace_period)
+ uint32_t grace_period,
+ uint32_t inst)
{
struct pm4_mec_write_data_mmio *packet;
uint32_t reg_offset = 0;
@@ -295,14 +296,14 @@ static int pm_set_grace_period_v9(struct
packet_manager *pm,
pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
pm->dqm->dev->adev,
- pm->dqm->wait_times,
+ pm->dqm->wait_times[inst],
grace_period,
®_offset,
®_data,
- 0);
+ inst);
if (grace_period == USE_DEFAULT_GRACE_PERIOD)
- reg_data = pm->dqm->wait_times;
+ reg_data = pm->dqm->wait_times[inst];
packet = (struct pm4_mec_write_data_mmio *)buffer;
memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d4c9ee3f9953..22c4a403ddd7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1400,7 +1400,7 @@ struct packet_manager_funcs {
enum kfd_unmap_queues_filter mode,
uint32_t filter_param, bool reset);
int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
- uint32_t grace_period);
+ uint32_t grace_period, uint32_t inst);
int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
uint64_t fence_address, uint64_t
fence_value);
int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
--
2.34.1