On 10/28/2024 5:40 PM, Xiaogang.Chen wrote:
From: Xiaogang Chen <xiaogang.chen@xxxxxxx> To allow user better understand the cause triggering runlist oversubscription. No function change. Signed-off-by: Xiaogang Chen Xiaogang.Chen@xxxxxxx --- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 55 ++++++++++++++----- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 37930629edc5..e22be6da23b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -28,6 +28,10 @@ #include "kfd_kernel_queue.h" #include "kfd_priv.h" +#define OVER_SUBSCRIPTION_PROCESS_COUNT 1 << 0 +#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT 1 << 1 +#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT 1 << 2 + static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) { @@ -40,7 +44,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, - bool *over_subscription) + int *over_subscription) { unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; unsigned int map_queue_size; @@ -58,17 +62,20 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). */ - *over_subscription = false; + *over_subscription = 0; if (node->max_proc_per_quantum > 1) max_proc_per_quantum = node->max_proc_per_quantum; - if ((process_count > max_proc_per_quantum) || - compute_queue_count > get_cp_queues_num(pm->dqm) || - gws_queue_count > 1) { - *over_subscription = true; + if (process_count > max_proc_per_quantum) + *over_subscription = *over_subscription || OVER_SUBSCRIPTION_PROCESS_COUNT;
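I think you want to use the bitwise OR (|) and not the logical OR (||) here. With the logical OR, each of these assignments always sets over_subscription to 1 whenever its condition is true, because `x || <non-zero constant>` evaluates to 1. Since 1 is the value of OVER_SUBSCRIPTION_PROCESS_COUNT, the later `&` checks in pm_create_runlist_ib() would report every oversubscription as the process-count case and would never match the compute-queue or GWS bits.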
Regards,
Mukul
+ if (compute_queue_count > get_cp_queues_num(pm->dqm)) + *over_subscription = *over_subscription || OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT; + if (gws_queue_count > 1) + *over_subscription = *over_subscription || OVER_SUBSCRIPTION_GWS_QUEUE_COUNT; + + if (*over_subscription) dev_dbg(dev, "Over subscribed runlist\n"); - } map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ @@ -89,7 +96,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer, unsigned int *rl_buffer_size, - bool *is_over_subscription) + int *is_over_subscription) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; @@ -134,7 +141,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, struct qcm_process_device *qpd; struct queue *q; struct kernel_queue *kq; - bool is_over_subscription; + int is_over_subscription; rl_wptr = retval = processes_mapped = 0; @@ -212,16 +219,38 @@ static int pm_create_runlist_ib(struct packet_manager *pm, dev_dbg(dev, "Finished map process and queues to runlist\n"); if (is_over_subscription) { - if (!pm->is_over_subscription) - dev_warn( + if (!pm->is_over_subscription) { + + if (is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT) { + dev_warn( dev, - "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n"); + "process number is more than maximum number of processes that" + " HWS can schedule concurrently. Runlist is getting" + " oversubscribed. Expect reduced ROCm performance.\n"); + } + + if (is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT) { + dev_warn( + dev, + "compute queue number is more than assigned compute queues." + " Runlist is getting" + " oversubscribed. Expect reduced ROCm performance.\n"); + } + + if (is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT) { + dev_warn( + dev, + "compute queue for cooperative workgroup is more than allowed." + " Runlist is getting" + " oversubscribed. 
Expect reduced ROCm performance.\n"); + } + } retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); } - pm->is_over_subscription = is_over_subscription; + pm->is_over_subscription = is_over_subscription ? true : false; for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]);