In certain cooperative group dispatch scenarios the default SPI resource allocation may cause reduced per-CU workgroup occupancy. Set COMPUTE_RESOURCE_LIMITS.FORCE_SIMD_DIST=1 to mitigate soft hang scenarions. Suggested-by: Joseph Greathouse <Joseph.Greathouse@xxxxxxx> Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@xxxxxxx> --- * Found a bug in the previous reviewed version https://lists.freedesktop.org/archives/amd-gfx/2024-February/104101.html since the q->is_gws is unset for keeping the count. * updated pqm_set_gws to pass minfo holding gws state for the active queues and use that to apply the FORCE_SIMD_DIST_MASK. drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 42d881809dc7..0b71db4c96b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -303,6 +303,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); + if (minfo && KFD_GC_VERSION(mm->dev) >= IP_VERSION(9, 4, 2)) + m->compute_resource_limits = minfo->gws ? + COMPUTE_RESOURCE_LIMITS__FORCE_SIMD_DIST_MASK : 0; + q->is_active = QUEUE_IS_ACTIVE(*q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 677281c0793e..f4b327a2d4a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -542,6 +542,7 @@ struct mqd_update_info { } cu_mask; }; enum mqd_update_flag update_flag; + bool gws; }; /** diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 43eff221eae5..5416a110ced9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -95,6 +95,7 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws) { + struct mqd_update_info minfo = {0}; struct kfd_node *dev = NULL; struct process_queue_node *pqn; struct kfd_process_device *pdd; @@ -146,9 +147,10 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, } pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0; + minfo.gws = !!gws; return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, - pqn->q, NULL); + pqn->q, &minfo); } void kfd_process_dequeue_from_all_devices(struct kfd_process *p) -- 2.34.1