GFX10 and up have work group processors (WGP) and WGP mode is the native compile mode. KFD and ROCr have no visibility into whether a dispatch is operating in CU or WGP mode. Enforce CU masking to be pairwise contiguous in enablement and distribute CUs round-robin across the SEs in a pairwise manner, so that WGP mode is assumed at all times. Signed-off-by: Jonathan Kim <jonathan.kim@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 12 +++++++----- .../drm/amd/amdkfd/kfd_process_queue_manager.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index 49a283be6b57..7febd1e69d13 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -100,7 +100,9 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, { struct kfd_cu_info cu_info; uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0}; - int i, se, sh, cu, cu_bitmap_sh_mul; + bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0); + uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1; + int i, se, sh, cu, cu_bitmap_sh_mul, inc = wgp_mode_req ? 
2 : 1; amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info); @@ -167,13 +169,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, se_mask[i] = 0; i = 0; - for (cu = 0; cu < 16; cu++) { + for (cu = 0; cu < 16; cu = cu + inc) { for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) { for (se = 0; se < cu_info.num_shader_engines; se++) { if (cu_per_sh[se][sh] > cu) { - if (cu_mask[i / 32] & (1 << (i % 32))) - se_mask[se] |= 1 << (cu + sh * 16); - i++; + if (cu_mask[i / 32] & (en_mask << (i % 32))) + se_mask[se] |= en_mask << (cu + sh * 16); + i = i + inc; if (i == cu_mask_count) return; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c9c205df4a14..fc0416f6f83e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -498,6 +498,21 @@ int pqm_update_mqd(struct process_queue_manager *pqm, return -EFAULT; } + /* ASICs that have WGPs must enforce pairwise enabled mask checks. */ + if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK && minfo->cu_mask.ptr && + KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) { + int i; + + for (i = 0; i < minfo->cu_mask.count; i = i + 2) { + uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3; + + if (cu_pair && cu_pair != 0x3) { + pr_debug("CUs must be adjacent pairwise enabled.\n"); + return -EINVAL; + } + } + } + retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, pqn->q, minfo); if (retval != 0) -- 2.25.1