From: Amber Lin <Amber.Lin@xxxxxxx> On GC 9.4.3, DW 41 in MQD is repurposed as compute_tg_chunk_size for cooperative dispatch. When it's a AQL queue, set compute_tg_chunk_size as 1 to spread work groups evenly among XCCs. If it's PM4 queue, unset compute_tg_chunk_size to disable cooperative mode. v3: set compute_tg_chunk_size as 1 instead of #CUs per XCC v2: set compute_tg_chunk_size as #CUs per XCC instead of total wave slots per XCC Signed-off-by: Amber Lin <Amber.Lin@xxxxxxx> Reviewed-by: Sean Keely <Sean.Keely@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 0778e587a2d6..4dfae19714ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -135,6 +135,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, { uint64_t addr; struct v9_mqd *m; + struct amdgpu_device *adev = (struct amdgpu_device *)mm->dev->adev; m = (struct v9_mqd *) mqd_mem_obj->cpu_ptr; addr = mqd_mem_obj->gpu_addr; @@ -167,6 +168,20 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + /* On GC 9.4.3, DW 41 is re-purposed as + * compute_tg_chunk_size. + * TODO: review this setting when active CUs in the + * partition play a role + */ + m->compute_static_thread_mgmt_se6 = 1; + } + } else { + /* PM4 queue */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + m->compute_static_thread_mgmt_se6 = 0; + /* TODO: program pm4_target_xcc */ + } } if (q->tba_addr) { -- 2.39.2