Make CU occupancy calculations work on GFX 9.4.3 by updating the logic to handle multiple XCCs correctly. Signed-off-by: Mukul Joshi <mukul.joshi@xxxxxxx> --- v1->v2: - Break into 2 patches, one for the generic change and the other for GFX v9.4.3. - Incorporate Harish's comments. drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++++++------ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 +++++++++- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index fe8a8e7e9a9a..e6bc808d9c59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -963,14 +963,14 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, */ pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; - soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, inst); - reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, inst, + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0, GET_INST(GC, inst)); + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; if (wave_cnt != 0) { queue_cnt->wave_cnt += wave_cnt; queue_cnt->doorbell_off = - (RREG32_SOC15(GC, inst, mmCP_HQD_PQ_DOORBELL_CONTROL) & + (RREG32_SOC15(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL) & CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK) >> CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; } @@ -1034,7 +1034,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); - soc15_grbm_select(adev, 1, 0, 0, 0, inst); + soc15_grbm_select(adev, 1, 0, 0, 0, GET_INST(GC, inst)); /* * Iterate through the shader engines and arrays of the device @@ -1047,7 +1047,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, se_cnt = adev->gfx.config.max_shader_engines; for (se_idx = 0; se_idx < se_cnt; se_idx++) { amdgpu_gfx_select_se_sh(adev, se_idx, 0, 0xffffffff, inst); - queue_map = RREG32_SOC15(GC, inst, mmSPI_CSQ_WF_ACTIVE_STATUS); + queue_map = RREG32_SOC15(GC, GET_INST(GC, inst), mmSPI_CSQ_WF_ACTIVE_STATUS); /* * Assumption: queue map encodes following schema: four @@ -1072,7 +1072,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, } amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, inst); - soc15_grbm_select(adev, 0, 0, 0, 0, inst); + soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, inst)); unlock_spi_csq_mutexes(adev); /* Update the output parameters and return */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d73841268c9b..831e0e92bd23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -292,8 +292,13 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) wave_cnt = 0; max_waves_per_cu = 0; + /* + * For GFX9.4.3, fetch the CU occupancy from the first XCC in the partition. + * Later we multiply the wave count by number of XCCs in the partition to get + * the total wave counts across all XCCs in the partition. + */ dev->kfd2kgd->get_cu_occupancy(dev->adev, cu_occupancy, - &max_waves_per_cu, 0); + &max_waves_per_cu, ffs(dev->xcc_mask) - 1); for (i = 0; i < AMDGPU_MAX_QUEUES; i++) { if (cu_occupancy[i].wave_cnt != 0 && @@ -302,6 +307,9 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) wave_cnt += cu_occupancy[i].wave_cnt; } + /* Update wave_cnt for the number of XCCs in the partition */ + wave_cnt *= NUM_XCC(dev->xcc_mask); + /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); -- 2.35.1