On Wed, May 29, 2024 at 5:50 AM Sunil Khatri <sunil.khatri@xxxxxxx> wrote: > > Add general registers of gfx9 in ipdump for > devcoredump support. > > Signed-off-by: Sunil Khatri <sunil.khatri@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 124 +++++++++++++++++++++++++- > 1 file changed, 123 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 3c8c5abf35ab..528a20393313 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -149,6 +149,94 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); > #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 > #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 > > +static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { > + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), > + SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), > + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), > + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL), > + SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), > + SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), > + SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), > + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), > + /* cp header registers */ > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), > + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), > + /* SE status registers */ > + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), > + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), > + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), > + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) > +}; This list should probably be split between gfx and non-gfx registers since MI chips don't have a gfx pipe, only compute. If adev->gfx.num_gfx_rings == 0, then the chip does not have a gfx pipe. Also MI300 probably needs special handling because it has multiple XCCs. For now, you might want to skip the IP dump for gfx9 chips with adev->gfx.num_gfx_rings == 0, and then add new register lists for the MI parts later. Alex > + > enum ta_ras_gfx_subblock { > /*CPC*/ > TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, > @@ -1994,6 +2082,20 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, > hw_prio, NULL); > } > > +static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev) > +{ > + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); > + uint32_t *ptr; > + > + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); > + if (ptr == NULL) { > + DRM_ERROR("Failed to allocate memory for IP Dump\n"); > + adev->gfx.ip_dump_core = NULL; > + } else { > + adev->gfx.ip_dump_core = ptr; > + } > +} > + > static int gfx_v9_0_sw_init(void *handle) > { > int i, j, k, r, ring_id; > @@ -2171,6 +2273,8 @@ static int gfx_v9_0_sw_init(void *handle) > return -EINVAL; > } > > + gfx_v9_0_alloc_ip_dump(adev); > + > return 0; > } > > @@ -2206,6 +2310,8 @@ static int gfx_v9_0_sw_fini(void *handle) > } > gfx_v9_0_free_microcode(adev); > > + kfree(adev->gfx.ip_dump_core); > + > return 0; > } > > @@ -6840,6 +6946,22 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) > } > } > > +static void gfx_v9_ip_dump(void *handle) > +{ > + struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + uint32_t i; > + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9); > + > + if (!adev->gfx.ip_dump_core) > + return; > + > + amdgpu_gfx_off_ctrl(adev, false); > + for (i = 0; i < reg_count; i++) > + adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i])); > + amdgpu_gfx_off_ctrl(adev, true); > + > +} > + > static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { > .name = "gfx_v9_0", > .early_init = gfx_v9_0_early_init, > @@ -6856,7 +6978,7 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { > .set_clockgating_state = gfx_v9_0_set_clockgating_state, > .set_powergating_state = gfx_v9_0_set_powergating_state, > .get_clockgating_state = gfx_v9_0_get_clockgating_state, > - .dump_ip_state = NULL, > + .dump_ip_state = gfx_v9_ip_dump, > .print_ip_state = NULL, > }; > > -- > 2.34.1 >