umc query_ras_error_count will be invoked to query umc correctable and uncorrectable error. It will reset the umc ras error counter after the query. Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> Acked-by: Alex Deucher <alexander.deucher@xxxxxxx> Reviewed-by: John Clements <John.Clements@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 90 +++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 2 + 2 files changed, 92 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 05fec10b1ed9..fe666ac93b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -27,6 +27,13 @@ #include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_sh_mask.h" +static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst; +} + static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) @@ -94,6 +101,89 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev *error_count += 1; } +static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, + regUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, + regUMCCH0_0_EccErrCnt); + + /* select the lower chip */ + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + + umc_reg_offset) * 4); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, + UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, + ecc_err_cnt_sel); + + /* clear lower chip error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V6_7_CE_CNT_INIT); + + /* select the higher chip */ + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + + umc_reg_offset) * 4); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, + UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, + ecc_err_cnt_sel); + + /* clear higher chip error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V6_7_CE_CNT_INIT); +} + +static void umc_v6_7_reset_error_count(struct amdgpu_device *adev) +{ + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_7_reset_error_count_per_channel(adev, + umc_reg_offset); + } +} + +static void umc_v6_7_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + + /*TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC registers. Will add the protection */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, + ch_inst); + umc_v6_7_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v6_7_querry_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + } + + umc_v6_7_reset_error_count(adev); +} + const struct amdgpu_umc_funcs umc_v6_7_funcs = { .ras_late_init = amdgpu_umc_ras_late_init, + .query_ras_error_count = umc_v6_7_query_ras_error_count, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index 6b881226b4f3..e59dbdb6ef9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -30,6 +30,8 @@ /* umc ce count initial value */ #define UMC_V6_7_CE_CNT_INIT (UMC_V6_7_CE_CNT_MAX - UMC_V6_7_CE_INT_THRESHOLD) +#define UMC_V6_7_INST_DIST 0x40000 + extern const struct amdgpu_umc_funcs umc_v6_7_funcs; #endif -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx