[AMD Official Use Only - General] PMFW doesn't reset any CE/UE status or count in debug mode. Who takes responsibility for resetting them in debug mode? Regards, Stanley > -----Original Message----- > From: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Sent: Tuesday, October 17, 2023 8:46 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Yang, Stanley <Stanley.Yang@xxxxxxx>; Li, > Candice <Candice.Li@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx>; > Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Wang, Yang(Kevin) > <KevinYang.Wang@xxxxxxx> > Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Subject: [PATCH 6/6] drm/amdgpu: drop status reset for GCEA 9.4.3 and > MMEA 1.8 > > PMFW will be responsible for it. > > Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 22 ------- > drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 86 ------------------------- > 2 files changed, 108 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > index a1c2c952d882..65da72735e52 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > @@ -3996,27 +3996,6 @@ static void > gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, > WREG32_SOC15(GC, GET_INST(GC, xcc_id), > regVML2_WALKER_MEM_ECC_STATUS, 0x3); } > > -static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device > *adev, > - int xcc_id) > -{ > - uint32_t i, j; > - uint32_t value; > - > - mutex_lock(&adev->grbm_idx_mutex); > - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { > - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { > - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); > - value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), > regGCEA_ERR_STATUS); > - value = REG_SET_FIELD(value, GCEA_ERR_STATUS, > - CLEAR_ERROR_STATUS, 0x1); > - WREG32_SOC15(GC, GET_INST(GC, xcc_id), > regGCEA_ERR_STATUS, value); > - } > - } > - gfx_v9_4_3_xcc_select_se_sh(adev, 
0xffffffff, 0xffffffff, 0xffffffff, > - xcc_id); > - mutex_unlock(&adev->grbm_idx_mutex); > -} > - > static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device > *adev, > int xcc_id) > { > @@ -4042,7 +4021,6 @@ static void > gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, > void *ras_error_status, int xcc_id) { > gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); > - gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); > gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); } > > diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c > b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c > index aa00483e7b37..616d75add087 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c > +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c > @@ -756,96 +756,10 @@ static void > mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev) > mmhub_v1_8_inst_query_ras_err_status(adev, i); } > > -static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device > *adev, > - uint32_t mmhub_inst) > -{ > - uint32_t mmea_cgtt_clk_cntl_addr_dist; > - uint32_t mmea_err_status_addr_dist; > - uint32_t reg_value; > - uint32_t i; > - > - /* reset mmea ras err status */ > - mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - > regMMEA0_CGTT_CLK_CTRL; > - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - > regMMEA0_ERR_STATUS; > - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { > - /* force clk branch on for response path > - * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1 > - */ > - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, > - regMMEA0_CGTT_CLK_CTRL, > - i * > mmea_cgtt_clk_cntl_addr_dist); > - reg_value = REG_SET_FIELD(reg_value, > MMEA0_CGTT_CLK_CTRL, > - SOFT_OVERRIDE_RETURN, 1); > - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, > - regMMEA0_CGTT_CLK_CTRL, > - i * mmea_cgtt_clk_cntl_addr_dist, > - reg_value); > - > - /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ > - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, > - 
regMMEA0_ERR_STATUS, > - i * > mmea_err_status_addr_dist); > - reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, > - CLEAR_ERROR_STATUS, 1); > - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, > - regMMEA0_ERR_STATUS, > - i * mmea_err_status_addr_dist, > - reg_value); > - > - /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 > */ > - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, > - regMMEA0_CGTT_CLK_CTRL, > - i * > mmea_cgtt_clk_cntl_addr_dist); > - reg_value = REG_SET_FIELD(reg_value, > MMEA0_CGTT_CLK_CTRL, > - SOFT_OVERRIDE_RETURN, 0); > - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, > - regMMEA0_CGTT_CLK_CTRL, > - i * mmea_cgtt_clk_cntl_addr_dist, > - reg_value); > - } > - > - /* reset mm_cane ras err status > - * force clk branch on for response path > - * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1 > - */ > - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, > regMM_CANE_ICG_CTRL); > - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, > - SOFT_OVERRIDE_ATRET, 1); > - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, > reg_value); > - > - /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ > - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, > regMM_CANE_ERR_STATUS); > - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS, > - CLEAR_ERROR_STATUS, 1); > - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, > reg_value); > - > - /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */ > - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, > regMM_CANE_ICG_CTRL); > - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, > - SOFT_OVERRIDE_ATRET, 0); > - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, > reg_value); > -} > - > -static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device > *adev) -{ > - uint32_t inst_mask; > - uint32_t i; > - > - if (!amdgpu_ras_is_supported(adev, > AMDGPU_RAS_BLOCK__MMHUB)) { > - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); > - return; > - } > - > - inst_mask = adev->aid_mask; > - for_each_inst(i, inst_mask) > - 
mmhub_v1_8_inst_reset_ras_err_status(adev, i); > -} > - > static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { > .query_ras_error_count = mmhub_v1_8_query_ras_error_count, > .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, > .query_ras_error_status = mmhub_v1_8_query_ras_error_status, > - .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, > }; > > struct amdgpu_mmhub_ras mmhub_v1_8_ras = { > -- > 2.35.1