[AMD Official Use Only - Internal Distribution Only] Series is Reviewed-by: Stanley.Yang <Stanley.Yang@xxxxxxx> Regards, Stanley > -----Original Message----- > From: Zhang, Hawking <Hawking.Zhang@xxxxxxx> > Sent: Friday, April 16, 2021 5:44 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Yang, Stanley <Stanley.Yang@xxxxxxx>; > John Clements <John.Clemenets@xxxxxxx>; Li, Dennis > <Dennis.Li@xxxxxxx> > Cc: Zhang, Hawking <Hawking.Zhang@xxxxxxx> > Subject: [PATCH 2/2] drm/amdgpu: only harvest gcea/mmea error status in > aldebaran > > In aldebaran, driver only needs to harvest SDP RdRspStatus, WrRspStatus > and first parity error on RdRsp data. Check error type before harvest error > information. > > Signed-off-by: Hawking Zhang <Hawking.Zhang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 21 ++++++++++++--------- > drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 11 +++++++---- > 2 files changed, 19 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c > index 9ca76a3ac38c..91427543aabe 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c > @@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block > gfx_v9_4_2_utc_blocks[] = { > REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, > WRITE_COUNTERS, 1) }, }; > > -static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs = > +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = > { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; > > static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, > @@ -1040,11 +1040,11 @@ static void > gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev) > uint32_t i, j; > > mutex_lock(&adev->grbm_idx_mutex); > - for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { > - for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; > + for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { > + for (j = 0; j < 
gfx_v9_4_2_ea_err_status_regs.instance; > j++) { > gfx_v9_4_2_select_se_sh(adev, i, 0, j); > - > WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_reg > s), 0x10); > + > WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_re > gs), 0x10); > } > } > gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ > -1089,17 +1089,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct > amdgpu_device *adev) > > mutex_lock(&adev->grbm_idx_mutex); > > - for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) { > - for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance; > + for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { > + for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; > j++) { > gfx_v9_4_2_select_se_sh(adev, i, 0, j); > reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( > - gfx_v9_4_2_rdrsp_status_regs)); > - if (reg_value) > + gfx_v9_4_2_ea_err_status_regs)); > + if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, > SDP_RDRSP_STATUS) || > + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, > SDP_WRRSP_STATUS) || > + REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, > +SDP_RDRSP_DATAPARITY_ERROR)) { > dev_warn(adev->dev, "GCEA err detected at > instance: %d, status: 0x%x!\n", > j, reg_value); > + } > /* clear after read */ > - > WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_reg > s), 0x10); > + > WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_re > gs), 0x10); > } > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c > b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c > index d0f41346ea0c..cc69c434d0de 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c > +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c > @@ -1286,7 +1286,7 @@ static void > mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev) > } > } > > -static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = { > +static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = { > { SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 }, > { SOC15_REG_ENTRY(MMHUB, 0, 
regMMEA1_ERR_STATUS), 0, 0, 0 }, > { SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 }, > @@ -1303,12 +1303,15 @@ static void > mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) > if (!amdgpu_ras_is_supported(adev, > AMDGPU_RAS_BLOCK__MMHUB)) > return; > > - for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) { > + for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) { > reg_value = > - > RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs > [i])); > - if (reg_value) > + > RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_r > egs[i])); > + if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, > SDP_RDRSP_STATUS) || > + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, > SDP_WRRSP_STATUS) || > + REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, > +SDP_RDRSP_DATAPARITY_ERROR)) { > dev_warn(adev->dev, "MMHUB EA err detected at > instance: %d, status: 0x%x!\n", > i, reg_value); > + } > } > } > > -- > 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx