[AMD Official Use Only] Ping... > -----Original Message----- > From: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Sent: Wednesday, January 26, 2022 7:05 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Yang, Stanley <Stanley.Yang@xxxxxxx>; Chai, > Thomas <YiPeng.Chai@xxxxxxx>; Clements, John <John.Clements@xxxxxxx>; > Lazar, Lijo <Lijo.Lazar@xxxxxxx> > Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Subject: [PATCH] drm/amdgpu: add umc_convert_error_address to simplify > code > > Make code reusable and more simple. > > Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 94 +++++++++------------------ > drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 82 +++++++++-------------- > 2 files changed, 61 insertions(+), 115 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > index 47452b61b615..4abcdda42ac6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > @@ -114,21 +114,13 @@ static void > umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, > } > } > > -static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device > *adev, > - struct ras_err_data *err_data, > - uint32_t ch_inst, > - uint32_t umc_inst) > +static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, > + struct ras_err_data *err_data, uint32_t > ch_inst, > + uint32_t umc_inst, uint64_t err_addr, > + uint64_t mc_umc_status) > { > - uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column; > uint32_t channel_index; > - uint32_t eccinfo_table_idx; > - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); > - > - eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; > - channel_index = > - adev->umc.channel_idx_tbl[umc_inst * adev- > >umc.channel_inst_num + ch_inst]; > - > - mc_umc_status = ras- > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; > + uint64_t soc_pa, retired_page, column; > > if (mc_umc_status == 0) > return; > @@ -136,12 +128,13 @@ static void > umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, > if (!err_data->err_addr) > return; > > + channel_index = > + adev->umc.channel_idx_tbl[umc_inst * adev- > >umc.channel_inst_num + > +ch_inst]; > + > /* calculate error address if ue/ce error is detected */ > if (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && > (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || > REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { > - > - err_addr = ras- > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; > err_addr = REG_GET_FIELD(err_addr, > MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); > > /* translate umc channel address to soc pa, 3 parts are included > */ @@ -173,6 +166,23 @@ static void > umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, > } > } > > +static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device > *adev, > + struct ras_err_data *err_data, > + uint32_t ch_inst, > + uint32_t umc_inst) > +{ > + uint64_t mc_umc_status, err_addr; > + uint32_t eccinfo_table_idx; > + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); > + > + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; > + mc_umc_status = ras- > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; > + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; > + > + umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst, > + err_addr, mc_umc_status); > +} > + > static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device > *adev, > void *ras_error_status) > { > @@ -348,9 +358,7 @@ static void umc_v6_7_query_error_address(struct > amdgpu_device *adev, > uint32_t umc_inst) > { > uint32_t mc_umc_status_addr; > - uint32_t channel_index; > - uint64_t mc_umc_status, mc_umc_addrt0; > - uint64_t err_addr, soc_pa, retired_page, column; > + uint64_t mc_umc_status, mc_umc_addrt0, err_addr; > > mc_umc_status_addr = > SOC15_REG_OFFSET(UMC, 0, > regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -358,54 +366,10 @@ static > void umc_v6_7_query_error_address(struct amdgpu_device *adev, > SOC15_REG_OFFSET(UMC, 0, > regMCA_UMC_UMC0_MCUMC_ADDRT0); > > mc_umc_status = RREG64_PCIE((mc_umc_status_addr + > umc_reg_offset) * 4); > + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); > > - if (mc_umc_status == 0) > - return; > - > - if (!err_data->err_addr) { > - /* clear umc status */ > - WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, > 0x0ULL); > - return; > - } > - > - channel_index = > - adev->umc.channel_idx_tbl[umc_inst * adev- > >umc.channel_inst_num + ch_inst]; > - > - /* calculate error address if ue/ce error is detected */ > - if (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && > - (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || > - REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { > - > - err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * > 4); > - err_addr = REG_GET_FIELD(err_addr, > MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); > - > - /* translate umc channel address to soc pa, 3 parts are included > */ > - soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | > - ADDR_OF_256B_BLOCK(channel_index) | > - OFFSET_IN_256B_BLOCK(err_addr); > - > - /* The umc channel bits are not original values, they are hashed > */ > - SET_CHANNEL_HASH(channel_index, soc_pa); > - > - /* clear [C4 C3 C2] in soc physical address */ > - soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); > - > - /* we only save ue error information currently, ce is skipped */ > - if (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, UECC) > - == 1) { > - /* loop for all possibilities of [C4 C3 C2] */ > - for (column = 0; column < > UMC_V6_7_NA_MAP_PA_NUM; column++) { > - retired_page = soc_pa | (column << > UMC_V6_7_PA_C2_BIT); > - amdgpu_umc_fill_error_record(err_data, > err_addr, > - retired_page, channel_index, umc_inst); > - > - /* shift R14 bit */ > - retired_page ^= (0x1ULL << > UMC_V6_7_PA_R14_BIT); > - amdgpu_umc_fill_error_record(err_data, > err_addr, > - retired_page, channel_index, umc_inst); > - } > - } > - } > + umc_v6_7_convert_error_address(adev, err_data, ch_inst, umc_inst, > + err_addr, mc_umc_status); > > /* clear umc status */ > WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); > diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c > b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c > index de85a998ef99..df15b87ae12b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c > +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c > @@ -115,21 +115,13 @@ static void > umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, > } > } > > -static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device > *adev, > - struct ras_err_data *err_data, > - uint32_t ch_inst, > - uint32_t umc_inst) > +static void umc_v8_7_convert_error_address(struct amdgpu_device *adev, > + struct ras_err_data *err_data, uint32_t > ch_inst, > + uint32_t umc_inst, uint64_t err_addr, > + uint64_t mc_umc_status) > { > - uint64_t mc_umc_status, err_addr, retired_page; > - uint32_t channel_index; > - uint32_t eccinfo_table_idx; > - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); > - > - eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; > - channel_index = > - adev->umc.channel_idx_tbl[umc_inst * adev- > >umc.channel_inst_num + ch_inst]; > - > - mc_umc_status = ras- > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; > + uint32_t lsb, channel_index; > + uint64_t retired_page; > > if (mc_umc_status == 0) > return; > @@ -137,13 +129,16 @@ static void > umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, > if (!err_data->err_addr) > return; > > + channel_index = > + adev->umc.channel_idx_tbl[umc_inst * adev- > >umc.channel_inst_num + > +ch_inst]; > + > /* calculate error address if ue/ce error is detected */ > if (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && > (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || > REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { > - > - err_addr = ras- > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; > + lsb = REG_GET_FIELD(err_addr, > MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); > err_addr = REG_GET_FIELD(err_addr, > MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); > + err_addr &= ~((0x1ULL << lsb) - 1); > > /* translate umc channel address to soc pa, 3 parts are included > */ > retired_page = ADDR_OF_4KB_BLOCK(err_addr) | @@ -157,6 > +152,22 @@ static void umc_v8_7_ecc_info_query_error_address(struct > amdgpu_device *adev, > retired_page, channel_index, umc_inst); > } > } > +static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device > *adev, > + struct ras_err_data *err_data, > + uint32_t ch_inst, > + uint32_t umc_inst) > +{ > + uint64_t mc_umc_status, err_addr; > + uint32_t eccinfo_table_idx; > + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); > + > + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; > + mc_umc_status = ras- > >umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; > + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; > + > + umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst, > + err_addr, mc_umc_status); > +} > > static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device > *adev, > void *ras_error_status) > @@ -330,9 +341,8 @@ static void umc_v8_7_query_error_address(struct > amdgpu_device *adev, > uint32_t ch_inst, > uint32_t umc_inst) > { > - uint32_t lsb, mc_umc_status_addr; > - uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; > - uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev- > >umc.channel_inst_num + ch_inst]; > + uint32_t mc_umc_status_addr; > + uint64_t mc_umc_status, err_addr, mc_umc_addrt0; > > mc_umc_status_addr = > SOC15_REG_OFFSET(UMC, 0, > mmMCA_UMC_UMC0_MCUMC_STATUST0); @@ -340,38 +350,10 @@ static > void umc_v8_7_query_error_address(struct amdgpu_device *adev, > SOC15_REG_OFFSET(UMC, 0, > mmMCA_UMC_UMC0_MCUMC_ADDRT0); > > mc_umc_status = RREG64_PCIE((mc_umc_status_addr + > umc_reg_offset) * 4); > + err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); > > - if (mc_umc_status == 0) > - return; > - > - if (!err_data->err_addr) { > - /* clear umc status */ > - WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, > 0x0ULL); > - return; > - } > - > - /* calculate error address if ue/ce error is detected */ > - if (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && > - (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || > - REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { > - > - err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * > 4); > - /* the lowest lsb bits should be ignored */ > - lsb = REG_GET_FIELD(err_addr, > MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); > - err_addr = REG_GET_FIELD(err_addr, > MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); > - err_addr &= ~((0x1ULL << lsb) - 1); > - > - /* translate umc channel address to soc pa, 3 parts are included > */ > - retired_page = ADDR_OF_4KB_BLOCK(err_addr) | > - ADDR_OF_256B_BLOCK(channel_index) | > - OFFSET_IN_256B_BLOCK(err_addr); > - > - /* we only save ue error information currently, ce is skipped */ > - if (REG_GET_FIELD(mc_umc_status, > MCA_UMC_UMC0_MCUMC_STATUST0, UECC) > - == 1) > - amdgpu_umc_fill_error_record(err_data, err_addr, > - retired_page, channel_index, umc_inst); > - } > + umc_v8_7_convert_error_address(adev, err_data, ch_inst, umc_inst, > + err_addr, mc_umc_status); > > /* clear umc status */ > WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); > -- > 2.17.1