[AMD Official Use Only - General] Is it better to handle CE and UE lists separately? Anyway, Reviewed-by: Stanley.Yang <Stanley.Yang@xxxxxxx> Regards, Stanley > -----Original Message----- > From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Tao > Zhou > Sent: Tuesday, October 31, 2023 3:09 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Chai, Thomas <YiPeng.Chai@xxxxxxx>; Zhou1, Tao > <Tao.Zhou1@xxxxxxx> > Subject: [PATCH] drm/amdgpu: handle extra UE register entries for gfx v9_4_3 > > The UE register list is larger than the CE list. > > Reported-by: yipeng.chai@xxxxxxx > Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 38 > +++++++++++++++++++++++++ > 1 file changed, 38 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > index 41bbabd9ad4d..046ae95b366a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c > @@ -3799,6 +3799,27 @@ static void > gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, > } > } > > + /* handle extra register entries of UE */ > + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { > + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { > + for (k = 0; k < > gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { > + /* no need to select if instance number is 1 */ > + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || > + > gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) > + gfx_v9_4_3_xcc_select_se_sh(adev, j, > 0, k, xcc_id); > + > + > amdgpu_ras_inst_query_ras_error_count(adev, > + > &(gfx_v9_4_3_ue_reg_list[i].reg_entry), > + 1, > + > gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_t > ype].mem_id_ent, > + > gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_t > ype].size, > + GET_INST(GC, xcc_id), > + > AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, > + &ue_count); > + } > + } > + } > + > gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 
0xffffffff, 0xffffffff, > xcc_id); > mutex_unlock(&adev->grbm_idx_mutex); > @@ -3838,6 +3859,23 @@ static void > gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, > } > } > > + /* handle extra register entries of UE */ > + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { > + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { > + for (k = 0; k < > gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { > + /* no need to select if instance number is 1 */ > + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || > + > gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) > + gfx_v9_4_3_xcc_select_se_sh(adev, j, > 0, k, xcc_id); > + > + > amdgpu_ras_inst_reset_ras_error_count(adev, > + > &(gfx_v9_4_3_ue_reg_list[i].reg_entry), > + 1, > + GET_INST(GC, xcc_id)); > + } > + } > + } > + > gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, > xcc_id); > mutex_unlock(&adev->grbm_idx_mutex); > -- > 2.35.1