[AMD Official Use Only - General] Hi Tao, > -----Original Message----- > From: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Sent: Friday, September 23, 2022 5:21 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, Hawking > <Hawking.Zhang@xxxxxxx>; Yang, Stanley <Stanley.Yang@xxxxxxx> > Cc: Zhou1, Tao <Tao.Zhou1@xxxxxxx> > Subject: [PATCH 1/4] drm/amdgpu: export umc error address translation > interface > > Make it globally so we can convert specific mca address. > > Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 6 ++++++ > drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 11 +++++------ > 2 files changed, 11 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > index 3629d8f292ef..31fbefaaf676 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h > @@ -22,6 +22,8 @@ > #define __AMDGPU_UMC_H__ > #include "amdgpu_ras.h" > > +#define UMC_INVALID_ADDR 0x1ULL > + > /* > * (addr / 256) * 4096, the higher 26 bits in ErrorAddr > * is the index of 4KB block > @@ -51,6 +53,10 @@ struct amdgpu_umc_ras { > struct amdgpu_ras_block_object ras_block; > void (*err_cnt_init)(struct amdgpu_device *adev); > bool (*query_ras_poison_mode)(struct amdgpu_device *adev); > + void (*query_error_address_per_channel)(struct amdgpu_device > *adev, > + struct ras_err_data > *err_data, > + uint32_t umc_reg_offset, > uint32_t ch_inst, > + uint32_t umc_inst, uint64_t > mca_addr); > void (*ecc_info_query_ras_error_count)(struct amdgpu_device > *adev, > void *ras_error_status); > void (*ecc_info_query_ras_error_address)(struct amdgpu_device > *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > index bf7524f16b66..0f1b215653f3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c > @@ -452,9 +452,8 @@ static void umc_v6_7_query_ras_error_count(struct > 
amdgpu_device *adev, > > static void umc_v6_7_query_error_address(struct amdgpu_device *adev, > struct ras_err_data *err_data, > - uint32_t umc_reg_offset, > - uint32_t ch_inst, > - uint32_t umc_inst) > + uint32_t umc_reg_offset, uint32_t > ch_inst, > + uint32_t umc_inst, uint64_t > mca_addr) > { > uint32_t mc_umc_status_addr; > uint32_t channel_index; > @@ -540,9 +539,8 @@ static void > umc_v6_7_query_ras_error_address(struct amdgpu_device *adev, > ch_inst); > umc_v6_7_query_error_address(adev, > err_data, > - umc_reg_offset, > - ch_inst, > - umc_inst); > + umc_reg_offset, ch_inst, > + umc_inst, UMC_INVALID_ADDR); > } > } > > @@ -583,4 +581,5 @@ struct amdgpu_umc_ras umc_v6_7_ras = { > .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, > .ecc_info_query_ras_error_count = > umc_v6_7_ecc_info_query_ras_error_count, > .ecc_info_query_ras_error_address = > umc_v6_7_ecc_info_query_ras_error_address, > + .query_error_address_per_channel = > umc_v6_7_query_error_address, Stanley: According to patch#3, it's better to rename query_error_address_per_channel to convert/query_error_address_at_specific_channel, because the channel_instance and umc_instance are obtained from the MCE structure; using per_channel may cause misunderstanding. > }; > -- > 2.35.1