[AMD Official Use Only] > -----Original Message----- > From: Lazar, Lijo <Lijo.Lazar@xxxxxxx> > Sent: Monday, March 14, 2022 5:44 PM > To: Zhou1, Tao <Tao.Zhou1@xxxxxxx>; amd-gfx@xxxxxxxxxxxxxxxxxxxxx; Zhang, > Hawking <Hawking.Zhang@xxxxxxx>; Yang, Stanley > <Stanley.Yang@xxxxxxx>; Chai, Thomas <YiPeng.Chai@xxxxxxx> > Subject: Re: [PATCH 2/3] drm/amdgpu: add utcl2 RAS poison functions for > Aldebaran > > > > On 3/14/2022 12:33 PM, Tao Zhou wrote: > > Add helper functions to check and clear RAS utcl2 poison status. > > > > Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> > > --- > > .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c | 28 > ++++++++++++++++++- > > .../gpu/drm/amd/include/kgd_kfd_interface.h | 3 ++ > > 2 files changed, 30 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > index c8935d718207..ebd7d36d099b 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c > > @@ -23,6 +23,30 @@ > > #include "amdgpu_amdkfd.h" > > #include "amdgpu_amdkfd_arcturus.h" > > #include "amdgpu_amdkfd_gfx_v9.h" > > +#include "soc15.h" > > +#include "gc/gc_9_4_2_sh_mask.h" > > + > > +static bool kgd_aldebaran_is_ras_utcl2_poison(struct amdgpu_device *adev, > > + uint16_t client_id) > > +{ > > + uint32_t status = 0; > > + struct amdgpu_vmhub *hub; > > + > > + if (client_id != SOC15_IH_CLIENTID_UTCL2) > > + return false; > > + > > Status check is not related to interrupt. Is IH client id needed here? [Tao] I'll remove the client_id check, since the client ID has already been checked in the VM fault handler. 
> > Thanks, > Lijo > > > + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > > + status = RREG32(hub->vm_l2_pro_fault_status); > > + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, > FED); } > > + > > +static void kgd_aldebaran_utcl2_fault_clear(struct amdgpu_device > > +*adev) { > > + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > > + > > + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; > > + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } > > > > const struct kfd2kgd_calls aldebaran_kfd2kgd = { > > .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, > > @@ -41,5 +65,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { > > .get_atc_vmid_pasid_mapping_info = > > > kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, > > .set_vm_context_page_table_base = > kgd_gfx_v9_set_vm_context_page_table_base, > > - .program_trap_handler_settings = > kgd_gfx_v9_program_trap_handler_settings > > + .program_trap_handler_settings = > kgd_gfx_v9_program_trap_handler_settings, > > + .is_ras_utcl2_poison = kgd_aldebaran_is_ras_utcl2_poison, > > + .utcl2_fault_clear = kgd_aldebaran_utcl2_fault_clear > > }; > > diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > > b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > > index 2f60cf35a444..78400479193e 100644 > > --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > > +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h > > @@ -291,6 +291,9 @@ struct kfd2kgd_calls { > > int *wave_cnt, int *max_waves_per_cu); > > void (*program_trap_handler_settings)(struct amdgpu_device *adev, > > uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); > > + bool (*is_ras_utcl2_poison)(struct amdgpu_device *adev, > > + uint16_t client_id); > > + void (*utcl2_fault_clear)(struct amdgpu_device *adev); > > }; > > > > #endif /* KGD_KFD_INTERFACE_H_INCLUDED */ > >