Am 19.02.24 um 09:15 schrieb Tao Zhou:
Let kfd interrupt handler process it. Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 773725a92cf1..70defc394b7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -552,7 +552,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, { bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - uint32_t status = 0, cid = 0, rw = 0; + uint32_t status = 0, cid = 0, rw = 0, fed = 0; struct amdgpu_task_info task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; @@ -663,6 +663,14 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + + /* for gfx fed error, kfd will handle it, return directly */ + if (fed && amdgpu_ras_is_poison_mode_supported(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2) && + !strcmp(hub_name, "gfxhub0"))
Please never ever use strcmp() to make a decision like that, *especially* not in an interrupt handler.
Regards, Christian.
+ return 1; + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); #ifdef HAVE_STRUCT_XARRAY amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);