This v2 patch extends the change to all GMC versions. Regards, Alex > -----Original Message----- > From: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@xxxxxxx> > Sent: Friday, April 22, 2022 9:25 PM > To: amd-gfx@xxxxxxxxxxxxxxxxxxxxx > Cc: Sierra Guiza, Alejandro (Alex) <Alex.Sierra@xxxxxxx> > Subject: [PATCH v2] drm/amdgpu: replace VM fault error by info logs > > This is not a kernel error. These logs are caused by VM faults that could not > be handled, which are typically generated by user mode applications. > > Signed-off-by: Alex Sierra <alex.sierra@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 14 +++++++------- > drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 14 +++++++------- > drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 4 ++-- > drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 8 ++++---- > drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 8 ++++---- > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 8 ++++---- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 20 ++++++++++---------- > drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 14 +++++++------- > drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 14 +++++++------- > 9 files changed, 52 insertions(+), 52 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > index 6e0ace2fbfab..c226a4803086 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c > @@ -79,25 +79,25 @@ gfxhub_v2_0_print_l2_protection_fault_status(struct > amdgpu_device *adev, > u32 cid = REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, > CID); > > - dev_err(adev->dev, > + dev_info(adev->dev, > "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); > - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > + dev_info(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > cid >= ARRAY_SIZE(gfxhub_client_ids) ? 
"unknown" : > gfxhub_client_ids[cid], > cid); > - dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); > - dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); > - dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, > PERMISSION_FAULTS)); > - dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, > MAPPING_ERROR)); > - dev_err(adev->dev, "\t RW: 0x%lx\n", > + dev_info(adev->dev, "\t RW: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, RW)); } diff --git > a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c > b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c > index ff738e9725ee..fdcca1477592 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c > @@ -82,25 +82,25 @@ gfxhub_v2_1_print_l2_protection_fault_status(struct > amdgpu_device *adev, > u32 cid = REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, > CID); > > - dev_err(adev->dev, > + dev_info(adev->dev, > "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); > - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > + dev_info(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > cid >= ARRAY_SIZE(gfxhub_client_ids) ? 
"unknown" : > gfxhub_client_ids[cid], > cid); > - dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); > - dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); > - dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, > PERMISSION_FAULTS)); > - dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, > MAPPING_ERROR)); > - dev_err(adev->dev, "\t RW: 0x%lx\n", > + dev_info(adev->dev, "\t RW: 0x%lx\n", > REG_GET_FIELD(status, > GCVM_L2_PROTECTION_FAULT_STATUS, RW)); } diff --git > a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > index a455e59f41f4..864fcc0edb90 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c > @@ -148,14 +148,14 @@ static int gmc_v10_0_process_interrupt(struct > amdgpu_device *adev, > memset(&task_info, 0, sizeof(struct amdgpu_task_info)); > amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); > > - dev_err(adev->dev, > + dev_info(adev->dev, > "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " > "for process %s pid %d thread %s pid %d)\n", > entry->vmid_src ? 
"mmhub" : "gfxhub", > entry->src_id, entry->ring_id, entry->vmid, > entry->pasid, task_info.process_name, task_info.tgid, > task_info.task_name, task_info.pid); > - dev_err(adev->dev, " in page starting at address 0x%016llx from > client 0x%x (%s)\n", > + dev_info(adev->dev, " in page starting at address 0x%016llx from > +client 0x%x (%s)\n", > addr, entry->client_id, > soc15_ih_clientid_name[entry->client_id]); > > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > index ec291d28edff..3d830fd7706b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c > @@ -620,7 +620,7 @@ static void gmc_v6_0_vm_decode_fault(struct > amdgpu_device *adev, > mc_id = REG_GET_FIELD(status, > VM_CONTEXT1_PROTECTION_FAULT_STATUS, > MEMORY_CLIENT_ID); > > - dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s > from '%s' (0x%08x) (%d)\n", > + dev_info(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s > from > +'%s' (0x%08x) (%d)\n", > protections, vmid, addr, > REG_GET_FIELD(status, > VM_CONTEXT1_PROTECTION_FAULT_STATUS, > MEMORY_CLIENT_RW) ? 
> @@ -1083,11 +1083,11 @@ static int gmc_v6_0_process_interrupt(struct > amdgpu_device *adev, > gmc_v6_0_set_fault_enable_default(adev, false); > > if (printk_ratelimit()) { > - dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", > + dev_info(adev->dev, "GPU fault detected: %d 0x%08x\n", > entry->src_id, entry->src_data[0]); > - dev_err(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > + dev_info(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > addr); > - dev_err(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > + dev_info(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > status); > gmc_v6_0_vm_decode_fault(adev, status, addr, 0); > } > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > index 979da6f510e8..1f3ceb03b47b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c > @@ -781,7 +781,7 @@ static void gmc_v7_0_vm_decode_fault(struct > amdgpu_device *adev, u32 status, > mc_id = REG_GET_FIELD(status, > VM_CONTEXT1_PROTECTION_FAULT_STATUS, > MEMORY_CLIENT_ID); > > - dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page > %u, %s from '%s' (0x%08x) (%d)\n", > + dev_info(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page > %u, > +%s from '%s' (0x%08x) (%d)\n", > protections, vmid, pasid, addr, > REG_GET_FIELD(status, > VM_CONTEXT1_PROTECTION_FAULT_STATUS, > MEMORY_CLIENT_RW) ? 
> @@ -1286,11 +1286,11 @@ static int gmc_v7_0_process_interrupt(struct > amdgpu_device *adev, > gmc_v7_0_set_fault_enable_default(adev, false); > > if (printk_ratelimit()) { > - dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", > + dev_info(adev->dev, "GPU fault detected: %d 0x%08x\n", > entry->src_id, entry->src_data[0]); > - dev_err(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > + dev_info(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > addr); > - dev_err(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > + dev_info(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > status); > gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client, > entry->pasid); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 382dde1ce74c..5be3f4f77c49 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -1021,7 +1021,7 @@ static void gmc_v8_0_vm_decode_fault(struct > amdgpu_device *adev, u32 status, > mc_id = REG_GET_FIELD(status, > VM_CONTEXT1_PROTECTION_FAULT_STATUS, > MEMORY_CLIENT_ID); > > - dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page > %u, %s from '%s' (0x%08x) (%d)\n", > + dev_info(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page > %u, > +%s from '%s' (0x%08x) (%d)\n", > protections, vmid, pasid, addr, > REG_GET_FIELD(status, > VM_CONTEXT1_PROTECTION_FAULT_STATUS, > MEMORY_CLIENT_RW) ? 
> @@ -1466,12 +1466,12 @@ static int gmc_v8_0_process_interrupt(struct > amdgpu_device *adev, > memset(&task_info, 0, sizeof(struct amdgpu_task_info)); > amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); > > - dev_err(adev->dev, "GPU fault detected: %d 0x%08x for > process %s pid %d thread %s pid %d\n", > + dev_info(adev->dev, "GPU fault detected: %d 0x%08x for > process %s pid > +%d thread %s pid %d\n", > entry->src_id, entry->src_data[0], > task_info.process_name, > task_info.tgid, task_info.task_name, task_info.pid); > - dev_err(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > + dev_info(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > addr); > - dev_err(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > + dev_info(adev->dev, " > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > status); > gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client, > entry->pasid); > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 22761a3bb818..98c8de7307be 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -582,14 +582,14 @@ static int gmc_v9_0_process_interrupt(struct > amdgpu_device *adev, > memset(&task_info, 0, sizeof(struct amdgpu_task_info)); > amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); > > - dev_err(adev->dev, > + dev_info(adev->dev, > "[%s] %s page fault (src_id:%u ring:%u vmid:%u " > "pasid:%u, for process %s pid %d thread %s pid %d)\n", > hub_name, retry_fault ? 
"retry" : "no-retry", > entry->src_id, entry->ring_id, entry->vmid, > entry->pasid, task_info.process_name, task_info.tgid, > task_info.task_name, task_info.pid); > - dev_err(adev->dev, " in page starting at address 0x%016llx from IH > client 0x%x (%s)\n", > + dev_info(adev->dev, " in page starting at address 0x%016llx from IH > +client 0x%x (%s)\n", > addr, entry->client_id, > soc15_ih_clientid_name[entry->client_id]); > > @@ -611,11 +611,11 @@ static int gmc_v9_0_process_interrupt(struct > amdgpu_device *adev, > WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); > > > - dev_err(adev->dev, > + dev_info(adev->dev, > "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); > if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { > - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > + dev_info(adev->dev, "\t Faulty UTCL2 client ID: %s > (0x%x)\n", > cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : > gfxhub_client_ids[cid], > cid); > @@ -648,22 +648,22 @@ static int gmc_v9_0_process_interrupt(struct > amdgpu_device *adev, > mmhub_cid = NULL; > break; > } > - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > + dev_info(adev->dev, "\t Faulty UTCL2 client ID: %s > (0x%x)\n", > mmhub_cid ? 
mmhub_cid : "unknown", cid); > } > - dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); > - dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); > - dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > VM_L2_PROTECTION_FAULT_STATUS, > PERMISSION_FAULTS)); > - dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); > - dev_err(adev->dev, "\t RW: 0x%x\n", rw); > + dev_info(adev->dev, "\t RW: 0x%x\n", rw); > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c > b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c > index 636abd855686..ec8c8b2cab36 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c > @@ -150,7 +150,7 @@ > mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device > *adev, > rw = REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, RW); > > - dev_err(adev->dev, > + dev_info(adev->dev, > "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); > switch (adev->ip_versions[MMHUB_HWIP][0]) { @@ -169,21 +169,21 > @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device > *adev, > mmhub_cid = NULL; > break; > } > - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > + dev_info(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > mmhub_cid ? 
mmhub_cid : "unknown", cid); > - dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); > - dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, > WALKER_ERROR)); > - dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, > PERMISSION_FAULTS)); > - dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, > MAPPING_ERROR)); > - dev_err(adev->dev, "\t RW: 0x%x\n", rw); > + dev_info(adev->dev, "\t RW: 0x%x\n", rw); > } > > static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, > uint32_t vmid, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c > b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c > index ff44c5364a8c..72dda850e7d3 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c > +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c > @@ -87,7 +87,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct > amdgpu_device *adev, > rw = REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, RW); > > - dev_err(adev->dev, > + dev_info(adev->dev, > "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", > status); > switch (adev->ip_versions[MMHUB_HWIP][0]) { @@ -100,21 +100,21 > @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device > *adev, > mmhub_cid = NULL; > break; > } > - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > + dev_info(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", > mmhub_cid ? 
mmhub_cid : "unknown", cid); > - dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t MORE_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); > - dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t WALKER_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, > WALKER_ERROR)); > - dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > + dev_info(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, > PERMISSION_FAULTS)); > - dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > + dev_info(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", > REG_GET_FIELD(status, > MMVM_L2_PROTECTION_FAULT_STATUS, > MAPPING_ERROR)); > - dev_err(adev->dev, "\t RW: 0x%x\n", rw); > + dev_info(adev->dev, "\t RW: 0x%x\n", rw); > } > > static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev, > -- > 2.32.0