On 2018年07月04日 23:04, Andrey Grodzovsky wrote: > Extract and present the responsible process and thread when > VM_FAULT happens. > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++ > drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 10 ++++++++-- > drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 +++++++-- > 3 files changed, 27 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 7a625f3..1c483ea 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -187,6 +187,18 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > if (p->uf_entry.robj) > p->job->uf_addr = uf_offset; > kfree(chunk_array); > + > + /* Use this opportunity to fill in task info for the vm */ > + if (!vm->task_info.pid) { > + vm->task_info.pid = current->pid; > + get_task_comm(vm->task_info.task_name, current); > + > + if (current->group_leader->mm == current->mm) { > + vm->task_info.tgid = current->group_leader->pid; > + get_task_comm(vm->task_info.process_name, current->group_leader); > + } > + } > + You can wrap this segment into a function such as amdgpu_vm_set_task_info. 
> return 0; > > free_all_kdata: > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > index 08753e7..7ad19f9 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c > @@ -46,6 +46,7 @@ > > #include "ivsrcid/ivsrcid_vislands30.h" > > +#include "amdgpu_vm.h" > > static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev); > static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); > @@ -1449,8 +1450,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, > gmc_v8_0_set_fault_enable_default(adev, false); > > if (printk_ratelimit()) { > - dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", > - entry->src_id, entry->src_data[0]); > + struct amdgpu_task_info task_info = { 0 }; > + > + amdgpu_vm_task_info(adev, entry->pasid, &task_info); You can rename this function to amdgpu_vm_get_task_info. In general, it looks very good to me and does what I wanted to do before. Thanks, David Zhou > + > + dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", > + entry->src_id, entry->src_data[0], task_info.process_name, > + task_info.tgid, task_info.task_name, task_info.pid); > dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", > addr); > dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", > diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > index 691a659..384a89c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c > @@ -259,11 +259,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, > } > > if (printk_ratelimit()) { > + struct amdgpu_task_info task_info = { 0 }; > + > + amdgpu_vm_task_info(adev, entry->pasid, &task_info); > + > dev_err(adev->dev, > - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", > + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s 
pid %d\n)\n", > entry->vmid_src ? "mmhub" : "gfxhub", > entry->src_id, entry->ring_id, entry->vmid, > - entry->pasid); > + entry->pasid, task_info.process_name, task_info.tgid, > + task_info.task_name, task_info.pid); > dev_err(adev->dev, " at page 0x%016llx from %d\n", > addr, entry->client_id); > if (!amdgpu_sriov_vf(adev))