When we get a GPU page fault, cache the fault for later analysis. Cc: samuel.pitoiset@xxxxxxxxx Acked-by: Guchun Chen <guchun.chen@xxxxxxx> Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 ++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 18 +++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index dc80c9c8fd14..5423f66a9ed8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2640,3 +2640,34 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) total_done_objs); } #endif + +/** + * amdgpu_vm_update_fault_cache - update cached fault into. + * @adev: amdgpu device pointer + * @pasid: PASID of the VM + * @addr: Address of the fault + * @status: GPUVM fault status register + * @vmhub: which vmhub got the fault + * + * Cache the fault info for later use by userspace in debugging. + */ +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, + unsigned int pasid, + uint64_t addr, + uint32_t status, + unsigned int vmhub) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + + vm = xa_load(&adev->vm_manager.pasids, pasid); + if (vm) { + vm->fault_info.addr = addr; + vm->fault_info.status = status; + vm->fault_info.vmhub = vmhub; + } + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 14f9a2bf3acb..fb66a413110c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -244,6 +244,15 @@ struct amdgpu_vm_update_funcs { struct dma_fence **fence); }; +struct amdgpu_vm_fault_info { + /* fault address */ + uint64_t addr; + /* fault status register */ + uint32_t status; + /* which vmhub? gfxhub, mmhub, etc. */ + unsigned int vmhub; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; @@ -332,6 +341,9 @@ struct amdgpu_vm { /* Memory partition number, -1 means any partition */ int8_t mem_id; + + /* cached fault info */ + struct amdgpu_vm_fault_info fault_info; }; struct amdgpu_vm_manager { @@ -540,4 +552,10 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) mutex_unlock(&vm->eviction_lock); } +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, + unsigned int pasid, + uint64_t addr, + uint32_t status, + unsigned int vmhub); + #endif -- 2.40.1