[Why] The page tables of a compute VM in VRAM will be lost after a GPU reset. VRAM won't be restored since a compute VM has no shadows. [How] Use the upper 32 bits of vm->generation to record a vram_lost_counter. Reset the VM state machine when the recorded counter is not equal to the current vram_lost_counter of the device. Signed-off-by: ZhenGuo Yin <zhenguo.yin@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..fd7f912816dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += (vm->generation & 0xFFFFFFFF); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -467,6 +467,12 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; + uint32_t vram_lost_counter = atomic_read(&adev->vram_lost_counter); + + if ((vm->generation >> 32) != vram_lost_counter) { + amdgpu_vm_bo_reset_state_machine(vm); + vm->generation = (u64)vram_lost_counter << 32 | (vm->generation & 0xFFFFFFFF); + } if (drm_sched_entity_error(&vm->delayed)) { ++vm->generation; @@ -2439,7 +2445,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = (u64)atomic_read(&adev->vram_lost_counter) << 32; mutex_init(&vm->eviction_lock); vm->evicting = false; -- 2.35.1