[Why] The page tables of a compute VM in VRAM will be lost after a GPU reset. VRAM won't be restored since compute VMs have no shadows. [How] Use the upper 32 bits of vm->generation to record the vram_lost_counter. Reset the VM state machine when vm->generation is not equal to the re-generation token returned by amdgpu_vm_generation(). v2: Check vm->generation instead of calling drm_sched_entity_error in amdgpu_vm_validate. Signed-off-by: ZhenGuo Yin <zhenguo.yin@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3abfa66d72a2..9e2f84c166e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (!vm) return result; - result += vm->generation; + result += (vm->generation & 0xFFFFFFFF); /* Add one if the page tables will be re-generated on next CS */ if (drm_sched_entity_error(&vm->delayed)) ++result; @@ -467,9 +467,12 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *shadow; struct amdgpu_bo *bo; int r; + uint32_t vram_lost_counter = atomic_read(&adev->vram_lost_counter); - if (drm_sched_entity_error(&vm->delayed)) { - ++vm->generation; + if (vm->generation != amdgpu_vm_generation(adev, vm)) { + if (drm_sched_entity_error(&vm->delayed)) + ++vm->generation; + vm->generation = (u64)vram_lost_counter << 32 | (vm->generation & 0xFFFFFFFF); amdgpu_vm_bo_reset_state_machine(vm); amdgpu_vm_fini_entities(vm); r = amdgpu_vm_init_entities(adev, vm); @@ -2439,7 +2442,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->last_update = dma_fence_get_stub(); vm->last_unlocked = dma_fence_get_stub(); vm->last_tlb_flush = dma_fence_get_stub(); - vm->generation = 0; + vm->generation = (u64)atomic_read(&adev->vram_lost_counter) << 32; mutex_init(&vm->eviction_lock); 
vm->evicting = false; -- 2.35.1