Change-Id: I963598ba6eb44bc8620d70e026c0175d1a1de120
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 77 +++++++++++++++++++++++++++++-
 1 file changed, 76 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e5ae0a8..ff11eeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2162,18 +2162,93 @@ retry:
 		amdgpu_atombios_scratch_regs_restore(adev);
 	}
 	if (!r) {
+		struct amdgpu_ring *buffer_ring = adev->mman.buffer_funcs_ring;
+
 		amdgpu_irq_gpu_reset_resume_helper(adev);
 		r = amdgpu_ib_ring_tests(adev);
 		if (r) {
 			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
 			r = amdgpu_suspend(adev);
+			need_full_reset = true;
 			goto retry;
 		}
-
+		/*
+		 * Recover the VM page tables, since we cannot rely on VRAM
+		 * being intact after a full GPU reset.
+		 */
+		if (need_full_reset && !(adev->flags & AMD_IS_APU)) {
+			struct amdgpu_vm *vm, *tmp;
+			struct amdgpu_ring *ring;
+			struct fence *fence = NULL;
+			int i;
+
+			DRM_INFO("recover page table from shadow\n");
+			for (i = 0; i < adev->vm_manager.vm_pte_num_rings; i++) {
+				ring = adev->vm_manager.vm_pte_rings[i];
+				amd_sched_rq_block_entity(
+					&ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL], true);
+				amd_sched_rq_block_entity(
+					&ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL], true);
+			}
+			spin_lock(&adev->vm_list_lock);
+			list_for_each_entry_safe(vm, tmp, &adev->vm_list, list) {
+				spin_unlock(&adev->vm_list_lock);
+				amd_sched_block_entity(&vm->shadow_entity, false);
+				spin_lock(&adev->vm_list_lock);
+			}
+			spin_unlock(&adev->vm_list_lock);
+			for (i = 0; i < adev->vm_manager.vm_pte_num_rings; i++) {
+				ring = adev->vm_manager.vm_pte_rings[i];
+				kthread_unpark(ring->sched.thread);
+			}
+			spin_lock(&adev->vm_list_lock);
+			list_for_each_entry_safe(vm, tmp, &adev->vm_list, list) {
+				spin_unlock(&adev->vm_list_lock);
+				/* wait for all shadow jobs to finish */
+				if (vm->shadow_entity.last_fence)
+					fence_wait(vm->shadow_entity.last_fence, false);
+				spin_lock(&adev->vm_list_lock);
+			}
+			spin_unlock(&adev->vm_list_lock);
+			for (i = 0; i < adev->vm_manager.vm_pte_num_rings; i++) {
+				ring = adev->vm_manager.vm_pte_rings[i];
+				kthread_park(ring->sched.thread);
+				amd_sched_rq_block_entity(
+					&ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL], true);
+				amd_sched_rq_block_entity(
+					&ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL], true);
+			}
+			amd_sched_block_entity(&adev->mman.entity, false);
+			kthread_unpark(buffer_ring->sched.thread);
+			spin_lock(&adev->vm_list_lock);
+			list_for_each_entry_safe(vm, tmp, &adev->vm_list, list) {
+				spin_unlock(&adev->vm_list_lock);
+				amdgpu_vm_recover_page_table_from_shadow(adev, vm);
+				if (vm->shadow_sync_fence) {
+					fence_put(fence);
+					fence = fence_get(vm->shadow_sync_fence);
+				}
+				spin_lock(&adev->vm_list_lock);
+			}
+			spin_unlock(&adev->vm_list_lock);
+			if (fence)
+				fence_wait(fence, false);
+			fence_put(fence);
+			for (i = 0; i < adev->vm_manager.vm_pte_num_rings; i++) {
+				ring = adev->vm_manager.vm_pte_rings[i];
+				kthread_park(ring->sched.thread);
+				amd_sched_rq_block_entity(
+					&ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL], false);
+				amd_sched_rq_block_entity(
+					&ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL], false);
+			}
+		}
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
+
+			DRM_INFO("ring:%d recover jobs\n", ring->idx);
 			amd_sched_job_recovery(&ring->sched);
 			kthread_unpark(ring->sched.thread);
 		}
-- 
1.9.1