If VRAM is used up, display VRAM allocations evict the KFD BOs to system memory. KFD then schedules restore work to move the BOs back to VRAM and resume the queues. If restoring the BOs fails, KFD reschedules the restore work to try again. If display BOs are pinned in VRAM, the KFD restore work will keep retrying and may never succeed. Set a maximum BO restore count: once a BO reaches the max restore count, keep the BO in system memory, and the GPU mapping will be updated to point to system memory. Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 703cd5a7b8f7..4670515b3af4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -38,6 +38,7 @@ * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +#define AMDGPU_MAX_BO_RESTORE_COUNT 8 /* max 800ms retry restore */ /* Impose limit on how much memory KFD can use */ static struct { @@ -2042,6 +2043,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) int ret = 0, i; struct list_head duplicate_save; struct amdgpu_sync sync_obj; + unsigned long failed_size = 0; + unsigned long total_size = 0; INIT_LIST_HEAD(&duplicate_save); INIT_LIST_HEAD(&ctx.list); @@ -2098,10 +2101,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) uint32_t domain = mem->domain; struct kfd_bo_va_list *bo_va_entry; + total_size += amdgpu_bo_size(bo); + if (atomic_read(&mem->invalid) > AMDGPU_MAX_BO_RESTORE_COUNT) { + pr_debug("Memory eviction: reach max restore count\n"); + domain = AMDGPU_GEM_DOMAIN_GTT; + failed_size += amdgpu_bo_size(bo); + } + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); if (ret) { + atomic_inc(&mem->invalid); pr_debug("Memory eviction: Validate BOs failed. 
Try again\n"); goto validate_map_fail; + } else { + atomic_set(&mem->invalid, 0); } ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); if (ret) { @@ -2121,6 +2134,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } } + if (failed_size) + pr_debug("0x%lx/0x%lx restore fail\n", failed_size, total_size); + /* Update page directories */ ret = process_update_pds(process_info, &sync_obj); if (ret) { -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx