Workstation application ANSA/META get this error dmesg: [drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-16) This is caused by: 1. create a 256MB buffer in invisible VRAM 2. CPU map the buffer and access it causes vm_fault and try to move it to visible VRAM 3. force visible VRAM space and traverse all VRAM bos to check if evicting this bo is valuable 4. when checking a VM bo (in invisible VRAM), amdgpu_vm_evictable() will set amdgpu_vm->evicting, but latter due to not in visible VRAM, won't really evict it so not add it to amdgpu_vm->evicted 5. before next CS to clear the amdgpu_vm->evicting, user VM ops ioctl will pass amdgpu_vm_ready() (check amdgpu_vm->evicted) but fail in amdgpu_vm_bo_update_mapping() (check amdgpu_vm->evicting) and get this error log This error won't affect functionality as next CS will finish the waiting VM ops. But we'd better make the amdgpu_vm->evicting correctly reflact the vm status and clear the error log. Signed-off-by: Qiang Yu <qiang.yu@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 85 ++++++++++++++----------- 1 file changed, 47 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5a32ee66d8c8..88a27911054f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1306,45 +1306,11 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } -/* - * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer - * object. - * - * Return true if eviction is sensible. Called by ttm_mem_evict_first() on - * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until - * it can find space for a new object and by ttm_bo_force_list_clean() which is - * used to clean out a memory space. - */ -static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, - const struct ttm_place *place) +static bool amdgpu_ttm_mem_eviction_valuable(struct ttm_buffer_object *bo, + const struct ttm_place *place) { unsigned long num_pages = bo->resource->num_pages; struct amdgpu_res_cursor cursor; - struct dma_resv_list *flist; - struct dma_fence *f; - int i; - - /* Swapout? */ - if (bo->resource->mem_type == TTM_PL_SYSTEM) - return true; - - if (bo->type == ttm_bo_type_kernel && - !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo))) - return false; - - /* If bo is a KFD BO, check if the bo belongs to the current process. - * If true, then return false as any KFD process needs all its BOs to - * be resident to run successfully - */ - flist = dma_resv_shared_list(bo->base.resv); - if (flist) { - for (i = 0; i < flist->shared_count; ++i) { - f = rcu_dereference_protected(flist->shared[i], - dma_resv_held(bo->base.resv)); - if (amdkfd_fence_check_mm(f, current->mm)) - return false; - } - } switch (bo->resource->mem_type) { case AMDGPU_PL_PREEMPT: @@ -1377,10 +1343,53 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, return false; default: - break; + return ttm_bo_eviction_valuable(bo, place); } +} - return ttm_bo_eviction_valuable(bo, place); +/* + * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer + * object. + * + * Return true if eviction is sensible. Called by ttm_mem_evict_first() on + * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until + * it can find space for a new object and by ttm_bo_force_list_clean() which is + * used to clean out a memory space. + */ +static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, + const struct ttm_place *place) +{ + struct dma_resv_list *flist; + struct dma_fence *f; + int i; + + /* Swapout? */ + if (bo->resource->mem_type == TTM_PL_SYSTEM) + return true; + + /* If bo is a KFD BO, check if the bo belongs to the current process. + * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = dma_resv_shared_list(bo->base.resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + dma_resv_held(bo->base.resv)); + if (amdkfd_fence_check_mm(f, current->mm)) + return false; + } + } + + /* Check by different mem type. */ + if (!amdgpu_ttm_mem_eviction_valuable(bo, place)) + return false; + + /* VM bo should be checked at last because it will mark VM evicting. */ + if (bo->type == ttm_bo_type_kernel) + return amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)); + + return true; } static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, -- 2.25.1