Userptr restore may have concurrent userptr invalidation after hmm_vma_fault adds the range to the hmm->ranges list, needs call hmm_vma_range_done to remove the range from hmm->ranges list first, then reschedule the restore worker. Otherwise hmm_vma_fault will add same range to the list, this will cause loop in the list because range->next point to range itself. Add function untrack_invalid_user_pages to reduce code duplication. Change-Id: I31407739dc10554f8e418c7a0e0415d3d95552f1 Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 28 ++++++++++++++----- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 314c048fcac6..783d760ccfe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1731,6 +1731,23 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, return 0; } +/* Remove invalid userptr BOs from hmm track list + * + * Stop HMM track the userptr update + */ +static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + bo = mem->bo; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + } +} + /* Validate invalid userptr BOs * * Validates BOs on the userptr_inval_list, and moves them back to the @@ -1848,12 +1865,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) out_free: kfree(pd_bo_list_entries); out_no_mem: - list_for_each_entry_safe(mem, tmp_mem, - &process_info->userptr_inval_list, - validate_list.head) { - bo = mem->bo; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - } + untrack_invalid_user_pages(process_info); return ret; } @@ -1897,8 +1909,10 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) * and we can just restart the queues. */ if (!list_empty(&process_info->userptr_inval_list)) { - if (atomic_read(&process_info->evicted_bos) != evicted_bos) + if (atomic_read(&process_info->evicted_bos) != evicted_bos) { + untrack_invalid_user_pages(process_info); goto unlock_out; /* Concurrent eviction, try again */ + } if (validate_invalid_user_pages(process_info)) goto unlock_out; -- 2.17.1 _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx