On 2019-06-14 9:52 p.m., Yang, Philip wrote: > Under memory pressure, hmm_range_fault may return error code -ENOMEM > or -EBUSY. Change pr_info to pr_debug to avoid an unnecessary kernel log > message, because the restore will be retried. > > Calling get_user_pages_done after TTM get user pages has failed triggers > a WARN_ONCE kernel call-stack dump in the log. > > Change-Id: I086f92944630f9d1a70365c00417cb9440662464 > Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 +++---------------- > 1 file changed, 6 insertions(+), 32 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 74e86952553f..10abae398e51 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1731,35 +1731,17 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, > bo->tbo.ttm->pages); > if (ret) { > - bo->tbo.ttm->pages[0] = NULL; > - pr_info("%s: Failed to get user pages: %d\n", > + pr_debug("%s: Failed to get user pages: %d\n", > __func__, ret); > - /* Pretend it succeeded. It will fail later > - * with a VM fault if the GPU tries to access > - * it. Better than hanging indefinitely with > - * stalled user mode queues. 
> - */ > - } > - } > - > - return 0; > -} > > -/* Remove invalid userptr BOs from hmm track list > - * > - * Stop HMM track the userptr update > - */ > -static void untrack_invalid_user_pages(struct amdkfd_process_info *process_info) > -{ > - struct kgd_mem *mem, *tmp_mem; > - struct amdgpu_bo *bo; > + /* Return error -EBUSY or -ENOMEM, retry restore */ > + return ret; > + } > > - list_for_each_entry_safe(mem, tmp_mem, > - &process_info->userptr_inval_list, > - validate_list.head) { > - bo = mem->bo; > amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > } > + > + return 0; > } > > /* Validate invalid userptr BOs > @@ -1841,13 +1823,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > list_move_tail(&mem->validate_list.head, > &process_info->userptr_valid_list); > > - /* Stop HMM track the userptr update. We dont check the return > - * value for concurrent CPU page table update because we will > - * reschedule the restore worker if process_info->evicted_bos > - * is updated. > - */ > - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > - > /* Update mapping. If the BO was not validated > * (because we couldn't get user pages), this will > * clear the page table entries, which will result in > @@ -1946,7 +1921,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) > } > > unlock_out: > - untrack_invalid_user_pages(process_info); > mutex_unlock(&process_info->lock); > mmput(mm); > put_task_struct(usertask); _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx