Am 2021-10-22 um 1:06 p.m. schrieb Philip Yang: > The userptr can be unmapped by application and still registered to > driver, restore userptr work return user pages will get -EFAULT bad > address error. Pretend this error as succeed. GPU access this userptr > will have VM fault later, it is better than application soft hangs with > stalled user mode queues. > > Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> Reviewed-by: Felix Kuehling <Felix.Kuehling@xxxxxxx> > --- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 27 ++++++++++++------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ > 2 files changed, 20 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index cdf46bd0d8d5..6f01c6145a87 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -2041,19 +2041,26 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > /* Get updated user pages */ > ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); > if (ret) { > - pr_debug("%s: Failed to get user pages: %d\n", > - __func__, ret); > + pr_debug("Failed %d to get user pages\n", ret); > + > + /* Return -EFAULT bad address error as success. It will > + * fail later with a VM fault if the GPU tries to access > + * it. Better than hanging indefinitely with stalled > + * user mode queues. 
> + * > + * Return other error -EBUSY or -ENOMEM to retry restore > + */ > + if (ret != -EFAULT) > + return ret; > + } else { > > - /* Return error -EBUSY or -ENOMEM, retry restore */ > - return ret; > + /* > + * FIXME: Cannot ignore the return code, must hold > + * notifier_lock > + */ > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > } > > - /* > - * FIXME: Cannot ignore the return code, must hold > - * notifier_lock > - */ > - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > - > /* Mark the BO as valid unless it was invalidated > * again concurrently. > */ > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index d784f8d3a834..ae6694f2c73d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -693,6 +693,9 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) > r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, > ttm->num_pages, &gtt->range, readonly, > false, NULL); > + if (r) > + pr_debug("failed %d to get user pages 0x%llx\n", r, start); > + > out_putmm: > mmput(mm); >