On 06.02.19 at 16:53, Yang, Philip wrote:
> >> +    /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
> >>     amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> >>         struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >> -        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> >> -            r = -ERESTARTSYS;
> >> -            goto error_abort;
> >> -        }
> >> +        r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
> >
> > Just abort when you see the first one with a problem here.
> >
> > There is no value in checking all of them.
> >
> No, amdgpu_ttm_tt_get_user_pages_done calls hmm_vma_range_done to stop
> HMM tracking and free the range structure memory, so this needs to go
> through all userptr BOs.

Well, that actually sounds like an ugliness in the hmm_vma_range_done
interface. I need to double-check the code and maybe sync up with Jerome
on whether that is really a good idea.

But that won't affect your work, so feel free to go ahead for now.

>
> >> +
> >> +    if (r == -EAGAIN) {
> >> +        if (!--tries) {
> >> +            DRM_ERROR("Possible deadlock? Retry too many times\n");
> >> +            return -EDEADLK;
> >> +        }
> >> +        goto restart;
> >> +    }
> >> +
> >
> > I would still say it's better to just return to userspace here.
> >
> > Because of the way HMM works, 10 retries might not be sufficient any more.
> >
> Yes, it looks better to handle the retry from user space. The extra
> syscall overhead can be ignored because this does not happen all the
> time. I will submit a new patch for review.

Thanks! Apart from the issues mentioned so far, this looks good to me.

Feel free to add an Acked-by: Christian König <christian.koenig@xxxxxxx>
to the patch.

I still need to double-check that we don't have any locking problems
(inversions, missing locks, etc.). When that is done I can also give you
an rb for the patch.

Christian.

>
> Thanks,
> Philip
>
> On 2019-02-06 4:20 a.m., Christian König wrote:
>> On 05.02.19 at 23:00, Yang, Philip wrote:
>>> Use the HMM helper function hmm_vma_fault() to get the physical pages
>>> backing a userptr and start CPU page table update tracking of those
>>> pages. Then use hmm_vma_range_done() to check whether those pages were
>>> updated before amdgpu_cs_submit for gfx, or before user queues are
>>> resumed for kfd.
>>>
>>> If userptr pages are updated, for gfx amdgpu_cs_ioctl will restart
>>> from scratch; for kfd, the restore worker is rescheduled to retry.
>>>
>>> HMM simplifies the concurrent CPU page table update check, so remove
>>> the guptasklock, mmu_invalidations and last_set_pages fields from
>>> struct amdgpu_ttm_tt.
>>>
>>> HMM does not pin the page (increase the page ref count), so remove the
>>> related operations like release_pages(), put_page() and
>>> mark_page_dirty().
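
For anyone following along, the core pattern the commit message describes
is fairly small. Below is a minimal sketch against the HMM API this patch
uses (hmm_vma_fault()/hmm_vma_range_done(), names taken from the diff;
locking and error handling trimmed, so treat it as illustration only, not
the literal kernel code):

    /* One hmm_range per userptr BO, embedded in struct amdgpu_ttm_tt */
    struct hmm_range *range = &gtt->range;
    unsigned long i;
    int r;

    amdgpu_hmm_init_range(range);    /* flags/values tables, pfn_shift */
    range->start = gtt->userptr;
    range->end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
    range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
                                 GFP_KERNEL);

    down_read(&mm->mmap_sem);
    range->vma = find_vma(mm, gtt->userptr);
    r = hmm_vma_fault(range, true);  /* fault pages in, start tracking */
    up_read(&mm->mmap_sem);
    if (r)
        return r;                    /* the real code also frees pfns */

    for (i = 0; i < ttm->num_pages; i++)
        pages[i] = hmm_pfn_to_page(range, range->pfns[i]);

    /* ... bind the pages, build and submit the job ... */

    /* Stops tracking and frees HMM's bookkeeping. Returns false if the
     * CPU page table changed in between, i.e. we must restart: -EAGAIN
     * for the gfx CS, reschedule for the kfd restore worker.
     */
    if (!hmm_vma_range_done(range))
        r = -EAGAIN;
    kvfree(range->pfns);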
>>>
>>> Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67
>>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
>>> ---
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   |   1 -
>>>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  95 +++-------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h  |   2 +-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c       | 158 +++++++---------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c      |  14 +-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c       |  25 ++-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h       |   4 +-
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c      | 178 +++++++-----------
>>>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |   3 +-
>>>  9 files changed, 198 insertions(+), 282 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 0b31a1859023..0e1711a75b68 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -61,7 +61,6 @@ struct kgd_mem {
>>>      atomic_t invalid;
>>>      struct amdkfd_process_info *process_info;
>>> -    struct page **user_pages;
>>>      struct amdgpu_sync sync;
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index d7b10d79f1de..ae2d838d31ea 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>>>          goto out;
>>>      }
>>> -    /* If no restore worker is running concurrently, user_pages
>>> -     * should not be allocated
>>> -     */
>>> -    WARN(mem->user_pages, "Leaking user_pages array");
>>> -
>>> -    mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
>>> -                     sizeof(struct page *),
>>> -                     GFP_KERNEL | __GFP_ZERO);
>>> -    if (!mem->user_pages) {
>>> -        pr_err("%s: Failed to allocate pages array\n", __func__);
>>> -        ret = -ENOMEM;
>>> -        goto unregister_out;
>>> -    }
>>> -
>>> -    ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
>>> +    ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages);
>>>      if (ret) {
>>>          pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
>>> -        goto free_out;
>>> +        goto unregister_out;
>>>      }
>>> -    amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
>>> -
>>>      ret = amdgpu_bo_reserve(bo, true);
>>>      if (ret) {
>>>          pr_err("%s: Failed to reserve BO\n", __func__);
>>> @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
>>>      amdgpu_bo_unreserve(bo);
>>>  release_out:
>>> -    if (ret)
>>> -        release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>>> -free_out:
>>> -    kvfree(mem->user_pages);
>>> -    mem->user_pages = NULL;
>>> +    amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>>  unregister_out:
>>>      if (ret)
>>>          amdgpu_mn_unregister(bo);
>>> @@ -679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
>>>      ctx->kfd_bo.priority = 0;
>>>      ctx->kfd_bo.tv.bo = &bo->tbo;
>>>      ctx->kfd_bo.tv.num_shared = 1;
>>> -    ctx->kfd_bo.user_pages = NULL;
>>>      list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>>      amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
>>> @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
>>>      ctx->kfd_bo.priority = 0;
>>>      ctx->kfd_bo.tv.bo = &bo->tbo;
>>>      ctx->kfd_bo.tv.num_shared = 1;
>>> -    ctx->kfd_bo.user_pages = NULL;
>>>      list_add(&ctx->kfd_bo.tv.head, &ctx->list);
>>>      i = 0;
>>> @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
>>>      list_del(&bo_list_entry->head);
>>>      mutex_unlock(&process_info->lock);
>>> -    /* Free user pages if necessary */
>>> -    if (mem->user_pages) {
>>> -        pr_debug("%s: Freeing user_pages array\n", __func__);
>>> -        if (mem->user_pages[0])
>>> -            release_pages(mem->user_pages,
>>> -                      mem->bo->tbo.ttm->num_pages);
>>> -        kvfree(mem->user_pages);
>>> -    }
>>> -
>>>      ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
>>>      if (unlikely(ret))
>>>          return ret;
>>> @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>>>          bo = mem->bo;
>>> -        if (!mem->user_pages) {
>>> -            mem->user_pages =
>>> -                kvmalloc_array(bo->tbo.ttm->num_pages,
>>> -                           sizeof(struct page *),
>>> -                           GFP_KERNEL | __GFP_ZERO);
>>> -            if (!mem->user_pages) {
>>> -                pr_err("%s: Failed to allocate pages array\n",
>>> -                       __func__);
>>> -                return -ENOMEM;
>>> -            }
>>> -        } else if (mem->user_pages[0]) {
>>> -            release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
>>> -        }
>>> -
>>>          /* Get updated user pages */
>>>          ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
>>> -                           mem->user_pages);
>>> +                           bo->tbo.ttm->pages);
>>>          if (ret) {
>>> -            mem->user_pages[0] = NULL;
>>> +            bo->tbo.ttm->pages[0] = NULL;
>>>              pr_info("%s: Failed to get user pages: %d\n",
>>>                  __func__, ret);
>>>              /* Pretend it succeeded. It will fail later
>>> @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
>>>               * stalled user mode queues.
>>>               */
>>>          }
>>> -
>>> -        /* Mark the BO as valid unless it was invalidated
>>> -         * again concurrently
>>> -         */
>>> -        if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
>>> -            return -EAGAIN;
>>>      }
>>>      return 0;
>>> @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>>                       GFP_KERNEL);
>>>      if (!pd_bo_list_entries) {
>>>          pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
>>> -        return -ENOMEM;
>>> +        ret = -ENOMEM;
>>> +        goto out_no_mem;
>>>      }
>>>      INIT_LIST_HEAD(&resv_list);
>>> @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>>      ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false,
>>>                       &duplicates);
>>>      WARN(!list_empty(&duplicates), "Duplicates should be empty");
>>>      if (ret)
>>> -        goto out;
>>> +        goto out_free;
>>>      amdgpu_sync_create(&sync);
>>> @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>>          bo = mem->bo;
>>> -        /* Copy pages array and validate the BO if we got user pages */
>>> -        if (mem->user_pages[0]) {
>>> -            amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
>>> -                             mem->user_pages);
>>> +        /* Validate the BO if we got user pages */
>>> +        if (bo->tbo.ttm->pages[0]) {
>>>              amdgpu_bo_placement_from_domain(bo, mem->domain);
>>>              ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>              if (ret) {
>>> @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>>              }
>>>          }
>>> -        /* Validate succeeded, now the BO owns the pages, free
>>> -         * our copy of the pointer array. Put this BO back on
>>> -         * the userptr_valid_list. If we need to revalidate
>>> -         * it, we need to start from scratch.
>>> -         */
>>> -        kvfree(mem->user_pages);
>>> -        mem->user_pages = NULL;
>>>          list_move_tail(&mem->validate_list.head,
>>>                     &process_info->userptr_valid_list);
>>> +        /* Stop HMM track the userptr update. We dont check the return
>>> +         * value for concurrent CPU page table update because we will
>>> +         * reschedule the restore worker if process_info->evicted_bos
>>> +         * is updated.
>>> +         */
>>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>> +
>>>          /* Update mapping. If the BO was not validated
>>>           * (because we couldn't get user pages), this will
>>>           * clear the page table entries, which will result in
>>> @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
>>>      ttm_eu_backoff_reservation(&ticket, &resv_list);
>>>      amdgpu_sync_wait(&sync, false);
>>>      amdgpu_sync_free(&sync);
>>> -out:
>>> +out_free:
>>>      kfree(pd_bo_list_entries);
>>> +out_no_mem:
>>> +    list_for_each_entry_safe(mem, tmp_mem,
>>> +                 &process_info->userptr_inval_list,
>>> +                 validate_list.head) {
>>> +        bo = mem->bo;
>>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>> +    }
>>>      return ret;
>>>  }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>> index 7c5f5d1601e6..a130e766cbdb 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
>>> @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
>>>      struct amdgpu_bo_va *bo_va;
>>>      uint32_t priority;
>>>      struct page **user_pages;
>>> -    int user_invalidated;
>>> +    bool user_invalidated;
>>>  };
>>>  struct amdgpu_bo_list {
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 52a5e4fdc95b..70bdf9ff0bab 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
>>>      p->uf_entry.tv.bo = &bo->tbo;
>>>      /* One for TTM and one for the CS job */
>>>      p->uf_entry.tv.num_shared = 2;
>>> -    p->uf_entry.user_pages = NULL;
>>>      drm_gem_object_put_unlocked(gobj);
>>> @@ -540,14 +539,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>>>          if (usermm && usermm != current->mm)
>>>              return -EPERM;
>>> -        /* Check if we have user pages and nobody bound the BO already */
>>> -        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
>>> -            lobj->user_pages) {
>>> +        if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
>>> +            lobj->user_invalidated && lobj->user_pages) {
>>>              amdgpu_bo_placement_from_domain(bo,
>>>                              AMDGPU_GEM_DOMAIN_CPU);
>>>              r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>              if (r)
>>>                  return r;
>>> +
>>>              amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
>>>                               lobj->user_pages);
>>>              binding_userptr = true;
>>> @@ -578,7 +577,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>>      struct amdgpu_bo *gds;
>>>      struct amdgpu_bo *gws;
>>>      struct amdgpu_bo *oa;
>>> -    unsigned tries = 10;
>>>      int r;
>>>      INIT_LIST_HEAD(&p->validated);
>>> @@ -614,79 +612,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>>      if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
>>>          list_add(&p->uf_entry.tv.head, &p->validated);
>>> -    while (1) {
>>> -        struct list_head need_pages;
>>> -
>>> -        r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
>>> -                       &duplicates);
>>> -        if (unlikely(r != 0)) {
>>> -            if (r != -ERESTARTSYS)
>>> -                DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
>>> -            goto error_free_pages;
>>> -        }
>>> -
>>> -        INIT_LIST_HEAD(&need_pages);
>>> -        amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>> -            struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>>> -
>>> -            if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
>>> -                &e->user_invalidated) && e->user_pages) {
>>> -
>>> -                /* We acquired a page array, but somebody
>>> -                 * invalidated it. Free it and try again
>>> -                 */
>>> -                release_pages(e->user_pages,
>>> -                          bo->tbo.ttm->num_pages);
>>> -                kvfree(e->user_pages);
>>> -                e->user_pages = NULL;
>>> -            }
>>> -
>>> -            if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
>>> -                !e->user_pages) {
>>> -                list_del(&e->tv.head);
>>> -                list_add(&e->tv.head, &need_pages);
>>> -
>>> -                amdgpu_bo_unreserve(bo);
>>> -            }
>>> +    /* Get userptr backing pages. If pages are updated after registered
>>> +     * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
>>> +     * amdgpu_ttm_backend_bind() to flush and invalidate new pages
>>> +     */
>>> +    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>> +        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>>> +        bool userpage_invalidated = false;
>>> +        int i;
>>> +
>>> +        e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
>>> +                           sizeof(struct page *),
>>> +                           GFP_KERNEL | __GFP_ZERO);
>>> +        if (!e->user_pages) {
>>> +            DRM_ERROR("calloc failure\n");
>>> +            return -ENOMEM;
>>>          }
>>> -        if (list_empty(&need_pages))
>>> -            break;
>>> -
>>> -        /* Unreserve everything again. */
>>> -        ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>>> -
>>> -        /* We tried too many times, just abort */
>>> -        if (!--tries) {
>>> -            r = -EDEADLK;
>>> -            DRM_ERROR("deadlock in %s\n", __func__);
>>> -            goto error_free_pages;
>>> +        r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages);
>>> +        if (r) {
>>> +            kvfree(e->user_pages);
>>> +            e->user_pages = NULL;
>>> +            return r;
>>>          }
>>> -        /* Fill the page arrays for all userptrs. */
>>> -        list_for_each_entry(e, &need_pages, tv.head) {
>>> -            struct ttm_tt *ttm = e->tv.bo->ttm;
>>> -
>>> -            e->user_pages = kvmalloc_array(ttm->num_pages,
>>> -                               sizeof(struct page*),
>>> -                               GFP_KERNEL | __GFP_ZERO);
>>> -            if (!e->user_pages) {
>>> -                r = -ENOMEM;
>>> -                DRM_ERROR("calloc failure in %s\n", __func__);
>>> -                goto error_free_pages;
>>> -            }
>>> -
>>> -            r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
>>> -            if (r) {
>>> -                DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
>>> -                kvfree(e->user_pages);
>>> -                e->user_pages = NULL;
>>> -                goto error_free_pages;
>>> +        for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
>>> +            if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
>>> +                userpage_invalidated = true;
>>> +                break;
>>>              }
>>>          }
>>> +        e->user_invalidated = userpage_invalidated;
>>> +    }
>>> -        /* And try again. */
>>> -        list_splice(&need_pages, &p->validated);
>>> +    r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
>>> +                   &duplicates);
>>> +    if (unlikely(r != 0)) {
>>> +        if (r != -ERESTARTSYS)
>>> +            DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
>>> +        goto out;
>>>      }
>>>      amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
>>> @@ -755,17 +719,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>>>  error_validate:
>>>      if (r)
>>>          ttm_eu_backoff_reservation(&p->ticket, &p->validated);
>>> -
>>> -error_free_pages:
>>> -
>>> -    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>> -        if (!e->user_pages)
>>> -            continue;
>>> -
>>> -        release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
>>> -        kvfree(e->user_pages);
>>> -    }
>>> -
>>> +out:
>>>      return r;
>>>  }
>>> @@ -1224,8 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>      struct amdgpu_bo_list_entry *e;
>>>      struct amdgpu_job *job;
>>>      uint64_t seq;
>>> -
>>> -    int r;
>>> +    int r = 0;
>>>      job = p->job;
>>>      p->job = NULL;
>>> @@ -1234,15 +1187,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>      if (r)
>>>          goto error_unlock;
>>> -    /* No memory allocation is allowed while holding the mn lock */
>>> +    /* No memory allocation is allowed while holding the mn lock.
>>> +     * p->mn is hold until amdgpu_cs_submit is finished and fence is added
>>> +     * to BOs.
>>> +     */
>>>      amdgpu_mn_lock(p->mn);
>>> +
>>> +    /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */
>>>      amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
>>>          struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>>> -        if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
>>> -            r = -ERESTARTSYS;
>>> -            goto error_abort;
>>> -        }
>>> +        r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>
>> Just abort when you see the first one with a problem here.
>>
>> There is no value in checking all of them.
>>
>>> +    }
>>> +    if (r) {
>>> +        r = -EAGAIN;
>>> +        goto error_abort;
>>>      }
>>>      job->owner = p->filp;
>>> @@ -1289,14 +1248,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>>  int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>>  {
>>>      struct amdgpu_device *adev = dev->dev_private;
>>> -    union drm_amdgpu_cs *cs = data;
>>> -    struct amdgpu_cs_parser parser = {};
>>> -    bool reserved_buffers = false;
>>> +    union drm_amdgpu_cs *cs;
>>> +    struct amdgpu_cs_parser parser;
>>> +    bool reserved_buffers;
>>> +    int tries = 10;
>>>      int i, r;
>>>      if (!adev->accel_working)
>>>          return -EBUSY;
>>> +restart:
>>> +    memset(&parser, 0, sizeof(parser));
>>> +    cs = data;
>>> +    reserved_buffers = false;
>>> +
>>>      parser.adev = adev;
>>>      parser.filp = filp;
>>> @@ -1338,6 +1303,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>>>  out:
>>>      amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>> +
>>> +    if (r == -EAGAIN) {
>>> +        if (!--tries) {
>>> +            DRM_ERROR("Possible deadlock? Retry too many times\n");
>>> +            return -EDEADLK;
>>> +        }
>>> +        goto restart;
>>> +    }
>>> +
>> I would still say it's better to just return to userspace here.
>>
>> Because of the way HMM works, 10 retries might not be sufficient any more.
>>
>> Christian.
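
On handling the retry from user space: userspace most likely needs no
change at all for that, because as far as I remember libdrm's drmIoctl()
wrapper already restarts interrupted ioctls. Paraphrased from memory, not
the literal xf86drm.c source:

    int drmIoctl(int fd, unsigned long request, void *arg)
    {
        int ret;

        do {
            ret = ioctl(fd, request, arg);
        } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

        return ret;
    }

So a CS ioctl failing with -EAGAIN would simply be re-issued by the
wrapper, which is exactly the retry loop this hunk moves out of the kernel.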
>>
>>>      return r;
>>>  }
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> index d21dd2f369da..555285e329ed 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>>> @@ -329,26 +329,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
>>>          r = amdgpu_bo_reserve(bo, true);
>>>          if (r)
>>> -            goto free_pages;
>>> +            goto user_pages_done;
>>>          amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
>>>          r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
>>>          amdgpu_bo_unreserve(bo);
>>>          if (r)
>>> -            goto free_pages;
>>> +            goto user_pages_done;
>>>      }
>>>      r = drm_gem_handle_create(filp, gobj, &handle);
>>> -    /* drop reference from allocate - handle holds it now */
>>> -    drm_gem_object_put_unlocked(gobj);
>>>      if (r)
>>> -        return r;
>>> +        goto user_pages_done;
>>>      args->handle = handle;
>>> -    return 0;
>>> -free_pages:
>>> -    release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
>>> +user_pages_done:
>>> +    if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
>>> +        amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
>>>  release_object:
>>>      drm_gem_object_put_unlocked(gobj);
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> index e356867d2308..fa2516016c43 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
>>> @@ -222,8 +222,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
>>>              true, false, MAX_SCHEDULE_TIMEOUT);
>>>          if (r <= 0)
>>>              DRM_ERROR("(%ld) failed to wait for user bo\n", r);
>>> -
>>> -        amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
>>>      }
>>>  }
>>> @@ -504,3 +502,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
>>>      mutex_unlock(&adev->mn_lock);
>>>  }
>>> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */
>>> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
>>> +    (1 << 0), /* HMM_PFN_VALID */
>>> +    (1 << 1), /* HMM_PFN_WRITE */
>>> +    0 /* HMM_PFN_DEVICE_PRIVATE */
>>> +};
>>> +
>>> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
>>> +    0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
>>> +    0, /* HMM_PFN_NONE */
>>> +    0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
>>> +};
>>> +
>>> +void amdgpu_hmm_init_range(struct hmm_range *range)
>>> +{
>>> +    if (range) {
>>> +        range->flags = hmm_range_flags;
>>> +        range->values = hmm_range_values;
>>> +        range->pfn_shift = PAGE_SHIFT;
>>> +        range->pfns = NULL;
>>> +        INIT_LIST_HEAD(&range->list);
>>> +    }
>>> +}
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> index 0a51fd00021c..4803e216e174 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
>>> @@ -25,9 +25,10 @@
>>>  #define __AMDGPU_MN_H__
>>>  /*
>>> - * MMU Notifier
>>> + * HMM mirror
>>>   */
>>>  struct amdgpu_mn;
>>> +struct hmm_range;
>>>  enum amdgpu_mn_type {
>>>      AMDGPU_MN_TYPE_GFX,
>>> @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
>>>                  enum amdgpu_mn_type type);
>>>  int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
>>>  void amdgpu_mn_unregister(struct amdgpu_bo *bo);
>>> +void amdgpu_hmm_init_range(struct hmm_range *range);
>>>  #else
>>>  static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
>>>  static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 73e71e61dc99..1e675048f790 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -43,6 +43,7 @@
>>>  #include <linux/pagemap.h>
>>>  #include <linux/debugfs.h>
>>>  #include <linux/iommu.h>
>>> +#include <linux/hmm.h>
>>>  #include "amdgpu.h"
>>>  #include "amdgpu_object.h"
>>>  #include "amdgpu_trace.h"
>>> @@ -705,98 +706,102 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
>>>  /*
>>>   * TTM backend functions.
>>>   */
>>> -struct amdgpu_ttm_gup_task_list {
>>> -    struct list_head list;
>>> -    struct task_struct *task;
>>> -};
>>> -
>>>  struct amdgpu_ttm_tt {
>>>      struct ttm_dma_tt ttm;
>>>      u64 offset;
>>>      uint64_t userptr;
>>>      struct task_struct *usertask;
>>>      uint32_t userflags;
>>> -    spinlock_t guptasklock;
>>> -    struct list_head guptasks;
>>> -    atomic_t mmu_invalidations;
>>> -    uint32_t last_set_pages;
>>> +    struct hmm_range range;
>>>  };
>>>  /**
>>> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
>>> - * pointer to memory
>>> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
>>> + * memory and start HMM tracking CPU page table update
>>>   *
>>> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
>>> - * This provides a wrapper around the get_user_pages() call to provide
>>> - * device accessible pages that back user memory.
>>> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
>>> + * once afterwards to stop HMM tracking
>>>   */
>>>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>>  {
>>>      struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>      struct mm_struct *mm = gtt->usertask->mm;
>>> -    unsigned int flags = 0;
>>> -    unsigned pinned = 0;
>>> -    int r;
>>> +    unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>>> +    struct hmm_range *range = &gtt->range;
>>> +    int r = 0, i;
>>>      if (!mm) /* Happens during process shutdown */
>>>          return -ESRCH;
>>> -    if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
>>> -        flags |= FOLL_WRITE;
>>> +    amdgpu_hmm_init_range(range);
>>>      down_read(&mm->mmap_sem);
>>> -    if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
>>> -        /*
>>> -         * check that we only use anonymous memory to prevent problems
>>> -         * with writeback
>>> -         */
>>> -        unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
>>> -        struct vm_area_struct *vma;
>>> +    range->vma = find_vma(mm, gtt->userptr);
>>> +    if (!range_in_vma(range->vma, gtt->userptr, end))
>>> +        r = -EFAULT;
>>> +    else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
>>> +        range->vma->vm_file)
>>> +        r = -EPERM;
>>> +    if (r)
>>> +        goto out;
>>> -        vma = find_vma(mm, gtt->userptr);
>>> -        if (!vma || vma->vm_file || vma->vm_end < end) {
>>> -            up_read(&mm->mmap_sem);
>>> -            return -EPERM;
>>> -        }
>>> +    range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
>>> +                     GFP_KERNEL);
>>> +    if (range->pfns == NULL) {
>>> +        r = -ENOMEM;
>>> +        goto out;
>>>      }
>>> +    range->start = gtt->userptr;
>>> +    range->end = end;
>>> -    /* loop enough times using contiguous pages of memory */
>>> -    do {
>>> -        unsigned num_pages = ttm->num_pages - pinned;
>>> -        uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
>>> -        struct page **p = pages + pinned;
>>> -        struct amdgpu_ttm_gup_task_list guptask;
>>> +    range->pfns[0] = range->flags[HMM_PFN_VALID];
>>> +    range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ?
>>> +                0 : range->flags[HMM_PFN_WRITE];
>>> +    for (i = 1; i < ttm->num_pages; i++)
>>> +        range->pfns[i] = range->pfns[0];
>>> -        guptask.task = current;
>>> -        spin_lock(&gtt->guptasklock);
>>> -        list_add(&guptask.list, &gtt->guptasks);
>>> -        spin_unlock(&gtt->guptasklock);
>>> +    /* This may trigger page table update */
>>> +    r = hmm_vma_fault(range, true);
>>> +    if (r)
>>> +        goto out_free_pfns;
>>> -        if (mm == current->mm)
>>> -            r = get_user_pages(userptr, num_pages, flags, p, NULL);
>>> -        else
>>> -            r = get_user_pages_remote(gtt->usertask,
>>> -                    mm, userptr, num_pages,
>>> -                    flags, p, NULL, NULL);
>>> +    up_read(&mm->mmap_sem);
>>> -        spin_lock(&gtt->guptasklock);
>>> -        list_del(&guptask.list);
>>> -        spin_unlock(&gtt->guptasklock);
>>> +    for (i = 0; i < ttm->num_pages; i++)
>>> +        pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
>>> -        if (r < 0)
>>> -            goto release_pages;
>>> +    return 0;
>>> -        pinned += r;
>>> +out_free_pfns:
>>> +    kvfree(range->pfns);
>>> +    range->pfns = NULL;
>>> +out:
>>> +    up_read(&mm->mmap_sem);
>>> +    return r;
>>> +}
>>> -    } while (pinned < ttm->num_pages);
>>> +/**
>>> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
>>> + * Check if the pages backing this ttm range have been invalidated
>>> + *
>>> + * Returns: true if pages are still valid
>>> + */
>>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
>>> +{
>>> +    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>> +    bool r = false;
>>> -    up_read(&mm->mmap_sem);
>>> -    return 0;
>>> +    if (!gtt || !gtt->userptr)
>>> +        return false;
>>> +
>>> +    WARN_ONCE(!gtt->range.pfns, "No user pages to check\n");
>>> +    if (gtt->range.pfns) {
>>> +        r = hmm_vma_range_done(&gtt->range);
>>> +        kvfree(gtt->range.pfns);
>>> +        gtt->range.pfns = NULL;
>>> +    }
>>> -release_pages:
>>> -    release_pages(pages, pinned);
>>> -    up_read(&mm->mmap_sem);
>>>      return r;
>>>  }
>>> @@ -809,16 +814,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
>>>   */
>>>  void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
>>>  {
>>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>      unsigned i;
>>> -    gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
>>> -    for (i = 0; i < ttm->num_pages; ++i) {
>>> -        if (ttm->pages[i])
>>> -            put_page(ttm->pages[i]);
>>> -
>>> +    for (i = 0; i < ttm->num_pages; ++i)
>>>          ttm->pages[i] = pages ? pages[i] : NULL;
>>> -    }
>>>  }
>>>  /**
>>> @@ -903,10 +902,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
>>>      /* unmap the pages mapped to the device */
>>>      dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
>>> -    /* mark the pages as dirty */
>>> -    amdgpu_ttm_tt_mark_user_pages(ttm);
>>> -
>>>      sg_free_table(ttm->sg);
>>> +
>>> +    if (gtt->range.pfns &&
>>> +        ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0]))
>>> +        WARN_ONCE(1, "Missing get_user_page_done\n");
>>>  }
>>>  int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
>>> @@ -1256,11 +1256,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>>      gtt->usertask = current->group_leader;
>>>      get_task_struct(gtt->usertask);
>>> -    spin_lock_init(&gtt->guptasklock);
>>> -    INIT_LIST_HEAD(&gtt->guptasks);
>>> -    atomic_set(&gtt->mmu_invalidations, 0);
>>> -    gtt->last_set_pages = 0;
>>> -
>>>      return 0;
>>>  }
>>> @@ -1289,7 +1284,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>>                    unsigned long end)
>>>  {
>>>      struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>> -    struct amdgpu_ttm_gup_task_list *entry;
>>>      unsigned long size;
>>>      if (gtt == NULL || !gtt->userptr)
>>> @@ -1302,48 +1296,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>>      if (gtt->userptr > end || gtt->userptr + size <= start)
>>>          return false;
>>> -    /* Search the lists of tasks that hold this mapping and see
>>> -     * if current is one of them. If it is return false.
>>> -     */
>>> -    spin_lock(&gtt->guptasklock);
>>> -    list_for_each_entry(entry, &gtt->guptasks, list) {
>>> -        if (entry->task == current) {
>>> -            spin_unlock(&gtt->guptasklock);
>>> -            return false;
>>> -        }
>>> -    }
>>> -    spin_unlock(&gtt->guptasklock);
>>> -
>>> -    atomic_inc(&gtt->mmu_invalidations);
>>> -
>>>      return true;
>>>  }
>>>  /**
>>> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
>>> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
>>>   */
>>> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>>> -                    int *last_invalidated)
>>> -{
>>> -    struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>> -    int prev_invalidated = *last_invalidated;
>>> -
>>> -    *last_invalidated = atomic_read(&gtt->mmu_invalidations);
>>> -    return prev_invalidated != *last_invalidated;
>>> -}
>>> -
>>> -/**
>>> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
>>> - * been invalidated since the last time they've been set?
>>> - */
>>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
>>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
>>>  {
>>>      struct amdgpu_ttm_tt *gtt = (void *)ttm;
>>>      if (gtt == NULL || !gtt->userptr)
>>>          return false;
>>> -    return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
>>> +    return true;
>>>  }
>>>  /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> index b5b2d101f7db..8988c87fff9d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>>> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
>>>  int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
>>>  int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
>>> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
>>>  void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
>>>  void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
>>>  int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
>>> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
>>>                    unsigned long end);
>>>  bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
>>>                         int *last_invalidated);
>>> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
>>> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
>>>  bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>>>  uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
>>>  uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
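
Coming back to the point at the top of the thread about visiting all
userptr BOs: with the helpers above, the submit-side check boils down to
something like this (my condensed reading of the amdgpu_cs_submit() hunk,
not a literal excerpt from the patch):

    struct amdgpu_bo_list_entry *e;
    int r = 0;

    /* Walk every entry, even after the first failure, so that each BO's
     * hmm_range bookkeeping is torn down by
     * amdgpu_ttm_tt_get_user_pages_done().
     */
    amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
        struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

        r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
    }
    if (r)
        return -EAGAIN;    /* userspace re-issues the CS */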