Ping Christian, any comments on the GEM and CS parts of the changes? Thanks, Philip On 2019-01-10 12:02 p.m., Yang, Philip wrote: > Use the HMM helper function hmm_vma_fault() to get the physical pages backing > a userptr and start tracking CPU page table updates on those pages. Then use > hmm_vma_range_done() to check whether those pages were updated before > amdgpu_cs_submit for gfx or before user queues are resumed for kfd. > > If the userptr pages were updated, amdgpu_cs_ioctl will restart > from scratch for gfx, and the restore worker is rescheduled to retry for kfd. > > HMM simplifies the concurrent CPU page table update check, so remove the > guptasklock, mmu_invalidations and last_set_pages fields from the > amdgpu_ttm_tt struct. > > HMM does not pin the pages (does not raise the page refcount), so remove related > operations like release_pages(), put_page() and mark_page_dirty(). > > Change-Id: I2e8c0c6f0d8c21e5596a32d7fc91762778bc9e67 > Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 - > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 +++------- > drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 158 +++++++--------- > drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 173 ++++++++---------- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- > 9 files changed, 198 insertions(+), 277 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index 0b31a1859023..0e1711a75b68 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -61,7 +61,6 @@ struct kgd_mem { > > atomic_t invalid; > struct amdkfd_process_info *process_info; > - struct page **user_pages; > > struct amdgpu_sync sync; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index d7b10d79f1de..ae2d838d31ea 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -582,28 +582,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, > goto out; > } > > - /* If no restore worker is running concurrently, user_pages > - * should not be allocated > - */ > - WARN(mem->user_pages, "Leaking user_pages array"); > - > - mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, > - sizeof(struct page *), > - GFP_KERNEL | __GFP_ZERO); > - if (!mem->user_pages) { > - pr_err("%s: Failed to allocate pages array\n", __func__); > - ret = -ENOMEM; > - goto unregister_out; > - } > - > - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); > + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); > if (ret) { > pr_err("%s: Failed to get user pages: %d\n", __func__, ret); > - goto free_out; > + goto unregister_out; > } > > - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); > - > ret = amdgpu_bo_reserve(bo, true); > if (ret) { > pr_err("%s: Failed to reserve BO\n", __func__); > @@ -616,11 +600,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, > amdgpu_bo_unreserve(bo); > > release_out: > - if (ret) > - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); > -free_out: > - kvfree(mem->user_pages); > - mem->user_pages = NULL; > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > unregister_out: > if (ret) > amdgpu_mn_unregister(bo); > @@ 
-679,7 +659,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, > ctx->kfd_bo.priority = 0; > ctx->kfd_bo.tv.bo = &bo->tbo; > ctx->kfd_bo.tv.num_shared = 1; > - ctx->kfd_bo.user_pages = NULL; > list_add(&ctx->kfd_bo.tv.head, &ctx->list); > > amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); > @@ -743,7 +722,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, > ctx->kfd_bo.priority = 0; > ctx->kfd_bo.tv.bo = &bo->tbo; > ctx->kfd_bo.tv.num_shared = 1; > - ctx->kfd_bo.user_pages = NULL; > list_add(&ctx->kfd_bo.tv.head, &ctx->list); > > i = 0; > @@ -1371,15 +1349,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > list_del(&bo_list_entry->head); > mutex_unlock(&process_info->lock); > > - /* Free user pages if necessary */ > - if (mem->user_pages) { > - pr_debug("%s: Freeing user_pages array\n", __func__); > - if (mem->user_pages[0]) > - release_pages(mem->user_pages, > - mem->bo->tbo.ttm->num_pages); > - kvfree(mem->user_pages); > - } > - > ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); > if (unlikely(ret)) > return ret; > @@ -1855,25 +1824,11 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > > bo = mem->bo; > > - if (!mem->user_pages) { > - mem->user_pages = > - kvmalloc_array(bo->tbo.ttm->num_pages, > - sizeof(struct page *), > - GFP_KERNEL | __GFP_ZERO); > - if (!mem->user_pages) { > - pr_err("%s: Failed to allocate pages array\n", > - __func__); > - return -ENOMEM; > - } > - } else if (mem->user_pages[0]) { > - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); > - } > - > /* Get updated user pages */ > ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, > - mem->user_pages); > + bo->tbo.ttm->pages); > if (ret) { > - mem->user_pages[0] = NULL; > + bo->tbo.ttm->pages[0] = NULL; > pr_info("%s: Failed to get user pages: %d\n", > __func__, ret); > /* Pretend it succeeded. It will fail later > @@ -1882,12 +1837,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, > * stalled user mode queues. > */ > } > - > - /* Mark the BO as valid unless it was invalidated > - * again concurrently > - */ > - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) > - return -EAGAIN; > } > > return 0; > @@ -1917,7 +1866,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > GFP_KERNEL); > if (!pd_bo_list_entries) { > pr_err("%s: Failed to allocate PD BO list entries\n", __func__); > - return -ENOMEM; > + ret = -ENOMEM; > + goto out_no_mem; > } > > INIT_LIST_HEAD(&resv_list); > @@ -1941,7 +1891,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); > WARN(!list_empty(&duplicates), "Duplicates should be empty"); > if (ret) > - goto out; > + goto out_free; > > amdgpu_sync_create(&sync); > > @@ -1967,10 +1917,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > > bo = mem->bo; > > - /* Copy pages array and validate the BO if we got user pages */ > - if (mem->user_pages[0]) { > - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, > - mem->user_pages); > + /* Validate the BO if we got user pages */ > + if (bo->tbo.ttm->pages[0]) { > amdgpu_bo_placement_from_domain(bo, mem->domain); > ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > if (ret) { > @@ -1979,16 +1927,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > } > } > > - /* Validate succeeded, now the BO owns the pages, free > - * our copy of the pointer array. 
Put this BO back on > - * the userptr_valid_list. If we need to revalidate > - * it, we need to start from scratch. > - */ > - kvfree(mem->user_pages); > - mem->user_pages = NULL; > list_move_tail(&mem->validate_list.head, > &process_info->userptr_valid_list); > > + /* Stop HMM track the userptr update. We dont check the return > + * value for concurrent CPU page table update because we will > + * reschedule the restore worker if process_info->evicted_bos > + * is updated. > + */ > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > + > /* Update mapping. If the BO was not validated > * (because we couldn't get user pages), this will > * clear the page table entries, which will result in > @@ -2022,8 +1970,15 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) > ttm_eu_backoff_reservation(&ticket, &resv_list); > amdgpu_sync_wait(&sync, false); > amdgpu_sync_free(&sync); > -out: > +out_free: > kfree(pd_bo_list_entries); > +out_no_mem: > + list_for_each_entry_safe(mem, tmp_mem, > + &process_info->userptr_inval_list, > + validate_list.head) { > + bo = mem->bo; > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > + } > > return ret; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > index 7c5f5d1601e6..a130e766cbdb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h > @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { > struct amdgpu_bo_va *bo_va; > uint32_t priority; > struct page **user_pages; > - int user_invalidated; > + bool user_invalidated; > }; > > struct amdgpu_bo_list { > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 1c49b8266d69..451a1fab17d6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -52,7 +52,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, > p->uf_entry.tv.bo = &bo->tbo; > /* One for TTM and one for the CS job */ > p->uf_entry.tv.num_shared = 2; > - p->uf_entry.user_pages = NULL; > > drm_gem_object_put_unlocked(gobj); > > @@ -539,14 +538,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, > if (usermm && usermm != current->mm) > return -EPERM; > > - /* Check if we have user pages and nobody bound the BO already */ > - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && > - lobj->user_pages) { > + if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && > + lobj->user_invalidated && lobj->user_pages) { > amdgpu_bo_placement_from_domain(bo, > AMDGPU_GEM_DOMAIN_CPU); > r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > if (r) > return r; > + > amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, > lobj->user_pages); > binding_userptr = true; > @@ -577,7 +576,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > struct amdgpu_bo *gds; > struct amdgpu_bo *gws; > struct amdgpu_bo *oa; > - unsigned tries = 10; > int r; > > INIT_LIST_HEAD(&p->validated); > @@ -613,79 +611,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) > list_add(&p->uf_entry.tv.head, &p->validated); > > - while (1) { > - struct list_head need_pages; > - > - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, > - &duplicates); > - if (unlikely(r != 0)) { > - if (r != -ERESTARTSYS) > - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); > - goto error_free_pages; > - } > - > - INIT_LIST_HEAD(&need_pages); > - 
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > - > - if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, > - &e->user_invalidated) && e->user_pages) { > - > - /* We acquired a page array, but somebody > - * invalidated it. Free it and try again > - */ > - release_pages(e->user_pages, > - bo->tbo.ttm->num_pages); > - kvfree(e->user_pages); > - e->user_pages = NULL; > - } > - > - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && > - !e->user_pages) { > - list_del(&e->tv.head); > - list_add(&e->tv.head, &need_pages); > - > - amdgpu_bo_unreserve(bo); > - } > + /* Get userptr backing pages. If pages are updated after registered > + * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do > + * amdgpu_ttm_backend_bind() to flush and invalidate new pages > + */ > + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > + bool userpage_invalidated = false; > + int i; > + > + e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, > + sizeof(struct page *), > + GFP_KERNEL | __GFP_ZERO); > + if (!e->user_pages) { > + DRM_ERROR("calloc failure\n"); > + return -ENOMEM; > } > > - if (list_empty(&need_pages)) > - break; > - > - /* Unreserve everything again. */ > - ttm_eu_backoff_reservation(&p->ticket, &p->validated); > - > - /* We tried too many times, just abort */ > - if (!--tries) { > - r = -EDEADLK; > - DRM_ERROR("deadlock in %s\n", __func__); > - goto error_free_pages; > + r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); > + if (r) { > + kvfree(e->user_pages); > + e->user_pages = NULL; > + return r; > } > > - /* Fill the page arrays for all userptrs. */ > - list_for_each_entry(e, &need_pages, tv.head) { > - struct ttm_tt *ttm = e->tv.bo->ttm; > - > - e->user_pages = kvmalloc_array(ttm->num_pages, > - sizeof(struct page*), > - GFP_KERNEL | __GFP_ZERO); > - if (!e->user_pages) { > - r = -ENOMEM; > - DRM_ERROR("calloc failure in %s\n", __func__); > - goto error_free_pages; > - } > - > - r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); > - if (r) { > - DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); > - kvfree(e->user_pages); > - e->user_pages = NULL; > - goto error_free_pages; > + for (i = 0; i < bo->tbo.ttm->num_pages; i++) { > + if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { > + userpage_invalidated = true; > + break; > } > } > + e->user_invalidated = userpage_invalidated; > + } > > - /* And try again. 
*/ > - list_splice(&need_pages, &p->validated); > + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, > + &duplicates); > + if (unlikely(r != 0)) { > + if (r != -ERESTARTSYS) > + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); > + goto out; > } > > amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, > @@ -754,17 +718,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, > error_validate: > if (r) > ttm_eu_backoff_reservation(&p->ticket, &p->validated); > - > -error_free_pages: > - > - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > - if (!e->user_pages) > - continue; > - > - release_pages(e->user_pages, e->tv.bo->ttm->num_pages); > - kvfree(e->user_pages); > - } > - > +out: > return r; > } > > @@ -1213,8 +1167,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > struct amdgpu_bo_list_entry *e; > struct amdgpu_job *job; > uint64_t seq; > - > - int r; > + int r = 0; > > job = p->job; > p->job = NULL; > @@ -1223,15 +1176,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > if (r) > goto error_unlock; > > - /* No memory allocation is allowed while holding the mn lock */ > + /* No memory allocation is allowed while holding the mn lock. > + * p->mn is hold until amdgpu_cs_submit is finished and fence is added > + * to BOs. > + */ > amdgpu_mn_lock(p->mn); > + > + /* If userptr are updated after amdgpu_cs_parser_bos(), restart cs */ > amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); > > - if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { > - r = -ERESTARTSYS; > - goto error_abort; > - } > + r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > + } > + if (r) { > + r = -ERESTARTSYS; > + goto error_abort; > } > > job->owner = p->filp; > @@ -1278,14 +1237,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > { > struct amdgpu_device *adev = dev->dev_private; > - union drm_amdgpu_cs *cs = data; > - struct amdgpu_cs_parser parser = {}; > - bool reserved_buffers = false; > + union drm_amdgpu_cs *cs; > + struct amdgpu_cs_parser parser; > + bool reserved_buffers; > + int tries = 10; > int i, r; > > if (!adev->accel_working) > return -EBUSY; > > +restart: > + memset(&parser, 0, sizeof(parser)); > + cs = data; > + reserved_buffers = false; > + > parser.adev = adev; > parser.filp = filp; > > @@ -1327,6 +1292,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > > out: > amdgpu_cs_parser_fini(&parser, r, reserved_buffers); > + > + if (r == -ERESTARTSYS) { > + if (!--tries) { > + DRM_ERROR("Possible deadlock? 
Retry too many times\n"); > + return -EDEADLK; > + } > + goto restart; > + } > + > return r; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > index f4f00217546e..92025993bfb1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c > @@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, > > r = amdgpu_bo_reserve(bo, true); > if (r) > - goto free_pages; > + goto user_pages_done; > > amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); > r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); > amdgpu_bo_unreserve(bo); > if (r) > - goto free_pages; > + goto user_pages_done; > } > > r = drm_gem_handle_create(filp, gobj, &handle); > - /* drop reference from allocate - handle holds it now */ > - drm_gem_object_put_unlocked(gobj); > if (r) > - return r; > + goto user_pages_done; > > args->handle = handle; > - return 0; > > -free_pages: > - release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); > +user_pages_done: > + if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) > + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > > release_object: > drm_gem_object_put_unlocked(gobj); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > index 5d518d2bb9be..9d40f3001f3c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, > true, false, MAX_SCHEDULE_TIMEOUT); > if (r <= 0) > DRM_ERROR("(%ld) failed to wait for user bo\n", r); > - > - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); > } > } > > @@ -513,3 +511,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) > mutex_unlock(&adev->mn_lock); > } > > +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ > +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { > + (1 << 0), /* HMM_PFN_VALID */ > + (1 << 1), /* HMM_PFN_WRITE */ > + 0 /* HMM_PFN_DEVICE_PRIVATE */ > +}; > + > +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { > + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ > + 0, /* HMM_PFN_NONE */ > + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ > +}; > + > +void amdgpu_hmm_init_range(struct hmm_range *range) > +{ > + if (range) { > + range->flags = hmm_range_flags; > + range->values = hmm_range_values; > + range->pfn_shift = PAGE_SHIFT; > + range->pfns = NULL; > + INIT_LIST_HEAD(&range->list); > + } > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > index 0a51fd00021c..4803e216e174 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h > @@ -25,9 +25,10 @@ > #define __AMDGPU_MN_H__ > > /* > - * MMU Notifier > + * HMM mirror > */ > struct amdgpu_mn; > +struct hmm_range; > > enum amdgpu_mn_type { > AMDGPU_MN_TYPE_GFX, > @@ -41,6 +42,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, > enum amdgpu_mn_type type); > int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); > void amdgpu_mn_unregister(struct amdgpu_bo *bo); > +void amdgpu_hmm_init_range(struct hmm_range *range); > #else > static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} > static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index c91ec3101d00..3d074f4b02f4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -43,6 +43,7 @@ > #include <linux/pagemap.h> > #include <linux/debugfs.h> > #include <linux/iommu.h> > +#include <linux/hmm.h> > #include "amdgpu.h" > #include "amdgpu_object.h" > #include "amdgpu_trace.h" > @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, > /* > * TTM backend functions. > */ > -struct amdgpu_ttm_gup_task_list { > - struct list_head list; > - struct task_struct *task; > -}; > - > struct amdgpu_ttm_tt { > struct ttm_dma_tt ttm; > u64 offset; > @@ -718,85 +714,96 @@ struct amdgpu_ttm_tt { > uint32_t userflags; > spinlock_t guptasklock; > struct list_head guptasks; > - atomic_t mmu_invalidations; > - uint32_t last_set_pages; > + struct hmm_range range; > }; > > /** > - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR > - * pointer to memory > + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user > + * memory and start HMM tracking of CPU page table updates > * > - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). > - * This provides a wrapper around the get_user_pages() call to provide > - * device accessible pages that back user memory. > + * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once and only > + * once afterwards to stop HMM tracking > */ > int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) > { > struct amdgpu_ttm_tt *gtt = (void *)ttm; > struct mm_struct *mm = gtt->usertask->mm; > - unsigned int flags = 0; > - unsigned pinned = 0; > - int r; > + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; > + struct hmm_range *range = &gtt->range; > + int r = 0, i; > > if (!mm) /* Happens during process shutdown */ > return -ESRCH; > > - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) > - flags |= FOLL_WRITE; > + amdgpu_hmm_init_range(range); > > down_read(&mm->mmap_sem); > > - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { > - /* > - * check that we only use anonymous memory to prevent problems > - * with writeback > - */ > - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; > - struct vm_area_struct *vma; > + range->vma = find_vma(mm, gtt->userptr); > + if (!range_in_vma(range->vma, gtt->userptr, end)) > + r = -EFAULT; > + else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && > + range->vma->vm_file) > + r = -EPERM; > + if (r) > + goto out; > > - vma = find_vma(mm, gtt->userptr); > - if (!vma || vma->vm_file || vma->vm_end < end) { > - up_read(&mm->mmap_sem); > - return -EPERM; > - } > + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), > + GFP_KERNEL); > + if (range->pfns == NULL) { > + r = -ENOMEM; > + goto out; > } > + range->start = gtt->userptr; > + range->end = end; > > - /* loop enough times using contiguous pages of memory */ > - do { > - unsigned num_pages = ttm->num_pages - pinned; > - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; > - struct page **p = pages + pinned; > - struct amdgpu_ttm_gup_task_list guptask; > + range->pfns[0] = range->flags[HMM_PFN_VALID]; > + range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? 
> + 0 : range->flags[HMM_PFN_WRITE]; > + for (i = 1; i < ttm->num_pages; i++) > + range->pfns[i] = range->pfns[0]; > > - guptask.task = current; > - spin_lock(&gtt->guptasklock); > - list_add(&guptask.list, &gtt->guptasks); > - spin_unlock(&gtt->guptasklock); > + /* This may trigger page table update */ > + r = hmm_vma_fault(range, true); > + if (r) > + goto out_free_pfns; > > - if (mm == current->mm) > - r = get_user_pages(userptr, num_pages, flags, p, NULL); > - else > - r = get_user_pages_remote(gtt->usertask, > - mm, userptr, num_pages, > - flags, p, NULL, NULL); > + up_read(&mm->mmap_sem); > > - spin_lock(&gtt->guptasklock); > - list_del(&guptask.list); > - spin_unlock(&gtt->guptasklock); > + for (i = 0; i < ttm->num_pages; i++) > + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); > > - if (r < 0) > - goto release_pages; > + return 0; > > - pinned += r; > +out_free_pfns: > + kvfree(range->pfns); > + range->pfns = NULL; > +out: > + up_read(&mm->mmap_sem); > + return r; > +} > > - } while (pinned < ttm->num_pages); > +/** > + * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table changes > + * Check if the pages backing this ttm range have been invalidated > + * > + * Returns: true if pages are still valid > + */ > +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) > +{ > + struct amdgpu_ttm_tt *gtt = (void *)ttm; > + bool r = false; > > - up_read(&mm->mmap_sem); > - return 0; > + if (!gtt || !gtt->userptr) > + return false; > + > + WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); > + if (gtt->range.pfns) { > + r = hmm_vma_range_done(&gtt->range); > + kvfree(gtt->range.pfns); > + gtt->range.pfns = NULL; > + } > > -release_pages: > - release_pages(pages, pinned); > - up_read(&mm->mmap_sem); > return r; > } > > @@ -809,16 +816,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) > */ > void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) > { > - struct amdgpu_ttm_tt *gtt = (void *)ttm; > unsigned i; > > - gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations); > - for (i = 0; i < ttm->num_pages; ++i) { > - if (ttm->pages[i]) > - put_page(ttm->pages[i]); > - > + for (i = 0; i < ttm->num_pages; ++i) > ttm->pages[i] = pages ? 
pages[i] : NULL; > - } > } > > /** > @@ -903,10 +904,11 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) > /* unmap the pages mapped to the device */ > dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); > > - /* mark the pages as dirty */ > - amdgpu_ttm_tt_mark_user_pages(ttm); > - > sg_free_table(ttm->sg); > + > + if (gtt->range.pfns && > + ttm->pages[0] == hmm_pfn_to_page(&gtt->range, gtt->range.pfns[0])) > + WARN_ONCE(1, "Missing get_user_page_done\n"); > } > > int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, > @@ -1258,8 +1260,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, > > spin_lock_init(&gtt->guptasklock); > INIT_LIST_HEAD(&gtt->guptasks); > - atomic_set(&gtt->mmu_invalidations, 0); > - gtt->last_set_pages = 0; > > return 0; > } > @@ -1289,7 +1289,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, > unsigned long end) > { > struct amdgpu_ttm_tt *gtt = (void *)ttm; > - struct amdgpu_ttm_gup_task_list *entry; > unsigned long size; > > if (gtt == NULL || !gtt->userptr) > @@ -1302,48 +1301,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, > if (gtt->userptr > end || gtt->userptr + size <= start) > return false; > > - /* Search the lists of tasks that hold this mapping and see > - * if current is one of them. If it is return false. > - */ > - spin_lock(&gtt->guptasklock); > - list_for_each_entry(entry, &gtt->guptasks, list) { > - if (entry->task == current) { > - spin_unlock(&gtt->guptasklock); > - return false; > - } > - } > - spin_unlock(&gtt->guptasklock); > - > - atomic_inc(&gtt->mmu_invalidations); > - > return true; > } > > /** > - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? > - */ > -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, > - int *last_invalidated) > -{ > - struct amdgpu_ttm_tt *gtt = (void *)ttm; > - int prev_invalidated = *last_invalidated; > - > - *last_invalidated = atomic_read(&gtt->mmu_invalidations); > - return prev_invalidated != *last_invalidated; > -} > - > -/** > - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object > - * been invalidated since the last time they've been set? > + * amdgpu_ttm_tt_is_userptr - Are the pages backed by a userptr? 
> */ > -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) > +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) > { > struct amdgpu_ttm_tt *gtt = (void *)ttm; > > if (gtt == NULL || !gtt->userptr) > return false; > > - return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages; > + return true; > } > > /** > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > index b5b2d101f7db..8988c87fff9d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h > @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); > int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); > > int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); > +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); > void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); > void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); > int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, > @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, > unsigned long end); > bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, > int *last_invalidated); > -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); > +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); > bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); > uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); > uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
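
For reviewers less familiar with the HMM mirror helpers this series switches to, here is a minimal sketch (illustration only, not part of the patch) of the snapshot-then-validate pattern it implements: snapshot the userptr pages with hmm_vma_fault(), prepare the submission, then call hmm_vma_range_done() right before the work is committed; a false return means the CPU page tables changed underneath us and the caller restarts (amdgpu_cs_ioctl loops on -ERESTARTSYS, the kfd restore worker reschedules itself). The hmm_*() calls, range_in_vma() and amdgpu_hmm_init_range() are the ones used in the patch; the helper names, parameters and the simplified error handling are invented for this example only.

#include <linux/hmm.h>
#include <linux/mm.h>
#include "amdgpu_mn.h"          /* amdgpu_hmm_init_range(), added by this patch */

/* Hypothetical helper, mirrors amdgpu_ttm_tt_get_user_pages(): snapshot the
 * npages pages backing [start, start + npages * PAGE_SIZE) and start HMM
 * tracking of CPU page table updates on that range.
 */
static int userptr_snapshot(struct hmm_range *range, struct mm_struct *mm,
                            unsigned long start, unsigned long npages,
                            struct page **pages)
{
        unsigned long i;
        int r;

        amdgpu_hmm_init_range(range);   /* flags, values, pfn_shift */
        range->start = start;
        range->end = start + npages * PAGE_SIZE;
        range->pfns = kvmalloc_array(npages, sizeof(*range->pfns), GFP_KERNEL);
        if (!range->pfns)
                return -ENOMEM;
        for (i = 0; i < npages; i++)
                range->pfns[i] = range->flags[HMM_PFN_VALID] |
                                 range->flags[HMM_PFN_WRITE];

        down_read(&mm->mmap_sem);
        range->vma = find_vma(mm, start);
        if (!range_in_vma(range->vma, start, range->end))
                r = -EFAULT;
        else
                /* faults the pages in and starts tracking the range */
                r = hmm_vma_fault(range, true);
        up_read(&mm->mmap_sem);
        if (r) {
                kvfree(range->pfns);
                range->pfns = NULL;
                return r;
        }

        for (i = 0; i < npages; i++)
                pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
        return 0;
}

/* Hypothetical helper, mirrors amdgpu_ttm_tt_get_user_pages_done(): call once,
 * right before the work is committed (CS submit for gfx, queue resume for kfd).
 */
static bool userptr_still_valid(struct hmm_range *range)
{
        bool valid = hmm_vma_range_done(range); /* stops HMM tracking */

        kvfree(range->pfns);
        range->pfns = NULL;
        return valid;   /* false: CPU page tables changed, restart from scratch */
}

The key difference from the old get_user_pages() path is that nothing is pinned: the snapshot is only known to be good if hmm_vma_range_done() returns true, which is why the check sits right before the job's fence is added to the BOs (under the amdgpu_mn lock) for gfx, and why the kfd restore worker simply retries when process_info->evicted_bos has changed.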