Use the HMM helper function hmm_vma_fault() to get the physical pages
backing a userptr, and start HMM tracking of CPU page table updates on
those pages. Then use hmm_vma_range_done() to check whether the pages
were updated before amdgpu_cs_submit() for gfx, or before the user
queues are resumed for kfd. If the userptr pages were updated, gfx
restarts amdgpu_cs_ioctl() from scratch, and kfd reschedules the
restore worker to retry.

To avoid a circular lock dependency, do not nest mmap_sem inside
bo::reserve. The locking order is:
bo::reserve -> amdgpu_mn_lock(p->mn)

HMM simplifies the concurrent CPU page table update check, so the
amdgpu_ttm_gup_task_list bookkeeping and the mmu_invalidations and
last_set_pages fields can be removed from struct amdgpu_ttm_tt. HMM
does not pin the pages (it does not raise the page refcount), so the
related operations release_pages(), put_page() and mark_page_dirty()
are removed as well.

v2:
* Remove nested locking between mmap_sem and bo::reserve
* Change locking order to bo::reserve -> amdgpu_mn_lock()
* Use dynamic allocation to replace the VLA on the kernel stack

Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9
Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
---
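For reviewers who have not used the HMM mirror flow before, the retry
pattern this patch builds on looks roughly like the sketch below. This is
illustration only, not part of the patch: do_submit() is a made-up
placeholder for the gfx submit (or the KFD queue-resume step), and the
reserve/validate steps in between are elided.

/* Minimal sketch of the get-pages / pages-done pattern, using the two
 * helpers this patch introduces. Assumes a valid userptr ttm_tt and a
 * caller-provided pages array; do_submit() is hypothetical.
 */
static int do_submit(void);	/* placeholder, not a real function */

static int userptr_submit_sketch(struct ttm_tt *ttm, struct page **pages)
{
	int r;

retry:
	/* Fault in the backing pages and start HMM tracking of CPU page
	 * table updates on the range. No page refcounts are taken.
	 */
	r = amdgpu_ttm_tt_get_user_pages(ttm, pages);
	if (r)
		return r;

	/* ... reserve and validate the BOs, build the job ... */

	/* Stop tracking. Returns true if the CPU page table changed since
	 * amdgpu_ttm_tt_get_user_pages(); the pages may then be stale, so
	 * redo the whole sequence from scratch.
	 */
	if (amdgpu_ttm_tt_get_user_pages_done(ttm))
		goto retry;

	return do_submit();
}

In the real flow the pages-done check runs while holding amdgpu_mn_lock(),
so no invalidation can slip in between the check and fencing the BOs;
-ERESTARTSYS then unwinds to amdgpu_cs_ioctl(), which retries the CS up to
ten times before giving up with -EDEADLK.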
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 101 ++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c   |   2 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h   |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        | 188 +++++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c       |  14 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c        |  34 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h        |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c       | 164 ++++++---------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h       |   3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        |   1 -
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |  67 ++++---
 11 files changed, 312 insertions(+), 272 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f3129b912714..5ce6ba24fc72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -614,8 +614,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	if (ret)
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 free_out:
 	kvfree(mem->user_pages);
 	mem->user_pages = NULL;
@@ -677,7 +676,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
@@ -741,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
 	ctx->kfd_bo.tv.shared = true;
-	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
 	i = 0;
@@ -1332,9 +1329,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	/* Free user pages if necessary */
 	if (mem->user_pages) {
 		pr_debug("%s: Freeing user_pages array\n", __func__);
-		if (mem->user_pages[0])
-			release_pages(mem->user_pages,
-					mem->bo->tbo.ttm->num_pages);
 		kvfree(mem->user_pages);
 	}
 
@@ -1761,8 +1755,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 					__func__);
 			return -ENOMEM;
 		}
-	} else if (mem->user_pages[0]) {
-		release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
 	}
 
 	/* Get updated user pages */
@@ -1778,12 +1770,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 * stalled user mode queues.
 			 */
 		}
-
-		/* Mark the BO as valid unless it was invalidated
-		 * again concurrently
-		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
 	}
 
 	return 0;
@@ -1876,14 +1862,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 		}
 
 		/* Validate succeeded, now the BO owns the pages, free
-		 * our copy of the pointer array. Put this BO back on
-		 * the userptr_valid_list. If we need to revalidate
-		 * it, we need to start from scratch.
+		 * our copy of the pointer array.
 		 */
 		kvfree(mem->user_pages);
 		mem->user_pages = NULL;
-		list_move_tail(&mem->validate_list.head,
-			       &process_info->userptr_valid_list);
 
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
@@ -1924,6 +1906,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 	return ret;
 }
 
+/* user_pages_invalidated - check whether the CPU page table was updated
+ * after getting the user pages
+ *
+ * The HMM mirror callback lock amn is held to prevent concurrent CPU
+ * page table updates while user queues are being resumed.
+ *
+ * Returns: true if the CPU page table was updated, false otherwise
+ */
+static int user_pages_invalidated(struct mm_struct *mm,
+				  struct amdkfd_process_info *process_info,
+				  struct amdgpu_mn **amn)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	struct amdgpu_bo *bo;
+	struct amdgpu_device *adev;
+	int invalid, r = 0;
+
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+
+		invalid = atomic_read(&mem->invalid);
+		if (!invalid)
+			/* BO hasn't been invalidated since the last
+			 * revalidation attempt. Keep its BO list.
+			 */
+			continue;
+
+		bo = mem->bo;
+
+		/* Get HMM mirror callback lock */
+		if (!*amn) {
+			adev = amdgpu_ttm_adev(bo->tbo.bdev);
+			*amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA);
+			if (IS_ERR(*amn)) {
+				r = true;
+				*amn = NULL;
+				goto out;
+			}
+
+			amdgpu_mn_lock(*amn);
+		}
+
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+
+		/* Put this BO back on the userptr_valid_list. If we need to
+		 * revalidate it, we need to start from scratch.
+		 */
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+
+		/* Mark the BO as valid unless it was invalidated
+		 * again concurrently
+		 */
+		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) {
+			r = true;
+			goto out;
+		}
+	}
+
+out:
+	return r;
+}
+
 /* Worker callback to restore evicted userptr BOs
  *
  * Tries to update and validate all userptr BOs. If successful and no
@@ -1939,6 +1985,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	struct task_struct *usertask;
 	struct mm_struct *mm;
 	int evicted_bos;
+	struct amdgpu_mn *amn = NULL;
 
 	evicted_bos = atomic_read(&process_info->evicted_bos);
 	if (!evicted_bos)
@@ -1977,13 +2024,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
 	    evicted_bos)
 		goto unlock_out;
+
+	/* If the CPU page table was updated again after getting the user
+	 * pages, reschedule to restart the restore process.
+	 *
+	 * amn is also locked to prevent CPU page table updates while user
+	 * queues are resumed. amn is unlocked after the user queues resume.
+	 */
+	if (user_pages_invalidated(mm, process_info, &amn))
+		goto unlock_mn_out;
+
 	evicted_bos = 0;
+
 	if (kgd2kfd->resume_mm(mm)) {
 		pr_err("%s: Failed to resume KFD\n", __func__);
 		/* No recovery from this failure. Probably the CP is
 		 * hanging. No point trying again.
 		 */
 	}
+
+unlock_mn_out:
+	amdgpu_mn_unlock(amn);
 unlock_out:
 	mutex_unlock(&process_info->lock);
 	mmput(mm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 14d2982a47cc..2716c241f57e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -201,8 +201,6 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 
 		if (!bo->parent)
 			list_add_tail(&e->tv.head, &bucket[priority]);
-
-		e->user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 7c5f5d1601e6..4beab2de09b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -35,8 +35,7 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
-	struct page			**user_pages;
-	int				user_invalidated;
+	bool				user_invalidated;
 };
 
 struct amdgpu_bo_list {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 024dfbd87f11..cee7d398916c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -51,7 +51,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &bo->tbo;
 	p->uf_entry.tv.shared = true;
-	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
 
@@ -531,24 +530,19 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
-		bool binding_userptr = false;
 		struct mm_struct *usermm;
 
 		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
-		/* Check if we have user pages and nobody bound the BO already */
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-		    lobj->user_pages) {
+		if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
+		    lobj->user_invalidated) {
 			amdgpu_bo_placement_from_domain(bo,
 							AMDGPU_GEM_DOMAIN_CPU);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 			if (r)
 				return r;
-			amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
-						     lobj->user_pages);
-			binding_userptr = true;
 		}
 
 		if (p->evictable == lobj)
@@ -557,11 +551,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		r = amdgpu_cs_validate(p, bo);
 		if (r)
 			return r;
-
-		if (binding_userptr) {
-			kvfree(lobj->user_pages);
-			lobj->user_pages = NULL;
-		}
 	}
 	return 0;
 }
@@ -576,7 +565,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo *gds;
 	struct amdgpu_bo *gws;
 	struct amdgpu_bo *oa;
-	unsigned tries = 10;
+	struct list_head userpage_list;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -585,7 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (cs->in.bo_list_handle) {
 		if (p->bo_list)
 			return -EINVAL;
-
 		r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
 				       &p->bo_list);
 		if (r)
@@ -600,7 +588,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	if (p->bo_list->first_userptr != p->bo_list->num_entries)
-		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
+		p->mn = amdgpu_mn_get(current->mm, p->adev,
+				      AMDGPU_MN_TYPE_GFX);
 
 	INIT_LIST_HEAD(&duplicates);
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
 
 	if (p->uf_entry.tv.bo &&
 	    !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
 		list_add(&p->uf_entry.tv.head, &p->validated);
 
-	while (1) {
-		struct list_head need_pages;
+	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+				   &duplicates);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+		goto out;
+	}
 
-		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
-					   &duplicates);
-		if (unlikely(r != 0)) {
-			if (r != -ERESTARTSYS)
-				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
-			goto error_free_pages;
-		}
+	INIT_LIST_HEAD(&userpage_list);
+	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		INIT_LIST_HEAD(&need_pages);
-		amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+		list_del(&e->tv.head);
+		list_add(&e->tv.head, &userpage_list);
+		amdgpu_bo_unreserve(bo);
+	}
 
-			if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
-				 &e->user_invalidated) && e->user_pages) {
+	/* Get the userptr backing pages. If the pages were updated after being
+	 * registered in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate()
+	 * will do amdgpu_ttm_backend_bind() to flush and invalidate new pages
+	 */
+	if (!list_empty(&userpage_list)) {
+		/* Unreserve everything again, to avoid the circular locking
+		 * case bo::reserve -> mmap_sem
+		 */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 
-				/* We acquired a page array, but somebody
-				 * invalidated it. Free it and try again
-				 */
-				release_pages(e->user_pages,
-					      bo->tbo.ttm->num_pages);
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
+		list_for_each_entry(e, &userpage_list, tv.head) {
+			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+			bool userpage_invalidated = false;
+			struct page **pages;
+			int i;
+
+			pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+					       sizeof(*pages),
+					       GFP_KERNEL | __GFP_ZERO);
+			if (!pages) {
+				DRM_ERROR("kvmalloc_array failure\n");
+				return -ENOMEM;
 			}
 
-			if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
-			    !e->user_pages) {
-				list_del(&e->tv.head);
-				list_add(&e->tv.head, &need_pages);
-
-				amdgpu_bo_unreserve(bo);
+			r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, pages);
+			if (r) {
+				kvfree(pages);
+				return r;
 			}
-		}
 
-		if (list_empty(&need_pages))
-			break;
-
-		/* Unreserve everything again. */
-		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-		/* We tried too many times, just abort */
-		if (!--tries) {
-			r = -EDEADLK;
-			DRM_ERROR("deadlock in %s\n", __func__);
-			goto error_free_pages;
+			for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
+				if (bo->tbo.ttm->pages[i] != pages[i]) {
+					bo->tbo.ttm->pages[i] = pages[i];
+					userpage_invalidated = true;
+				}
+			}
+			e->user_invalidated = userpage_invalidated;
+			kvfree(pages);
 		}
 
-		/* Fill the page arrays for all userptrs. */
-		list_for_each_entry(e, &need_pages, tv.head) {
-			struct ttm_tt *ttm = e->tv.bo->ttm;
-
-			e->user_pages = kvmalloc_array(ttm->num_pages,
-						       sizeof(struct page*),
-						       GFP_KERNEL | __GFP_ZERO);
-			if (!e->user_pages) {
-				r = -ENOMEM;
-				DRM_ERROR("calloc failure in %s\n", __func__);
-				goto error_free_pages;
-			}
+		list_splice(&userpage_list, &p->validated);
 
-			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
-			if (r) {
-				DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
-				kvfree(e->user_pages);
-				e->user_pages = NULL;
-				goto error_free_pages;
-			}
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0)) {
+			if (r != -ERESTARTSYS)
+				DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
+			goto out;
 		}
-
-		/* And try again.
-		 */
-		list_splice(&need_pages, &p->validated);
 	}
 
 	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
@@ -743,17 +725,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 error_validate:
 	if (r)
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
-
-error_free_pages:
-
-	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
-		if (!e->user_pages)
-			continue;
-
-		release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
-		kvfree(e->user_pages);
-	}
-
+out:
 	return r;
 }
 
@@ -1206,8 +1178,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	struct amdgpu_bo_list_entry *e;
 	struct amdgpu_job *job;
 	uint64_t seq;
-
-	int r;
+	int r = 0;
 
 	job = p->job;
 	p->job = NULL;
@@ -1216,15 +1187,21 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_unlock;
 
-	/* No memory allocation is allowed while holding the mn lock */
+	/* No memory allocation is allowed while holding the mn lock.
+	 * p->mn is held until amdgpu_cs_submit is finished and the fence is
+	 * added to the BOs.
+	 */
 	amdgpu_mn_lock(p->mn);
+
+	/* If userptr pages were updated after amdgpu_cs_parser_bos(), restart cs */
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
-			r = -ERESTARTSYS;
-			goto error_abort;
-		}
+		r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	}
+	if (r) {
+		r = -ERESTARTSYS;
+		goto error_abort;
 	}
 
 	job->owner = p->filp;
@@ -1271,14 +1248,20 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct amdgpu_device *adev = dev->dev_private;
-	union drm_amdgpu_cs *cs = data;
-	struct amdgpu_cs_parser parser = {};
-	bool reserved_buffers = false;
+	union drm_amdgpu_cs *cs;
+	struct amdgpu_cs_parser parser;
+	bool reserved_buffers;
+	int tries = 10;
 	int i, r;
 
 	if (!adev->accel_working)
 		return -EBUSY;
 
+restart:
+	memset(&parser, 0, sizeof(parser));
+	cs = data;
+	reserved_buffers = false;
+
 	parser.adev = adev;
 	parser.filp = filp;
 
@@ -1320,6 +1303,15 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 out:
 	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+
+	if (r == -ERESTARTSYS) {
+		if (!--tries) {
+			DRM_ERROR("Possible deadlock? Retried too many times\n");
+			return -EDEADLK;
+		}
+		goto restart;
+	}
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7b3d1ebda9df..ff9a8fdab20f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -336,26 +336,24 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 		r = amdgpu_bo_reserve(bo, true);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 
 		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 		amdgpu_bo_unreserve(bo);
 		if (r)
-			goto free_pages;
+			goto user_pages_done;
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
-	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_put_unlocked(gobj);
 	if (r)
-		return r;
+		goto user_pages_done;
 
 	args->handle = handle;
-	return 0;
 
-free_pages:
-	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
+user_pages_done:
+	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
 
 release_object:
 	drm_gem_object_put_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 56595b3d90d2..6b6becdb725b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
 					  true, false, MAX_SCHEDULE_TIMEOUT);
 		if (r <= 0)
 			DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
-		amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm);
 	}
 }
 
@@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
 /**
  * amdgpu_mn_get - create HMM mirror context
  *
+ * @mm: the mm struct
  * @adev: amdgpu device pointer
  * @type: type of MMU notifier context
  *
- * Creates a HMM mirror context for current->mm.
+ * Creates an HMM mirror context for mm.
  */
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type)
 {
-	struct mm_struct *mm = current->mm;
 	struct amdgpu_mn *amn;
 	unsigned long key = AMDGPU_MN_KEY(mm, type);
 	int r;
@@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 	struct list_head bos;
 	struct interval_tree_node *it;
 
-	amn = amdgpu_mn_get(adev, type);
+	amn = amdgpu_mn_get(current->mm, adev, type);
 	if (IS_ERR(amn))
 		return PTR_ERR(amn);
 
@@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 	mutex_unlock(&adev->mn_lock);
 }
 
+/* flags used internally by HMM, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+	(1 << 0), /* HMM_PFN_VALID */
+	(1 << 1), /* HMM_PFN_WRITE */
+	0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+	0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+	0, /* HMM_PFN_NONE */
+	0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
+void amdgpu_hmm_init_range(struct hmm_range *range)
+{
+	if (range) {
+		range->flags = hmm_range_flags;
+		range->values = hmm_range_values;
+		range->pfn_shift = PAGE_SHIFT;
+		range->pfns = NULL;
+		INIT_LIST_HEAD(&range->list);
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index 0e2752646e6f..59ea30e149bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -25,9 +25,10 @@
 #define __AMDGPU_MN_H__
 
 /*
- * MMU Notifier
+ * HMM mirror
 */
 struct amdgpu_mn;
+struct hmm_range;
 
 enum amdgpu_mn_type {
 	AMDGPU_MN_TYPE_GFX,
@@ -37,10 +38,12 @@ enum amdgpu_mn_type {
 #if defined(CONFIG_HMM)
 void amdgpu_mn_lock(struct amdgpu_mn *mn);
 void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm,
+				struct amdgpu_device *adev,
 				enum amdgpu_mn_type type);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+void amdgpu_hmm_init_range(struct hmm_range *range);
 #else
 static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
 static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index c91ec3101d00..4a1cb4d15dfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/hmm.h>
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
@@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 /*
  * TTM backend functions.
 */
-struct amdgpu_ttm_gup_task_list {
-	struct list_head	list;
-	struct task_struct	*task;
-};
-
 struct amdgpu_ttm_tt {
 	struct ttm_dma_tt	ttm;
 	u64			offset;
@@ -718,85 +714,91 @@ struct amdgpu_ttm_tt {
 	uint32_t		userflags;
 	spinlock_t		guptasklock;
 	struct list_head	guptasks;
-	atomic_t		mmu_invalidations;
-	uint32_t		last_set_pages;
+	struct hmm_range	range;
 };
 
 /**
- * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
- * pointer to memory
+ * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
+ * memory and start HMM tracking of CPU page table updates
 *
- * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
- * This provides a wrapper around the get_user_pages() call to provide
- * device accessible pages that back user memory.
+ * The calling function must call amdgpu_ttm_tt_get_user_pages_done() once
+ * and only once afterwards to stop HMM tracking.
 */
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	struct mm_struct *mm = gtt->usertask->mm;
-	unsigned int flags = 0;
-	unsigned pinned = 0;
-	int r;
+	unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+	struct hmm_range *range = &gtt->range;
+	int r, i;
 
 	if (!mm) /* Happens during process shutdown */
 		return -ESRCH;
 
-	if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
-		flags |= FOLL_WRITE;
+	amdgpu_hmm_init_range(range);
 
 	down_read(&mm->mmap_sem);
 
-	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/*
-		 * check that we only use anonymous memory to prevent problems
-		 * with writeback
-		 */
-		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
-		struct vm_area_struct *vma;
+	range->vma = find_vma(mm, gtt->userptr);
+	if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) {
+		r = -EPERM;
+		goto out;
+	}
+	range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t),
+				     GFP_KERNEL | __GFP_ZERO);
+	if (range->pfns == NULL) {
+		r = -ENOMEM;
+		goto out;
+	}
+	range->start = gtt->userptr;
+	range->end = end;
 
-		vma = find_vma(mm, gtt->userptr);
-		if (!vma || vma->vm_file || vma->vm_end < end) {
-			up_read(&mm->mmap_sem);
-			return -EPERM;
-		}
+	for (i = 0; i < ttm->num_pages; i++) {
+		range->pfns[i] = range->flags[HMM_PFN_VALID];
+		range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ?
+					0 : range->flags[HMM_PFN_WRITE];
 	}
 
-	/* loop enough times using contiguous pages of memory */
-	do {
-		unsigned num_pages = ttm->num_pages - pinned;
-		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **p = pages + pinned;
-		struct amdgpu_ttm_gup_task_list guptask;
+	/* This may trigger a CPU page table update */
+	r = hmm_vma_fault(range, true);
+	if (r)
+		goto out_free_pfns;
 
-		guptask.task = current;
-		spin_lock(&gtt->guptasklock);
-		list_add(&guptask.list, &gtt->guptasks);
-		spin_unlock(&gtt->guptasklock);
+	for (i = 0; i < ttm->num_pages; i++)
+		pages[i] = hmm_pfn_to_page(range, range->pfns[i]);
 
-		if (mm == current->mm)
-			r = get_user_pages(userptr, num_pages, flags, p, NULL);
-		else
-			r = get_user_pages_remote(gtt->usertask,
-					mm, userptr, num_pages,
-					flags, p, NULL, NULL);
+	up_read(&mm->mmap_sem);
+	return 0;
 
-		spin_lock(&gtt->guptasklock);
-		list_del(&guptask.list);
-		spin_unlock(&gtt->guptasklock);
+out_free_pfns:
+	kvfree(range->pfns);
+	range->pfns = NULL;
+out:
+	up_read(&mm->mmap_sem);
+	return r;
+}
 
-		if (r < 0)
-			goto release_pages;
+/**
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table
+ * changes and check whether the backing pages have been invalidated
+ *
+ * Returns: true if the pages were invalidated since they were last set
+ */
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int r;
 
-		pinned += r;
+	if (gtt == NULL || !gtt->userptr)
+		return false;
 
-	} while (pinned < ttm->num_pages);
+	r = !hmm_vma_range_done(&gtt->range);
 
-	up_read(&mm->mmap_sem);
-	return 0;
+	if (gtt->range.pfns) {
+		kvfree(gtt->range.pfns);
+		gtt->range.pfns = NULL;
+	}
 
-release_pages:
-	release_pages(pages, pinned);
-	up_read(&mm->mmap_sem);
 	return r;
 }
 
@@ -809,16 +811,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 */
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 	unsigned i;
 
-	gtt->last_set_pages = atomic_read(&gtt->mmu_invalidations);
-	for (i = 0; i < ttm->num_pages; ++i) {
-		if (ttm->pages[i])
-			put_page(ttm->pages[i]);
-
+	for (i = 0; i < ttm->num_pages; ++i)
 		ttm->pages[i] = pages ? pages[i] : NULL;
-	}
 }
 
 /**
@@ -903,9 +899,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 
 	/* unmap the pages mapped to the device */
 	dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
-	/* mark the pages as dirty */
-	amdgpu_ttm_tt_mark_user_pages(ttm);
-
 	sg_free_table(ttm->sg);
 }
 
@@ -1258,8 +1251,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
-	atomic_set(&gtt->mmu_invalidations, 0);
-	gtt->last_set_pages = 0;
 
 	return 0;
 }
@@ -1289,7 +1280,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	struct amdgpu_ttm_gup_task_list *entry;
 	unsigned long size;
 
 	if (gtt == NULL || !gtt->userptr)
@@ -1302,48 +1292,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	if (gtt->userptr > end || gtt->userptr + size <= start)
 		return false;
 
-	/* Search the lists of tasks that hold this mapping and see
-	 * if current is one of them. If it is return false.
-	 */
-	spin_lock(&gtt->guptasklock);
-	list_for_each_entry(entry, &gtt->guptasks, list) {
-		if (entry->task == current) {
-			spin_unlock(&gtt->guptasklock);
-			return false;
-		}
-	}
-	spin_unlock(&gtt->guptasklock);
-
-	atomic_inc(&gtt->mmu_invalidations);
-
 	return true;
 }
 
 /**
- * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
+ * amdgpu_ttm_tt_is_userptr - Is this ttm_tt object backed by userptr memory?
 */
-bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
-				       int *last_invalidated)
-{
-	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	int prev_invalidated = *last_invalidated;
-
-	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
-	return prev_invalidated != *last_invalidated;
-}
-
-/**
- * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
- * been invalidated since the last time they've been set?
- */
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
 
 	if (gtt == NULL || !gtt->userptr)
 		return false;
 
-	return atomic_read(&gtt->mmu_invalidations) != gtt->last_set_pages;
+	return true;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index b5b2d101f7db..8988c87fff9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo);
 int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
 void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages);
 void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
@@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
 bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
 				       int *last_invalidated);
-bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm);
+bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem);
 uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2acb9838913e..4a67a88acea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -618,7 +618,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->priority = 0;
 	entry->tv.bo = &vm->root.base.bo->tbo;
 	entry->tv.shared = true;
-	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..fe120cc0930c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	retval = 0;
 
+	/* Do init_mqd before dqm_lock(dqm) to avoid a circular locking order:
+	 * lock(dqm) -> bo::reserve
+	 */
+	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+
+	if (!mqd_mgr) {
+		retval = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * Eviction state logic: we only mark active queues as evicted
+	 * to avoid the overhead of restoring inactive queues later
+	 */
+	if (qpd->evicted)
+		q->properties.is_evicted = (q->properties.queue_size > 0 &&
+					    q->properties.queue_percent > 0 &&
+					    q->properties.queue_address != 0);
+	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
+	q->properties.tba_addr = qpd->tba_addr;
+	q->properties.tma_addr = qpd->tma_addr;
+	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
+				&q->gart_mqd_addr, &q->properties);
+	if (retval)
+		goto out;
+
 	dqm_lock(dqm);
 
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
@@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
-	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
-			get_mqd_type_from_queue_type(q->properties.type));
-
-	if (!mqd_mgr) {
-		retval = -ENOMEM;
-		goto out_deallocate_doorbell;
-	}
-	/*
-	 * Eviction state logic: we only mark active queues as evicted
-	 * to avoid the overhead of restoring inactive queues later
-	 */
-	if (qpd->evicted)
-		q->properties.is_evicted = (q->properties.queue_size > 0 &&
-					    q->properties.queue_percent > 0 &&
-					    q->properties.queue_address != 0);
-
-	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
-	q->properties.tba_addr = qpd->tba_addr;
-	q->properties.tma_addr = qpd->tma_addr;
-	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
-				&q->gart_mqd_addr, &q->properties);
-	if (retval)
-		goto out_deallocate_doorbell;
 
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
@@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	dqm_unlock(dqm);
 	return retval;
 
-out_deallocate_doorbell:
-	deallocate_doorbell(qpd, q);
 out_deallocate_sdma_queue:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		deallocate_sdma_queue(dqm, q->sdma_id);
 out_unlock:
 	dqm_unlock(dqm);
-
+out:
 	return retval;
 }
 
@@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = true;
 	}
 
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
 	 */
@@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 
 	dqm_unlock(dqm);
 
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
 	return retval;
 
 failed:
@@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = false;
 	}
 
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
 		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
@@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	}
 
 out:
-	dqm_unlock(dqm);
 	return retval;
 }
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx