On 2018-12-03 8:52 p.m., Kuehling, Felix wrote: > See comments inline. I didn't review the amdgpu_cs and amdgpu_gem parts > as I don't know them very well. > > On 2018-12-03 3:19 p.m., Yang, Philip wrote: >> Use HMM helper function hmm_vma_fault() to get physical pages backing >> userptr and start CPU page table update track of those pages. Then use >> hmm_vma_range_done() to check if those pages are updated before >> amdgpu_cs_submit for gfx or before user queues are resumed for kfd. >> >> If userptr pages are updated, for gfx, amdgpu_cs_ioctl will restart >> from scratch, for kfd, restore worker is rescheduled to retry. >> >> To avoid circular lock dependency, no nested locking between mmap_sem >> and bo::reserve. The locking order is: >> bo::reserve -> amdgpu_mn_lock(p->mn) >> >> HMM simplify the CPU page table concurrent update check, so remove >> guptasklock, mmu_invalidations, last_set_pages fields from >> amdgpu_ttm_tt struct. >> >> HMM does not pin the page (increase page ref count), so remove related >> operations like release_pages(), put_page(), mark_page_dirty(). >> >> v2: >> * Remove nested locking between mmap_sem and bo::reserve >> * Change locking order to bo::reserve -> amdgpu_mn_lock() >> * Use dynamic allocation to replace VLA in kernel stack >> >> Change-Id: Iffd5f855cc9ce402cdfca167f68f83fe39ac56f9 >> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> >> --- >> .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 101 ++++++++-- >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 2 - >> drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 3 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 188 +++++++++--------- >> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 34 +++- >> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 7 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 164 ++++++--------- >> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +- >> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 - >> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 67 ++++--- >> 11 files changed, 312 insertions(+), 272 deletions(-) >> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> index f3129b912714..5ce6ba24fc72 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c >> @@ -614,8 +614,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, >> amdgpu_bo_unreserve(bo); >> >> release_out: >> - if (ret) >> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); >> + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); >> free_out: >> kvfree(mem->user_pages); >> mem->user_pages = NULL; >> @@ -677,7 +676,6 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = &bo->tbo; >> ctx->kfd_bo.tv.shared = true; >> - ctx->kfd_bo.user_pages = NULL; >> list_add(&ctx->kfd_bo.tv.head, &ctx->list); >> >> amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); >> @@ -741,7 +739,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, >> ctx->kfd_bo.priority = 0; >> ctx->kfd_bo.tv.bo = &bo->tbo; >> ctx->kfd_bo.tv.shared = true; >> - ctx->kfd_bo.user_pages = NULL; >> list_add(&ctx->kfd_bo.tv.head, &ctx->list); >> >> i = 0; >> @@ -1332,9 +1329,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( >> /* Free user pages if necessary */ >> if (mem->user_pages) { >> pr_debug("%s: Freeing user_pages array\n", __func__); >> - if (mem->user_pages[0]) >> - release_pages(mem->user_pages, >> - mem->bo->tbo.ttm->num_pages); >> kvfree(mem->user_pages); >> } >> >> @@ -1761,8 +1755,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, >> __func__); >> return -ENOMEM; >> } >> - } else if (mem->user_pages[0]) { >> - release_pages(mem->user_pages, bo->tbo.ttm->num_pages); >> } >> >> /* Get updated user pages */ >> @@ -1778,12 +1770,6 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, >> * stalled user mode queues. >> */ >> } >> - >> - /* Mark the BO as valid unless it was invalidated >> - * again concurrently >> - */ >> - if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) >> - return -EAGAIN; >> } >> >> return 0; >> @@ -1876,14 +1862,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) >> } >> >> /* Validate succeeded, now the BO owns the pages, free >> - * our copy of the pointer array. Put this BO back on >> - * the userptr_valid_list. If we need to revalidate >> - * it, we need to start from scratch. >> + * our copy of the pointer array. >> */ >> kvfree(mem->user_pages); >> mem->user_pages = NULL; >> - list_move_tail(&mem->validate_list.head, >> - &process_info->userptr_valid_list); >> >> /* Update mapping. If the BO was not validated >> * (because we couldn't get user pages), this will >> @@ -1924,6 +1906,70 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) >> return ret; >> } >> >> +/* user_pages_invalidated - if CPU page table is updated after getting user >> + * pages >> + * >> + * HMM mirror callback lock amn is hold to prevent the concurrent CPU >> + * page table update while resuming user queues. >> + * >> + * Returns: true if CPU page table is updated, false otherwise >> + */ >> +static int user_pages_invalidated(struct mm_struct *mm, >> + struct amdkfd_process_info *process_info, >> + struct amdgpu_mn **amn) >> +{ >> + struct kgd_mem *mem, *tmp_mem; >> + struct amdgpu_bo *bo; >> + struct amdgpu_device *adev; >> + int invalid, r = 0; >> + >> + list_for_each_entry_safe(mem, tmp_mem, >> + &process_info->userptr_inval_list, >> + validate_list.head) { >> + >> + invalid = atomic_read(&mem->invalid); >> + if (!invalid) >> + /* BO hasn't been invalidated since the last >> + * revalidation attempt. Keep its BO list. >> + */ >> + continue; >> + >> + bo = mem->bo; >> + >> + /* Get HMM mirror callback lock */ >> + if (!*amn) { >> + adev = amdgpu_ttm_adev(bo->tbo.bdev); >> + *amn = amdgpu_mn_get(mm, adev, AMDGPU_MN_TYPE_HSA); >> + if (IS_ERR(*amn)) { >> + r = true; >> + *amn = NULL; >> + goto out; >> + } >> + >> + amdgpu_mn_lock(*amn); > There can be multiple GPUs involved here. So you'd need more than one > amn lock. I think you're trying to use the amn lock to serialize page > table updates. But that's probably not the right lock. What we should be > using for this is the page directory reservation locks. We're already > taking them for all affected GPUs in validate_invalid_user_pages. And > that's where you should also be calling amdgpu_ttm_tt_get_user_pages_done. > Yes, this is incorrect for mGPU. Remove this function completely and add calling amdgpu_tmm_tt_get_user_pages_done inside validate_invalid_user_pages while holding the reservation locks. >> + } >> + >> + r |= amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); > This needs to be called while holding the "driver lock that serializes > device page table updates". I think in our case that's the page > directory reservation. You're not holding that here. > > >> + >> + /* Put this BO back on the userptr_valid_list. If we need to >> + * revalidate it, we need to start from scratch. >> + */ >> + list_move_tail(&mem->validate_list.head, >> + &process_info->userptr_valid_list); > Doing this unconditionally here looks wrong. Also, this was already done > in validate_invalid_user_pages. So I'm wondering what's the point of > this function. In most cases the process_info->userptr_inval_list will > be empty here to begin with. > > I think you should probably call amdgpu_ttm_tt_get_user_pages_done in > validate_invalid_user_pages while holding the PD reservation lock and > where you call amdgpu_ttm_tt_set_user_pages. > > >> + >> + /* Mark the BO as valid unless it was invalidated >> + * again concurrently >> + */ >> + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) { >> + r = true; >> + goto out; >> + } >> + } >> + >> +out: >> + return r; >> +} >> + >> /* Worker callback to restore evicted userptr BOs >> * >> * Tries to update and validate all userptr BOs. If successful and no >> @@ -1939,6 +1985,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) >> struct task_struct *usertask; >> struct mm_struct *mm; >> int evicted_bos; >> + struct amdgpu_mn *amn = NULL; >> >> evicted_bos = atomic_read(&process_info->evicted_bos); >> if (!evicted_bos) >> @@ -1977,13 +2024,27 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) >> if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != >> evicted_bos) >> goto unlock_out; >> + >> + /* If CPU page table is updated again after getting user pages, >> + * schedule to restart the restore process again. >> + * >> + * amn is also locked to prevent CPU page table update while resuming >> + * user queues. amn is unlocked after user queues are resumed. >> + */ >> + if (user_pages_invalidated(mm, process_info, &amn)) >> + goto unlock_mn_out; >> + >> evicted_bos = 0; >> + >> if (kgd2kfd->resume_mm(mm)) { >> pr_err("%s: Failed to resume KFD\n", __func__); >> /* No recovery from this failure. Probably the CP is >> * hanging. No point trying again. >> */ >> } >> + >> +unlock_mn_out: >> + amdgpu_mn_unlock(amn); >> unlock_out: >> mutex_unlock(&process_info->lock); >> mmput(mm); > [snip] > >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >> index 56595b3d90d2..6b6becdb725b 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >> @@ -229,8 +229,6 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, >> true, false, MAX_SCHEDULE_TIMEOUT); >> if (r <= 0) >> DRM_ERROR("(%ld) failed to wait for user bo\n", r); >> - >> - amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); >> } >> } >> >> @@ -355,15 +353,16 @@ static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = { >> /** >> * amdgpu_mn_get - create HMM mirror context >> * >> + * @mm: the mm struct >> * @adev: amdgpu device pointer >> * @type: type of MMU notifier context >> * >> - * Creates a HMM mirror context for current->mm. >> + * Creates a HMM mirror context for mm. >> */ >> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, >> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm, >> + struct amdgpu_device *adev, >> enum amdgpu_mn_type type) >> { >> - struct mm_struct *mm = current->mm; >> struct amdgpu_mn *amn; >> unsigned long key = AMDGPU_MN_KEY(mm, type); >> int r; >> @@ -433,7 +432,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) >> struct list_head bos; >> struct interval_tree_node *it; >> >> - amn = amdgpu_mn_get(adev, type); >> + amn = amdgpu_mn_get(current->mm, adev, type); >> if (IS_ERR(amn)) >> return PTR_ERR(amn); >> >> @@ -515,3 +514,26 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) >> mutex_unlock(&adev->mn_lock); >> } >> >> +/* flags used by HMM internal, not related to CPU/GPU PTE flags */ >> +static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { >> + (1 << 0), /* HMM_PFN_VALID */ >> + (1 << 1), /* HMM_PFN_WRITE */ >> + 0 /* HMM_PFN_DEVICE_PRIVATE */ >> +}; >> + >> +static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { >> + 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ >> + 0, /* HMM_PFN_NONE */ >> + 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ >> +}; >> + >> +void amdgpu_hmm_init_range(struct hmm_range *range) >> +{ >> + if (range) { >> + range->flags = hmm_range_flags; >> + range->values = hmm_range_values; >> + range->pfn_shift = PAGE_SHIFT; >> + range->pfns = NULL; >> + INIT_LIST_HEAD(&range->list); >> + } >> +} >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >> index 0e2752646e6f..59ea30e149bd 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h >> @@ -25,9 +25,10 @@ >> #define __AMDGPU_MN_H__ >> >> /* >> - * MMU Notifier >> + * HMM mirror >> */ >> struct amdgpu_mn; >> +struct hmm_range; >> >> enum amdgpu_mn_type { >> AMDGPU_MN_TYPE_GFX, >> @@ -37,10 +38,12 @@ enum amdgpu_mn_type { >> #if defined(CONFIG_HMM) >> void amdgpu_mn_lock(struct amdgpu_mn *mn); >> void amdgpu_mn_unlock(struct amdgpu_mn *mn); >> -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, >> +struct amdgpu_mn *amdgpu_mn_get(struct mm_struct *mm, >> + struct amdgpu_device *adev, >> enum amdgpu_mn_type type); >> int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); >> void amdgpu_mn_unregister(struct amdgpu_bo *bo); >> +void amdgpu_hmm_init_range(struct hmm_range *range); >> #else >> static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} >> static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> index c91ec3101d00..4a1cb4d15dfb 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >> @@ -43,6 +43,7 @@ >> #include <linux/pagemap.h> >> #include <linux/debugfs.h> >> #include <linux/iommu.h> >> +#include <linux/hmm.h> >> #include "amdgpu.h" >> #include "amdgpu_object.h" >> #include "amdgpu_trace.h" >> @@ -705,11 +706,6 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, >> /* >> * TTM backend functions. >> */ >> -struct amdgpu_ttm_gup_task_list { >> - struct list_head list; >> - struct task_struct *task; >> -}; >> - >> struct amdgpu_ttm_tt { >> struct ttm_dma_tt ttm; >> u64 offset; >> @@ -718,85 +714,91 @@ struct amdgpu_ttm_tt { >> uint32_t userflags; >> spinlock_t guptasklock; >> struct list_head guptasks; >> - atomic_t mmu_invalidations; >> - uint32_t last_set_pages; >> + struct hmm_range range; >> }; >> >> /** >> - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR >> - * pointer to memory >> + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user >> + * memory and start HMM tracking CPU page table update >> * >> - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). >> - * This provides a wrapper around the get_user_pages() call to provide >> - * device accessible pages that back user memory. >> + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only >> + * once afterwards to stop HMM tracking >> */ >> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) >> { >> struct amdgpu_ttm_tt *gtt = (void *)ttm; >> struct mm_struct *mm = gtt->usertask->mm; >> - unsigned int flags = 0; >> - unsigned pinned = 0; >> - int r; >> + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; >> + struct hmm_range *range = >t->range; >> + int r, i; >> >> if (!mm) /* Happens during process shutdown */ >> return -ESRCH; >> >> - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) >> - flags |= FOLL_WRITE; >> + amdgpu_hmm_init_range(range); >> >> down_read(&mm->mmap_sem); >> >> - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { >> - /* >> - * check that we only use anonymous memory to prevent problems >> - * with writeback >> - */ >> - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; >> - struct vm_area_struct *vma; >> + range->vma = find_vma(mm, gtt->userptr); >> + if (!range->vma || range->vma->vm_file || range->vma->vm_end < end) { > I think this breaks file-backed mappings and also adds a new limitation > that the address range has to be a single VMA. This was previously only > required it gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY. Previously this vma is for sanity check only because get_user_pages do not need pass in vma. now get_vma_fault expect a valid vma in range struct, change to keep previous check. Thanks. >> + r = -EPERM; >> + goto out; >> + } >> + range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), >> + GFP_KERNEL | __GFP_ZERO); > Is GFP_ZERO really needed here? The loop below initializes all pfns > explicitly. GFP_ZERO is not needed. > >> + if (range->pfns == NULL) { >> + r = -ENOMEM; >> + goto out; >> + } >> + range->start = gtt->userptr; >> + range->end = end; >> >> - vma = find_vma(mm, gtt->userptr); >> - if (!vma || vma->vm_file || vma->vm_end < end) { >> - up_read(&mm->mmap_sem); >> - return -EPERM; >> - } >> + for (i = 0; i < ttm->num_pages; i++) { >> + range->pfns[i] = range->flags[HMM_PFN_VALID]; >> + range->pfns[i] |= amdgpu_ttm_tt_is_readonly(ttm) ? >> + 0 : range->flags[HMM_PFN_WRITE]; >> } >> >> - /* loop enough times using contiguous pages of memory */ >> - do { >> - unsigned num_pages = ttm->num_pages - pinned; >> - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; >> - struct page **p = pages + pinned; >> - struct amdgpu_ttm_gup_task_list guptask; >> + /* This may triggles page table update */ > Do you mean "trigger"? > > >> + r = hmm_vma_fault(range, true); >> + if (r) >> + goto out_free_pfns; >> >> - guptask.task = current; >> - spin_lock(>t->guptasklock); >> - list_add(&guptask.list, >t->guptasks); >> - spin_unlock(>t->guptasklock); >> + for (i = 0; i < ttm->num_pages; i++) >> + pages[i] = hmm_pfn_to_page(range, range->pfns[i]); >> >> - if (mm == current->mm) >> - r = get_user_pages(userptr, num_pages, flags, p, NULL); >> - else >> - r = get_user_pages_remote(gtt->usertask, >> - mm, userptr, num_pages, >> - flags, p, NULL, NULL); >> + up_read(&mm->mmap_sem); > You can probably up_read before the loop. > > Also, why do you defer freeing of range->pfns to > amdgpu_ttm_tt_get_user_pages_done? The array isn't used for anything > else later, so you could free it here. > > Or instead, don't free it and use it to replace the pages array. It > seems wasteful to maintain two pages arrays and constantly allocate and > deallocate one of them. range->pfns will be used later by hmm_vma_range_done() in amdgpu_ttm_tt_get_user_pages_done > >> + return 0; >> >> - spin_lock(>t->guptasklock); >> - list_del(&guptask.list); >> - spin_unlock(>t->guptasklock); >> +out_free_pfns: >> + kvfree(range->pfns); >> + range->pfns = NULL; > Setting range->pfns = NULL is not needed. This is a local variable > that's about to go out of scope anyway. range is a local pointer to gtt->range which will be freed in amdgpu_ttm_tt_get_user_pages_done, assign to NULL to avoid double free. > >> +out: >> + up_read(&mm->mmap_sem); >> + return r; >> +} >> >> - if (r < 0) >> - goto release_pages; >> +/** >> + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change >> + * Check if the pages backing this ttm range have been invalidated >> + * >> + * Returns: true if pages are invalidated since the last time they've been set >> + */ >> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) >> +{ >> + struct amdgpu_ttm_tt *gtt = (void *)ttm; >> + int r; >> >> - pinned += r; >> + if (gtt == NULL || !gtt->userptr) >> + return false; >> >> - } while (pinned < ttm->num_pages); >> + r = !hmm_vma_range_done(>t->range); >> >> - up_read(&mm->mmap_sem); >> - return 0; >> + if (gtt->range.pfns) { >> + kvfree(gtt->range.pfns); >> + gtt->range.pfns = NULL; >> + } >> >> -release_pages: >> - release_pages(pages, pinned); >> - up_read(&mm->mmap_sem); >> return r; >> } >> >> @@ -809,16 +811,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) >> */ >> void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) > Does this function still make sense? It just copies a pages array. Could > it instead use gtt->range->pfns directly? And why does this still have > to be separate from amdgpu_ttm_tt_get_user_pages? I think originally it > was separate because we needed to keep track of the original pages for > unpinning them. But we're no longer pinning and unpinning pages. > Yes, remove mem->user_pages completely, use bo->tbo.ttm->pages instead. >> { >> - struct amdgpu_ttm_tt *gtt = (void *)ttm; >> unsigned i; >> >> - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); >> - for (i = 0; i < ttm->num_pages; ++i) { >> - if (ttm->pages[i]) >> - put_page(ttm->pages[i]); >> - >> + for (i = 0; i < ttm->num_pages; ++i) >> ttm->pages[i] = pages ? pages[i] : NULL; > If this is still needed, it could just be a memcpy now. > >> - } >> } >> >> /** >> @@ -903,9 +899,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) >> /* unmap the pages mapped to the device */ >> dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); >> >> - /* mark the pages as dirty */ >> - amdgpu_ttm_tt_mark_user_pages(ttm); >> - >> sg_free_table(ttm->sg); >> } >> >> @@ -1258,8 +1251,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, >> >> spin_lock_init(>t->guptasklock); >> INIT_LIST_HEAD(>t->guptasks); >> - atomic_set(>t->mmu_invalidations, 0); >> - gtt->last_set_pages = 0; >> >> return 0; >> } >> @@ -1289,7 +1280,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, >> unsigned long end) >> { >> struct amdgpu_ttm_tt *gtt = (void *)ttm; >> - struct amdgpu_ttm_gup_task_list *entry; >> unsigned long size; >> >> if (gtt == NULL || !gtt->userptr) >> @@ -1302,48 +1292,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, >> if (gtt->userptr > end || gtt->userptr + size <= start) >> return false; >> >> - /* Search the lists of tasks that hold this mapping and see >> - * if current is one of them. If it is return false. >> - */ >> - spin_lock(>t->guptasklock); >> - list_for_each_entry(entry, >t->guptasks, list) { >> - if (entry->task == current) { >> - spin_unlock(>t->guptasklock); >> - return false; >> - } >> - } >> - spin_unlock(>t->guptasklock); >> - >> - atomic_inc(>t->mmu_invalidations); >> - >> return true; >> } >> >> /** >> - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? >> + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? >> */ >> -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, >> - int *last_invalidated) >> -{ >> - struct amdgpu_ttm_tt *gtt = (void *)ttm; >> - int prev_invalidated = *last_invalidated; >> - >> - *last_invalidated = atomic_read(>t->mmu_invalidations); >> - return prev_invalidated != *last_invalidated; >> -} >> - >> -/** >> - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object >> - * been invalidated since the last time they've been set? >> - */ >> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) >> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) >> { >> struct amdgpu_ttm_tt *gtt = (void *)ttm; >> >> if (gtt == NULL || !gtt->userptr) >> return false; >> >> - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; >> + return true; >> } >> >> /** >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> index b5b2d101f7db..8988c87fff9d 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h >> @@ -102,6 +102,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); >> int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); >> >> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); >> +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); >> void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); >> void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); >> int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, >> @@ -112,7 +113,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, >> unsigned long end); >> bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, >> int *last_invalidated); >> -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); >> +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); >> bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); >> uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); >> uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> index 2acb9838913e..4a67a88acea1 100644 >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c >> @@ -618,7 +618,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, >> entry->priority = 0; >> entry->tv.bo = &vm->root.base.bo->tbo; >> entry->tv.shared = true; >> - entry->user_pages = NULL; >> list_add(&entry->tv.head, validated); >> } >> >> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >> index 8372556b52eb..fe120cc0930c 100644 >> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c >> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c > Please make everything below a separate commit. Ok, submit separate patch for the changes below. > Regards, > Felix > > >> @@ -1158,6 +1158,33 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, >> >> retval = 0; >> >> + /* Do init_mqd before dqm_lock(dqm) to avoid circular locking order: >> + * lock(dqm) -> bo::reserve >> + */ >> + mqd_mgr = dqm->ops.get_mqd_manager(dqm, >> + get_mqd_type_from_queue_type(q->properties.type)); >> + >> + if (!mqd_mgr) { >> + retval = -ENOMEM; >> + goto out; >> + } >> + >> + /* >> + * Eviction state logic: we only mark active queues as evicted >> + * to avoid the overhead of restoring inactive queues later >> + */ >> + if (qpd->evicted) >> + q->properties.is_evicted = (q->properties.queue_size > 0 && >> + q->properties.queue_percent > 0 && >> + q->properties.queue_address != 0); >> + dqm->asic_ops.init_sdma_vm(dqm, q, qpd); >> + q->properties.tba_addr = qpd->tba_addr; >> + q->properties.tma_addr = qpd->tma_addr; >> + retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, >> + &q->gart_mqd_addr, &q->properties); >> + if (retval) >> + goto out; >> + >> dqm_lock(dqm); >> >> if (dqm->total_queue_count >= max_num_of_queues_per_device) { >> @@ -1181,30 +1208,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, >> if (retval) >> goto out_deallocate_sdma_queue; >> >> - mqd_mgr = dqm->ops.get_mqd_manager(dqm, >> - get_mqd_type_from_queue_type(q->properties.type)); >> - >> - if (!mqd_mgr) { >> - retval = -ENOMEM; >> - goto out_deallocate_doorbell; >> - } >> - /* >> - * Eviction state logic: we only mark active queues as evicted >> - * to avoid the overhead of restoring inactive queues later >> - */ >> - if (qpd->evicted) >> - q->properties.is_evicted = (q->properties.queue_size > 0 && >> - q->properties.queue_percent > 0 && >> - q->properties.queue_address != 0); >> - >> - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); >> - >> - q->properties.tba_addr = qpd->tba_addr; >> - q->properties.tma_addr = qpd->tma_addr; >> - retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, >> - &q->gart_mqd_addr, &q->properties); >> - if (retval) >> - goto out_deallocate_doorbell; >> >> list_add(&q->list, &qpd->queues_list); >> qpd->queue_count++; >> @@ -1228,14 +1231,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, >> dqm_unlock(dqm); >> return retval; >> >> -out_deallocate_doorbell: >> - deallocate_doorbell(qpd, q); >> out_deallocate_sdma_queue: >> if (q->properties.type == KFD_QUEUE_TYPE_SDMA) >> deallocate_sdma_queue(dqm, q->sdma_id); >> out_unlock: >> dqm_unlock(dqm); >> - >> +out: >> return retval; >> } >> >> @@ -1398,8 +1399,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, >> qpd->reset_wavefronts = true; >> } >> >> - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); >> - >> /* >> * Unconditionally decrement this counter, regardless of the queue's >> * type >> @@ -1410,6 +1409,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, >> >> dqm_unlock(dqm); >> >> + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ >> + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); >> + >> return retval; >> >> failed: >> @@ -1631,7 +1633,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, >> qpd->reset_wavefronts = false; >> } >> >> - /* lastly, free mqd resources */ >> + dqm_unlock(dqm); >> + >> + /* Lastly, free mqd resources. >> + * Do uninit_mqd() after dqm_unlock to avoid circular locking. >> + */ >> list_for_each_entry_safe(q, next, &qpd->queues_list, list) { >> mqd_mgr = dqm->ops.get_mqd_manager(dqm, >> get_mqd_type_from_queue_type(q->properties.type)); >> @@ -1645,7 +1651,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, >> } >> >> out: >> - dqm_unlock(dqm); >> return retval; >> } >> _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx