[+Jerome] On 2019-06-03 7:20 p.m., Yang, Philip wrote: > > On 2019-06-03 5:02 p.m., Kuehling, Felix wrote: >> On 2019-06-03 2:44 p.m., Yang, Philip wrote: >>> HMM provides new APIs and helps in kernel 5.2-rc1 to simplify driver >>> path. The old hmm APIs are deprecated and will be removed in future. >>> >>> Below are changes in driver: >>> >>> 1. Change hmm_vma_fault to hmm_range_register and hmm_range_fault which >>> supports range with multiple vmas, remove the multiple vmas handle path >>> and data structure. >>> 2. Change hmm_vma_range_done to hmm_range_unregister. >>> 3. Use default flags to avoid pre-fill pfn arrays. >>> 4. Use new hmm_device_ helpers. >>> >>> v2: retry if hmm_range_fault returns -EAGAIN >>> v3: define MAX_RETRY_HMM_RANGE_FAULT, skip drop mmap_sem if retry >> I think dropping mmap_sem for retry was correct in v2 of this patch. Now >> you will try to take the mmap_sem multiple times without dropping it if >> you retry. >> >> We don't need to hold the mmap_sem during hmm_range_wait_until_valid. >> > hmm_range_fault will drop the mmap_sem internally before returning > -EAGAIN, I think it does this on purpose to avoid upper level call retry > whithout release mmap_sem in between. Good catch. Looks like the documentation of hmm_range_fault is outdated. It says -EAGAIN can only be returned if block==false, and that mmap_sem is not dropped if block==true. Both of these statements are no longer true. The example code in Documentation/vm/hmm.rst drops the mmap_sem explicitly after hmm_range_snapshot returned -EAGAIN. It seems that hmm_range_snapshot behaves subtly differently from hmm_range_fault with respect to dropping the mmap_sem. That's very confusing and annoying. Regards, Felix > I find v2 was incorrect after > reading hmm code again today. I should mention details of this change in > my commit, see more below. > > Philip > >> See more below ... >> >> >>> Change-Id: I1a00f88459f3b96c9536933e9a17eb1ebaa78113 >>> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> >>> --- >>> drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 1 - >>> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 149 ++++++++++-------------- >>> 2 files changed, 64 insertions(+), 86 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> index 41ccee49a224..e0bb47ce570b 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c >>> @@ -519,7 +519,6 @@ void amdgpu_hmm_init_range(struct hmm_range *range) >>> range->flags = hmm_range_flags; >>> range->values = hmm_range_values; >>> range->pfn_shift = PAGE_SHIFT; >>> - range->pfns = NULL; >>> INIT_LIST_HEAD(&range->list); >>> } >>> } >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> index eb4b85061c7e..9de6a2f5e572 100644 >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c >>> @@ -711,8 +711,7 @@ struct amdgpu_ttm_tt { >>> struct task_struct *usertask; >>> uint32_t userflags; >>> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) >>> - struct hmm_range *ranges; >>> - int nr_ranges; >>> + struct hmm_range *range; >>> #endif >>> }; >>> >>> @@ -725,57 +724,36 @@ struct amdgpu_ttm_tt { >>> */ >>> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) >>> >>> -/* Support Userptr pages cross max 16 vmas */ >>> -#define MAX_NR_VMAS (16) >>> +#define MAX_RETRY_HMM_RANGE_FAULT 16 >>> >>> int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) >>> { >>> struct amdgpu_ttm_tt *gtt = (void *)ttm; >>> struct mm_struct *mm = gtt->usertask->mm; >>> unsigned long start = gtt->userptr; >>> - unsigned long end = start + ttm->num_pages * PAGE_SIZE; >>> - struct vm_area_struct *vma = NULL, *vmas[MAX_NR_VMAS]; >>> - struct hmm_range *ranges; >>> - unsigned long nr_pages, i; >>> - uint64_t *pfns, f; >>> + struct vm_area_struct *vma; >>> + struct hmm_range *range; >>> + unsigned long i; >>> + uint64_t *pfns; >>> + int retry = 0; >>> int r = 0; >>> >>> if (!mm) /* Happens during process shutdown */ >>> return -ESRCH; >>> >>> - down_read(&mm->mmap_sem); >>> - >>> - /* user pages may cross multiple VMAs */ >>> - gtt->nr_ranges = 0; >>> - do { >>> - unsigned long vm_start; >>> - >>> - if (gtt->nr_ranges >= MAX_NR_VMAS) { >>> - DRM_ERROR("Too many VMAs in userptr range\n"); >>> - r = -EFAULT; >>> - goto out; >>> - } >>> - >>> - vm_start = vma ? vma->vm_end : start; >>> - vma = find_vma(mm, vm_start); >>> - if (unlikely(!vma || vm_start < vma->vm_start)) { >>> - r = -EFAULT; >>> - goto out; >>> - } >>> - vmas[gtt->nr_ranges++] = vma; >>> - } while (end > vma->vm_end); >>> - >>> - DRM_DEBUG_DRIVER("0x%lx nr_ranges %d pages 0x%lx\n", >>> - start, gtt->nr_ranges, ttm->num_pages); >>> - >>> + vma = find_vma(mm, start); >>> + if (unlikely(!vma || start < vma->vm_start)) { >>> + r = -EFAULT; >>> + goto out; >>> + } >>> if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && >>> - vmas[0]->vm_file)) { >>> + vma->vm_file)) { >>> r = -EPERM; >>> goto out; >>> } >>> >>> - ranges = kvmalloc_array(gtt->nr_ranges, sizeof(*ranges), GFP_KERNEL); >>> - if (unlikely(!ranges)) { >>> + range = kzalloc(sizeof(*range), GFP_KERNEL); >>> + if (unlikely(!range)) { >>> r = -ENOMEM; >>> goto out; >>> } >>> @@ -786,61 +764,62 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) >>> goto out_free_ranges; >>> } >>> >>> - for (i = 0; i < gtt->nr_ranges; i++) >>> - amdgpu_hmm_init_range(&ranges[i]); >>> + amdgpu_hmm_init_range(range); >>> + range->default_flags = range->flags[HMM_PFN_VALID]; >>> + range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? >>> + 0 : range->flags[HMM_PFN_WRITE]; >>> + range->pfn_flags_mask = 0; >>> + range->pfns = pfns; >>> + hmm_range_register(range, mm, start, >>> + start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); >>> >>> - f = ranges[0].flags[HMM_PFN_VALID]; >>> - f |= amdgpu_ttm_tt_is_readonly(ttm) ? >>> - 0 : ranges[0].flags[HMM_PFN_WRITE]; >>> - memset64(pfns, f, ttm->num_pages); >>> +retry: >>> + /* >>> + * Just wait for range to be valid, safe to ignore return value as we >>> + * will use the return value of hmm_range_fault() below under the >>> + * mmap_sem to ascertain the validity of the range. >>> + */ >>> + hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); >>> >>> - for (nr_pages = 0, i = 0; i < gtt->nr_ranges; i++) { >>> - ranges[i].vma = vmas[i]; >>> - ranges[i].start = max(start, vmas[i]->vm_start); >>> - ranges[i].end = min(end, vmas[i]->vm_end); >>> - ranges[i].pfns = pfns + nr_pages; >>> - nr_pages += (ranges[i].end - ranges[i].start) / PAGE_SIZE; >>> + down_read(&mm->mmap_sem); >> If you take the mmap_sem here, you have to drop it before goto retry. >> Otherwise you'll end up taking the mmap_sem multiple times. It's not >> fatal immediately because you can take multiple read locks. But if you >> don't drop all the read locks, no writer will ever be able to take a >> write lock again. >> > mmap_sem dropped by hmm_range_fault before returning -EAGAIN, have to > take it again. >>> >>> - r = hmm_vma_fault(&ranges[i], true); >>> - if (unlikely(r)) >>> - break; >>> - } >>> - if (unlikely(r)) { >>> - while (i--) >>> - hmm_vma_range_done(&ranges[i]); >>> + r = hmm_range_fault(range, true); >>> + if (unlikely(r < 0)) { >>> + if (likely(r == -EAGAIN)) { >>> + if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) >>> + goto retry; >> You're still holding the mmap_sem here. >> >> >>> + else >>> + pr_err("Retry hmm fault too many times\n"); >>> + } >>> >>> goto out_free_pfns; >>> } >>> >>> - up_read(&mm->mmap_sem); >>> - >>> for (i = 0; i < ttm->num_pages; i++) { >>> - pages[i] = hmm_pfn_to_page(&ranges[0], pfns[i]); >>> - if (!pages[i]) { >>> + pages[i] = hmm_device_entry_to_page(range, pfns[i]); >>> + if (unlikely(!pages[i])) { >>> pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", >>> i, pfns[i]); >>> - goto out_invalid_pfn; >>> + r = -ENOMEM; >>> + >>> + goto out_free_pfns; >>> } >>> } >>> - gtt->ranges = ranges; >>> + >>> + up_read(&mm->mmap_sem); >> Why do you drop the mmap_sem so late? Does the loop above require it? I >> don't believe it does. >> > Yes, I can drop the mmap_sem before the loop, just wanted to remove one > line of duplicate up_read call. > >> Regards, >> Felix >> >> >>> + >>> + gtt->range = range; >>> >>> return 0; >>> >>> out_free_pfns: >>> + up_read(&mm->mmap_sem); >>> + hmm_range_unregister(range); >>> kvfree(pfns); >>> out_free_ranges: >>> - kvfree(ranges); >>> + kfree(range); >>> out: >>> - up_read(&mm->mmap_sem); >>> - >>> return r; >>> - >>> -out_invalid_pfn: >>> - for (i = 0; i < gtt->nr_ranges; i++) >>> - hmm_vma_range_done(&ranges[i]); >>> - kvfree(pfns); >>> - kvfree(ranges); >>> - return -ENOMEM; >>> } >>> >>> /** >>> @@ -853,23 +832,23 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) >>> { >>> struct amdgpu_ttm_tt *gtt = (void *)ttm; >>> bool r = false; >>> - int i; >>> >>> if (!gtt || !gtt->userptr) >>> return false; >>> >>> - DRM_DEBUG_DRIVER("user_pages_done 0x%llx nr_ranges %d pages 0x%lx\n", >>> - gtt->userptr, gtt->nr_ranges, ttm->num_pages); >>> + DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", >>> + gtt->userptr, ttm->num_pages); >>> >>> - WARN_ONCE(!gtt->ranges || !gtt->ranges[0].pfns, >>> + WARN_ONCE(!gtt->range || !gtt->range->pfns, >>> "No user pages to check\n"); >>> >>> - if (gtt->ranges) { >>> - for (i = 0; i < gtt->nr_ranges; i++) >>> - r |= hmm_vma_range_done(>t->ranges[i]); >>> - kvfree(gtt->ranges[0].pfns); >>> - kvfree(gtt->ranges); >>> - gtt->ranges = NULL; >>> + if (gtt->range) { >>> + r = hmm_range_valid(gtt->range); >>> + hmm_range_unregister(gtt->range); >>> + >>> + kvfree(gtt->range->pfns); >>> + kfree(gtt->range); >>> + gtt->range = NULL; >>> } >>> >>> return r; >>> @@ -953,9 +932,9 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) >>> sg_free_table(ttm->sg); >>> >>> #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) >>> - if (gtt->ranges && >>> - ttm->pages[0] == hmm_pfn_to_page(>t->ranges[0], >>> - gtt->ranges[0].pfns[0])) >>> + if (gtt->range && >>> + ttm->pages[0] == hmm_device_entry_to_page(gtt->range, >>> + gtt->range->pfns[0])) >>> WARN_ONCE(1, "Missing get_user_page_done\n"); >>> #endif >>> } _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx