On 07/03/2017 20:58, Chris Wilson wrote:
If we allow the user to convert a GTT mmap address into a userptr, we may end up in recursion hell, where currently we hit a mutex deadlock but other possibilities include use-after-free during the unbind/cancel_userptr. [ 143.203989] gem_userptr_bli D 0 902 898 0x00000000 [ 143.204054] Call Trace: [ 143.204137] __schedule+0x511/0x1180 [ 143.204195] ? pci_mmcfg_check_reserved+0xc0/0xc0 [ 143.204274] schedule+0x57/0xe0 [ 143.204327] schedule_timeout+0x383/0x670 [ 143.204374] ? trace_hardirqs_on_caller+0x187/0x280 [ 143.204457] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 143.204507] ? usleep_range+0x110/0x110 [ 143.204657] ? irq_exit+0x89/0x100 [ 143.204710] ? retint_kernel+0x2d/0x2d [ 143.204794] ? trace_hardirqs_on_caller+0x187/0x280 [ 143.204857] ? _raw_spin_unlock_irq+0x33/0x60 [ 143.204944] wait_for_common+0x1f0/0x2f0 [ 143.205006] ? out_of_line_wait_on_atomic_t+0x170/0x170 [ 143.205103] ? wake_up_q+0xa0/0xa0 [ 143.205159] ? flush_workqueue_prep_pwqs+0x15a/0x2c0 [ 143.205237] wait_for_completion+0x1d/0x20 [ 143.205292] flush_workqueue+0x2e9/0xbb0 [ 143.205339] ? flush_workqueue+0x163/0xbb0 [ 143.205418] ? __schedule+0x533/0x1180 [ 143.205498] ? check_flush_dependency+0x1a0/0x1a0 [ 143.205681] i915_gem_userptr_mn_invalidate_range_start+0x1c7/0x270 [i915] [ 143.205865] ? i915_gem_userptr_dmabuf_export+0x40/0x40 [i915] [ 143.205955] __mmu_notifier_invalidate_range_start+0xc6/0x120 [ 143.206044] ? __mmu_notifier_invalidate_range_start+0x51/0x120 [ 143.206123] zap_page_range_single+0x1c7/0x1f0 [ 143.206171] ? unmap_single_vma+0x160/0x160 [ 143.206260] ? unmap_mapping_range+0xa9/0x1b0 [ 143.206308] ? vma_interval_tree_subtree_search+0x75/0xd0 [ 143.206397] unmap_mapping_range+0x18f/0x1b0 [ 143.206444] ? zap_vma_ptes+0x70/0x70 [ 143.206524] ? __pm_runtime_resume+0x67/0xa0 [ 143.206723] i915_gem_release_mmap+0x1ba/0x1c0 [i915] [ 143.206846] i915_vma_unbind+0x5c2/0x690 [i915] [ 143.206925] ? __lock_is_held+0x52/0x100 [ 143.207076] i915_gem_object_set_tiling+0x1db/0x650 [i915] [ 143.207236] i915_gem_set_tiling_ioctl+0x1d3/0x3b0 [i915] [ 143.207377] ? i915_gem_set_tiling_ioctl+0x5/0x3b0 [i915] [ 143.207457] drm_ioctl+0x36c/0x670 [ 143.207535] ? debug_lockdep_rcu_enabled.part.0+0x1a/0x30 [ 143.207730] ? i915_gem_object_set_tiling+0x650/0x650 [i915] [ 143.207793] ? drm_getunique+0x120/0x120 [ 143.207875] ? __handle_mm_fault+0x996/0x14a0 [ 143.207939] ? vm_insert_page+0x340/0x340 [ 143.208028] ? up_write+0x28/0x50 [ 143.208086] ? vm_mmap_pgoff+0x160/0x190 [ 143.208163] do_vfs_ioctl+0x12c/0xa60 [ 143.208218] ? debug_lockdep_rcu_enabled+0x35/0x40 [ 143.208267] ? ioctl_preallocate+0x150/0x150 [ 143.208353] ? __do_page_fault+0x36a/0x6e0 [ 143.208400] ? mark_held_locks+0x23/0xc0 [ 143.208479] ? up_read+0x1f/0x40 [ 143.208526] ? entry_SYSCALL_64_fastpath+0x5/0xc6 [ 143.208669] ? __fget_light+0xa7/0xc0 [ 143.208747] SyS_ioctl+0x41/0x70 To prevent the possibility of a deadlock, we defer scheduling the worker until after we have proven that given the current mm, the userptr range does not overlap a GGTT mmaping. If another thread tries to remap the GGTT over the userptr before the worker is scheduled, it will be stopped by its invalidate-range flushing the current work, before the deadlock can occur. v2: Improve discussion of how we end up in the deadlock. Reported-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> Testcase: igt/gem_userptr_blits/map-fixed-invalidate-gup Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_userptr.c | 76 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index dc9bf5282071..7addbf08bcb9 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -488,6 +488,36 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, return ret; } +static bool overlaps_ggtt(struct drm_i915_gem_object *obj, struct mm_struct *mm) +{ + const struct vm_operations_struct *gem_vm_ops = + obj->base.dev->driver->gem_vm_ops; + unsigned long addr = obj->userptr.ptr; + const unsigned long end = addr + obj->base.size; + struct vm_area_struct *vma; + + /* Check for a contiguous set of vma covering the userptr, if any + * are absent, they will EFAULT. More importantly if any point back + * to a drm_i915_gem_object GTT mmaping, we may trigger a deadlock + * between the deferred gup of this userptr and the object being + * unbound calling invalidate_range -> cancel_userptr. + */ + for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { + if (vma->vm_start > addr) /* gap */ + break;
If I understand this correctly it is checking for more than ggtt, so would be tempted to either remove those parts of the checks or remove the helper to something like invalid_backing_store.
Also, again if I understand it correctly, if the backing store was made out of two VMAs then wouldn't this return a false positive?
In which case the first option from above sounds better to me. Just checking if there is any overlap with the GTT mapped areas in this function and leave the holes etc to get_user_pages.
+ + if (vma->vm_end >= end) + return false; + + if (vma->vm_ops == gem_vm_ops) /* GTT mmapping */ + break; + + addr = vma->vm_end; + } + + return true; +} + static void __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { @@ -556,8 +586,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } static struct sg_table * -__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, - bool *active) +__i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj) { struct get_pages_work *work; @@ -594,7 +623,7 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, INIT_WORK(&work->work, __i915_gem_userptr_get_pages_worker); schedule_work(&work->work); - *active = true; + __i915_gem_userptr_set_active(obj, true); return ERR_PTR(-EAGAIN); } @@ -602,10 +631,10 @@ static struct sg_table * i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; + struct mm_struct *mm = obj->userptr.mm->mm; struct page **pvec; struct sg_table *pages; - int pinned, ret; - bool active; + int pinned; /* If userspace should engineer that these pages are replaced in * the vma between us binding this page into the GTT and completion @@ -632,37 +661,36 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) return ERR_PTR(-EAGAIN); } - /* Let the mmu-notifier know that we have begun and need cancellation */ - ret = __i915_gem_userptr_set_active(obj, true); - if (ret) - return ERR_PTR(ret); - pvec = NULL; pinned = 0; - if (obj->userptr.mm->mm == current->mm) { - pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), - GFP_TEMPORARY); - if (pvec == NULL) { - __i915_gem_userptr_set_active(obj, false); - return ERR_PTR(-ENOMEM); - } - pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages, - !obj->userptr.read_only, pvec); + down_read(&mm->mmap_sem); + if (unlikely(overlaps_ggtt(obj, mm))) { + pinned = -EFAULT; + } else if (mm == current->mm) { + pvec = drm_malloc_gfp(num_pages, sizeof(struct page *), + GFP_TEMPORARY | + __GFP_NORETRY | + __GFP_NOWARN); + if (pvec) /* defer to worker if malloc fails */ + pinned = __get_user_pages_fast(obj->userptr.ptr, + num_pages, + !obj->userptr.read_only, + pvec); } - active = false; if (pinned < 0) pages = ERR_PTR(pinned), pinned = 0; else if (pinned < num_pages) - pages = __i915_gem_userptr_get_pages_schedule(obj, &active); + pages = __i915_gem_userptr_get_pages_schedule(obj); else pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages);
I don't see _set_active on the fast gup path, maybe it is just too early? :)
- if (IS_ERR(pages)) { - __i915_gem_userptr_set_active(obj, active); + up_read(&mm->mmap_sem); + + if (IS_ERR(pages)) release_pages(pvec, pinned, 0); - } drm_free_large(pvec); + return pages; }
Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx