On Thu, Oct 01, 2015 at 12:34:47PM +0100, Chris Wilson wrote:
> Whilst discussing possible ways to trigger an invalidate_range on a
> userptr with an aliased GGTT mmapping (and so cause a struct_mutex
> deadlock), the conclusion is that we can, and we must, prevent any
> possible deadlock by avoiding taking the mutex at all during
> invalidate_range. This has numerous advantages, all of which stem from
> avoiding the sleeping function inside the unknown context. In
> particular, it simplifies the invalidate_range because we no longer
> have to juggle the spinlock/mutex and can just hold the spinlock
> for the entire walk. To compensate, we have to make get_pages a bit more
> complicated in order to serialise with a pending cancel_userptr worker.
> As we hold the struct_mutex, we have no choice but to return EAGAIN and
> hope that the worker is then flushed before we retry after reacquiring
> the struct_mutex.
>
> The important caveat is that the invalidate_range itself is no longer
> synchronous. There exists a small but definite period in time in which
> the old PTE's pages remain accessible via the GPU. Note however that the
> physical pages themselves are not invalidated by the mmu_notifier, just
> the CPU view of the address space. The impact should be limited to a
> delay in pages being flushed, rather than a possibility of writing to
> the wrong pages. The only race condition that this worsens is remapping
> a userptr active on the GPU, where fresh work may still reference the
> old pages due to struct_mutex contention. Given that userspace is racing
> with the GPU, it is fair to say that the results are undefined.
>
> v2: Only queue (and importantly only take one refcnt) the worker once.
>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Michał Winiarski <michal.winiarski@xxxxxxxxx>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Pulled in all 3 patches.

Btw, some pretty kerneldoc explaining the high-level interactions would be
neat for all the userptr stuff ... I'm totally lost in i915_gem_userptr.c ;-)
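Something along these lines could be a start (rough sketch only, my own
wording from reading this patch, so the details almost certainly need
correcting by someone who actually knows the code):

/**
 * DOC: userptr mmu_notifier interaction
 *
 * A userptr object borrows pages from a normal CPU mapping, so we register
 * an mmu_notifier (i915_mmu_notifier, with one i915_mmu_object per object)
 * on the owning mm to learn when the CPU view of that range changes.
 *
 * invalidate_range_start may be called from a context in which we cannot
 * take the struct_mutex, so it only holds the notifier spinlock while it
 * walks the interval tree (or the linear list) and, for each overlapping
 * object that is still marked active, takes a reference and schedules
 * __cancel_userptr__worker. That worker takes the struct_mutex, cancels
 * any outstanding work on the object and forces it to re-evaluate
 * get_user_pages. The invalidate is therefore asynchronous: there is a
 * small window in which the GPU can still access the old pages.
 *
 * get_pages marks the object active under the same spinlock. If a cancel
 * worker is still pending for the object, it returns -EAGAIN so that the
 * caller drops the struct_mutex, lets the worker run, and retries.
 */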
Thanks, Daniel

> ---
>  drivers/gpu/drm/i915/i915_gem_userptr.c | 148 +++++++++++++-------------------
>  1 file changed, 61 insertions(+), 87 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> index 161f7fbf5b76..1b3b451b6658 100644
> --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -50,7 +50,6 @@ struct i915_mmu_notifier {
>  	struct mmu_notifier mn;
>  	struct rb_root objects;
>  	struct list_head linear;
> -	unsigned long serial;
>  	bool has_linear;
>  };
>
> @@ -59,14 +58,16 @@ struct i915_mmu_object {
>  	struct interval_tree_node it;
>  	struct list_head link;
>  	struct drm_i915_gem_object *obj;
> +	struct work_struct work;
>  	bool active;
>  	bool is_linear;
>  };
>
> -static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
> +static void __cancel_userptr__worker(struct work_struct *work)
>  {
> +	struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> +	struct drm_i915_gem_object *obj = mo->obj;
>  	struct drm_device *dev = obj->base.dev;
> -	unsigned long end;
>
>  	mutex_lock(&dev->struct_mutex);
>  	/* Cancel any active worker and force us to re-evaluate gup */
> @@ -89,46 +90,28 @@ static unsigned long cancel_userptr(struct drm_i915_gem_object *obj)
>  		dev_priv->mm.interruptible = was_interruptible;
>  	}
>
> -	end = obj->userptr.ptr + obj->base.size;
> -
>  	drm_gem_object_unreference(&obj->base);
>  	mutex_unlock(&dev->struct_mutex);
> -
> -	return end;
>  }
>
> -static void *invalidate_range__linear(struct i915_mmu_notifier *mn,
> -				      struct mm_struct *mm,
> -				      unsigned long start,
> -				      unsigned long end)
> +static unsigned long cancel_userptr(struct i915_mmu_object *mo)
>  {
> -	struct i915_mmu_object *mo;
> -	unsigned long serial;
> -
> -restart:
> -	serial = mn->serial;
> -	list_for_each_entry(mo, &mn->linear, link) {
> -		struct drm_i915_gem_object *obj;
> -
> -		if (mo->it.last < start || mo->it.start > end)
> -			continue;
> -
> -		obj = mo->obj;
> -
> -		if (!mo->active ||
> -		    !kref_get_unless_zero(&obj->base.refcount))
> -			continue;
> -
> -		spin_unlock(&mn->lock);
> -
> -		cancel_userptr(obj);
> -
> -		spin_lock(&mn->lock);
> -		if (serial != mn->serial)
> -			goto restart;
> +	unsigned long end = mo->obj->userptr.ptr + mo->obj->base.size;
> +
> +	/* The mmu_object is released late when destroying the
> +	 * GEM object so it is entirely possible to gain a
> +	 * reference on an object in the process of being freed
> +	 * since our serialisation is via the spinlock and not
> +	 * the struct_mutex - and consequently use it after it
> +	 * is freed and then double free it.
> +	 */
> +	if (mo->active && kref_get_unless_zero(&mo->obj->base.refcount)) {
> +		schedule_work(&mo->work);
> +		/* only schedule one work packet to avoid the refleak */
> +		mo->active = false;
>  	}
>
> -	return NULL;
> +	return end;
>  }
>
>  static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> @@ -136,45 +119,32 @@ static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
>  						       unsigned long start,
>  						       unsigned long end)
>  {
> -	struct i915_mmu_notifier *mn = container_of(_mn, struct i915_mmu_notifier, mn);
> -	struct interval_tree_node *it = NULL;
> -	unsigned long next = start;
> -	unsigned long serial = 0;
> -
> -	end--; /* interval ranges are inclusive, but invalidate range is exclusive */
> -	while (next < end) {
> -		struct drm_i915_gem_object *obj = NULL;
> -
> -		spin_lock(&mn->lock);
> -		if (mn->has_linear)
> -			it = invalidate_range__linear(mn, mm, start, end);
> -		else if (serial == mn->serial)
> -			it = interval_tree_iter_next(it, next, end);
> -		else
> -			it = interval_tree_iter_first(&mn->objects, start, end);
> -		if (it != NULL) {
> -			struct i915_mmu_object *mo =
> -				container_of(it, struct i915_mmu_object, it);
> -
> -			/* The mmu_object is released late when destroying the
> -			 * GEM object so it is entirely possible to gain a
> -			 * reference on an object in the process of being freed
> -			 * since our serialisation is via the spinlock and not
> -			 * the struct_mutex - and consequently use it after it
> -			 * is freed and then double free it.
> -			 */
> -			if (mo->active &&
> -			    kref_get_unless_zero(&mo->obj->base.refcount))
> -				obj = mo->obj;
> -
> -			serial = mn->serial;
> +	struct i915_mmu_notifier *mn =
> +		container_of(_mn, struct i915_mmu_notifier, mn);
> +	struct i915_mmu_object *mo;
> +
> +	/* interval ranges are inclusive, but invalidate range is exclusive */
> +	end--;
> +
> +	spin_lock(&mn->lock);
> +	if (mn->has_linear) {
> +		list_for_each_entry(mo, &mn->linear, link) {
> +			if (mo->it.last < start || mo->it.start > end)
> +				continue;
> +
> +			cancel_userptr(mo);
>  		}
> -		spin_unlock(&mn->lock);
> -		if (obj == NULL)
> -			return;
> +	} else {
> +		struct interval_tree_node *it;
>
> -		next = cancel_userptr(obj);
> +		it = interval_tree_iter_first(&mn->objects, start, end);
> +		while (it) {
> +			mo = container_of(it, struct i915_mmu_object, it);
> +			start = cancel_userptr(mo);
> +			it = interval_tree_iter_next(it, start, end);
> +		}
>  	}
> +	spin_unlock(&mn->lock);
>  }
>
>  static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> @@ -194,7 +164,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>  	spin_lock_init(&mn->lock);
>  	mn->mn.ops = &i915_gem_userptr_notifier;
>  	mn->objects = RB_ROOT;
> -	mn->serial = 1;
>  	INIT_LIST_HEAD(&mn->linear);
>  	mn->has_linear = false;
>
> @@ -208,12 +177,6 @@ i915_mmu_notifier_create(struct mm_struct *mm)
>  	return mn;
>  }
>
> -static void __i915_mmu_notifier_update_serial(struct i915_mmu_notifier *mn)
> -{
> -	if (++mn->serial == 0)
> -		mn->serial = 1;
> -}
> -
>  static int
>  i915_mmu_notifier_add(struct drm_device *dev,
>  		      struct i915_mmu_notifier *mn,
> @@ -260,10 +223,9 @@ i915_mmu_notifier_add(struct drm_device *dev,
>  	} else
>  		interval_tree_insert(&mo->it, &mn->objects);
>
> -	if (ret == 0) {
> +	if (ret == 0)
>  		list_add(&mo->link, &mn->linear);
> -		__i915_mmu_notifier_update_serial(mn);
> -	}
> +
>  	spin_unlock(&mn->lock);
>  	mutex_unlock(&dev->struct_mutex);
>
> @@ -291,7 +253,6 @@ i915_mmu_notifier_del(struct i915_mmu_notifier *mn,
>  		mn->has_linear = i915_mmu_notifier_has_linear(mn);
>  	else
>  		interval_tree_remove(&mo->it, &mn->objects);
> -	__i915_mmu_notifier_update_serial(mn);
>  	spin_unlock(&mn->lock);
>  }
>
> @@ -358,6 +319,7 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
>  	mo->it.start = obj->userptr.ptr;
>  	mo->it.last = mo->it.start + obj->base.size - 1;
>  	mo->obj = obj;
> +	INIT_WORK(&mo->work, __cancel_userptr__worker);
>
>  	ret = i915_mmu_notifier_add(obj->base.dev, mn, mo);
>  	if (ret) {
> @@ -566,10 +528,12 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj,
>  	return ret;
>  }
>
> -static void
> +static int
>  __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
>  			      bool value)
>  {
> +	int ret = 0;
> +
>  	/* During mm_invalidate_range we need to cancel any userptr that
>  	 * overlaps the range being invalidated. Doing so requires the
>  	 * struct_mutex, and that risks recursion. In order to cause
> @@ -582,12 +546,20 @@ __i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
>  	 */
>  #if defined(CONFIG_MMU_NOTIFIER)
>  	if (obj->userptr.mmu_object == NULL)
> -		return;
> +		return 0;
>
>  	spin_lock(&obj->userptr.mmu_object->mn->lock);
> -	obj->userptr.mmu_object->active = value;
> +	/* In order to serialise get_pages with an outstanding
> +	 * cancel_userptr, we must drop the struct_mutex and try again.
> +	 */
> +	if (!value || !work_pending(&obj->userptr.mmu_object->work))
> +		obj->userptr.mmu_object->active = value;
> +	else
> +		ret = -EAGAIN;
>  	spin_unlock(&obj->userptr.mmu_object->mn->lock);
>  #endif
> +
> +	return ret;
>  }
>
>  static void
> @@ -736,7 +708,9 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
>  		return -EAGAIN;
>
>  	/* Let the mmu-notifier know that we have begun and need cancellation */
> -	__i915_gem_userptr_set_active(obj, true);
> +	ret = __i915_gem_userptr_set_active(obj, true);
> +	if (ret)
> +		return ret;
>
>  	pvec = NULL;
>  	pinned = 0;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx