Quoting Tvrtko Ursulin (2019-01-15 09:47:45)
> 
> On 14/01/2019 21:17, Chris Wilson wrote:
> > Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
> > notifiers") we have been able to report failure from
> > mmu_invalidate_range_start which allows us to use a trylock on the
> > struct_mutex to avoid potential recursion and report -EBUSY instead.
> > Furthermore, this allows us to pull the work into the main callback and
> > avoid the sleight-of-hand in using a workqueue to avoid lockdep.
> > 
> > However, not all paths to mmu_invalidate_range_start are prepared to
> > handle failure, so instead of reporting the recursion, deal with it by
> > propagating the failure upwards, who can decide themselves to handle it
> > or report it.
> > 
> > v2: Mark up the recursive lock behaviour and comment on the various weak
> > points.
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
> > References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h         |   4 +-
> >  drivers/gpu/drm/i915/i915_gem.c         |  30 +++-
> >  drivers/gpu/drm/i915/i915_gem_object.h  |   7 +
> >  drivers/gpu/drm/i915/i915_gem_userptr.c | 220 +++++++++++-------------
> >  4 files changed, 135 insertions(+), 126 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index fa99824f63b3..4b67037c0318 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2931,8 +2931,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock/struct_mutex */
> >          I915_MM_SHRINKER /* called "recursively" from direct-reclaim-esque */
> >  };
> >  
> > -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > -                                 enum i915_mm_subclass subclass);
> > +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > +                                enum i915_mm_subclass subclass);
> >  void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
> >  
> >  enum i915_map_type {
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index 0bfed33178e1..90c167f71345 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2303,8 +2303,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
> >          struct sg_table *pages;
> >  
> >          pages = fetch_and_zero(&obj->mm.pages);
> > -        if (!pages)
> > -                return NULL;
> > +        if (IS_ERR_OR_NULL(pages))
> > +                return pages;
> >  
> >          spin_lock(&i915->mm.obj_lock);
> >          list_del(&obj->mm.link);
> > @@ -2328,22 +2328,23 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
> >          return pages;
> >  }
> >  
> > -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > -                                 enum i915_mm_subclass subclass)
> > +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> > +                                enum i915_mm_subclass subclass)
> >  {
> >          struct sg_table *pages;
> > +        int ret;
> >  
> >          if (i915_gem_object_has_pinned_pages(obj))
> > -                return;
> > +                return -EBUSY;
> >  
> >          GEM_BUG_ON(obj->bind_count);
> > -        if (!i915_gem_object_has_pages(obj))
> > -                return;
> >  
> >          /* May be called by shrinker from within get_pages() (on another bo) */
> >          mutex_lock_nested(&obj->mm.lock, subclass);
> > -        if (unlikely(atomic_read(&obj->mm.pages_pin_count)))
> > +        if (unlikely(atomic_read(&obj->mm.pages_pin_count))) {
> > +                ret = -EBUSY;
> >                  goto unlock;
> > +        }
> >  
> >          /*
> >           * ->put_pages might need to allocate memory for the bit17 swizzle
> > @@ -2351,11 +2352,24 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
> >           * lists early.
> >           */
> >          pages = __i915_gem_object_unset_pages(obj);
> > +
> > +        /*
> > +         * XXX Temporary hijinx to avoid updating all backends to handle
> > +         * NULL pages. In the future, when we have more asynchronous
> > +         * get_pages backends we should be better able to handle the
> > +         * cancellation of the async task in a more uniform manner.
> > +         */
> > +        if (!pages && !i915_gem_object_needs_async_cancel(obj))
> > +                pages = ERR_PTR(-EINVAL);
> > +
> >          if (!IS_ERR(pages))
> >                  obj->ops->put_pages(obj, pages);
> >  
> > +        ret = 0;
> >  unlock:
> >          mutex_unlock(&obj->mm.lock);
> > +
> > +        return ret;
> >  }
> >  
> >  bool i915_sg_trim(struct sg_table *orig_st)
> > diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
> > index ff3da64470dd..cb1b0144d274 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_object.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_object.h
> > @@ -57,6 +57,7 @@ struct drm_i915_gem_object_ops {
> >  #define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
> >  #define I915_GEM_OBJECT_IS_SHRINKABLE   BIT(1)
> >  #define I915_GEM_OBJECT_IS_PROXY        BIT(2)
> > +#define I915_GEM_OBJECT_ASYNC_CANCEL    BIT(3)
> >  
> >          /* Interface between the GEM object and its backing storage.
> >           * get_pages() is called once prior to the use of the associated set
> > @@ -387,6 +388,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
> >          return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
> >  }
> >  
> > +static inline bool
> > +i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
> > +{
> > +        return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
> > +}
> > +
> >  static inline bool
> >  i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
> >  {
> > diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> > index 1fb6a7bb5054..9c2008a480e2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_userptr.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> > @@ -49,77 +49,81 @@ struct i915_mmu_notifier {
> >          struct hlist_node node;
> >          struct mmu_notifier mn;
> >          struct rb_root_cached objects;
> > -        struct workqueue_struct *wq;
> > +        struct i915_mm_struct *mm;
> >  };
> >  
> >  struct i915_mmu_object {
> >          struct i915_mmu_notifier *mn;
> >          struct drm_i915_gem_object *obj;
> >          struct interval_tree_node it;
> > -        struct list_head link;
> > -        struct work_struct work;
> > -        bool attached;
> >  };
> >  
> > -static void cancel_userptr(struct work_struct *work)
> > +static void add_object(struct i915_mmu_object *mo)
> >  {
> > -        struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
> > -        struct drm_i915_gem_object *obj = mo->obj;
> > -        struct work_struct *active;
> > -
> > -        /* Cancel any active worker and force us to re-evaluate gup */
> > -        mutex_lock(&obj->mm.lock);
> > -        active = fetch_and_zero(&obj->userptr.work);
> > -        mutex_unlock(&obj->mm.lock);
> > -        if (active)
> > -                goto out;
> > -
> > -        i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
> > -
> > -        mutex_lock(&obj->base.dev->struct_mutex);
> > -
> > -        /* We are inside a kthread context and can't be interrupted */
> > -        if (i915_gem_object_unbind(obj) == 0)
> > -                __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
> > -        WARN_ONCE(i915_gem_object_has_pages(obj),
> > -                  "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
> > -                  obj->bind_count,
> > -                  atomic_read(&obj->mm.pages_pin_count),
> > -                  obj->pin_global);
> > -
> > -        mutex_unlock(&obj->base.dev->struct_mutex);
> > -
> > -out:
> > -        i915_gem_object_put(obj);
> > +        GEM_BUG_ON(!RB_EMPTY_NODE(&mo->it.rb));
> > +        interval_tree_insert(&mo->it, &mo->mn->objects);
> >  }
> >  
> > -static void add_object(struct i915_mmu_object *mo)
> > +static void del_object(struct i915_mmu_object *mo)
> >  {
> > -        if (mo->attached)
> > +        if (RB_EMPTY_NODE(&mo->it.rb))
> >                  return;
> >  
> > -        interval_tree_insert(&mo->it, &mo->mn->objects);
> > -        mo->attached = true;
> > +        interval_tree_remove(&mo->it, &mo->mn->objects);
> > +        RB_CLEAR_NODE(&mo->it.rb);
> >  }
> >  
> > -static void del_object(struct i915_mmu_object *mo)
> > +static void
> > +__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
> >  {
> > -        if (!mo->attached)
> > +        struct i915_mmu_object *mo = obj->userptr.mmu_object;
> > +
> > +        /*
> > +         * During mm_invalidate_range we need to cancel any userptr that
> > +         * overlaps the range being invalidated. Doing so requires the
> > +         * struct_mutex, and that risks recursion. In order to cause
> > +         * recursion, the user must alias the userptr address space with
> > +         * a GTT mmapping (possible with a MAP_FIXED) - then when we have
> > +         * to invalidate that mmaping, mm_invalidate_range is called with
> > +         * the userptr address *and* the struct_mutex held. To prevent that
> > +         * we set a flag under the i915_mmu_notifier spinlock to indicate
> > +         * whether this object is valid.
> > +         */
> > +        if (!mo)
> >                  return;
> >  
> > -        interval_tree_remove(&mo->it, &mo->mn->objects);
> > -        mo->attached = false;
> > +        spin_lock(&mo->mn->lock);
> > +        if (value)
> > +                add_object(mo);
> > +        else
> > +                del_object(mo);
> > +        spin_unlock(&mo->mn->lock);
> >  }
> >  
> > -static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
> > -                                                       const struct mmu_notifier_range *range)
> > +static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
> > +{
> > +        switch (mutex_trylock_recursive(m)) {
> > +        default:
> > +        case MUTEX_TRYLOCK_FAILED:
> > +                mutex_lock_nested(m, I915_MM_SHRINKER);
> 
> Have you found in the meantime that this is actually needed? Although it
> is, since we use this subclass to put the pages..

Yes, if we do not attempt a full mutex_lock() here, we fail the
invalidation subtests of gem_userptr_blits().

But you mean the subclass? Given enough time I'll find the path to break
it; at present I feel more comfortable cargo-culting the rules devised
for the recursive locking from inside direct-reclaim to another
direct-reclaim path.

The end goal here is that upon breaking up struct_mutex we replace it
with various locks under which we are not allowed to allocate. No more
direct-reclaim recursion.

> Should i915_gem_shrinker_taints_mutex taint obj->mm.lock with the
> shrinker subclass?

The shrinker takes obj->mm.lock with the I915_MM_SHRINKER subclass, so
it's a little more faff to taint.

-Chris
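P.S. Since the quoted hunk above is trimmed right after the
MUTEX_TRYLOCK_FAILED fallback, here is roughly the shape of the pattern
being discussed. Treat it as a sketch of the trylock-recursive idiom,
assuming the stock mutex_trylock_recursive() return values, and not as
the literal remainder of the patch:

static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
{
        switch (mutex_trylock_recursive(m)) {
        default:
        case MUTEX_TRYLOCK_FAILED:
                /*
                 * Not already holding the mutex on this thread: block for
                 * it, using the same lockdep subclass as the shrinker since
                 * we may be entered from a direct-reclaim-like path.
                 */
                mutex_lock_nested(m, I915_MM_SHRINKER);
                /* fall through */
        case MUTEX_TRYLOCK_SUCCESS:
                /* We took the lock; the caller is responsible for unlocking. */
                return m;

        case MUTEX_TRYLOCK_RECURSIVE:
                /* Already held by this thread; the caller must not unlock. */
                return NULL;
        }
}

The return value tells the invalidate callback whether it owns the
unlock, which is what lets it run both with and without struct_mutex
already held.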