On 02/11/2018 16:12, Chris Wilson wrote:
Since commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
notifiers") we have been able to report failure from
mmu_invalidate_range_start which allows us to use a trylock on the
struct_mutex to avoid potential recursion and report -EBUSY instead.
Furthermore, this allows us to pull the work into the main callback and
avoid the sleight-of-hand in using a workqueue to avoid lockdep.
However, not all paths to mmu_invalidate_range_start are prepared to
handle failure, so instead of reporting the recursion, deal with it.
Judging by the code below, non-blockable paths can handle failure but blockable ones cannot? Right, now that I read the invalidate_range_start API docs, that seems to be the case. So it sounds like blockable brings us marginal benefits, if any, at the design level, which I suppose is why this patch looks quite big. Let's see...
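To make that contract concrete, my understanding is that a conforming callback has roughly the shape below: a non-blockable caller (e.g. the OOM reaper) may be told to back off with -EAGAIN, while a blockable caller must always be serviced. This is only an illustrative sketch, not the patch itself, and every name in it (example_dev, example_cancel_range) is made up:

struct example_dev {
        struct mmu_notifier mn;
        struct mutex lock;      /* stands in for i915's struct_mutex */
};

static int example_invalidate_range_start(struct mmu_notifier *mn,
                                          struct mm_struct *mm,
                                          unsigned long start,
                                          unsigned long end,
                                          bool blockable)
{
        struct example_dev *dev = container_of(mn, struct example_dev, mn);

        if (!mutex_trylock(&dev->lock)) {
                if (!blockable)
                        return -EAGAIN; /* non-blockable caller can cope with failure */
                mutex_lock(&dev->lock); /* blockable: we are not allowed to fail */
        }

        /* made-up helper: unbind the overlapping objects and put their pages */
        example_cancel_range(dev, start, end);

        mutex_unlock(&dev->lock);
        return 0;
}

The actual patch is more involved because it also has to cope with the callback being invoked while struct_mutex is already held by the same task (the recursion case further down).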
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108375
References: 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu notifiers")
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.h | 4 +-
drivers/gpu/drm/i915/i915_gem.c | 18 +-
drivers/gpu/drm/i915/i915_gem_object.h | 7 +
drivers/gpu/drm/i915/i915_gem_userptr.c | 217 +++++++++++-------------
4 files changed, 120 insertions(+), 126 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a88a7eb871b..1056b12c3bc8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3073,8 +3073,8 @@ enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */
I915_MM_SHRINKER
};
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
- enum i915_mm_subclass subclass);
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+ enum i915_mm_subclass subclass);
void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj);
enum i915_map_type {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 93d09282710d..9a8af9454a53 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2429,8 +2429,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
struct sg_table *pages;
pages = fetch_and_zero(&obj->mm.pages);
- if (!pages)
- return NULL;
+ if (IS_ERR_OR_NULL(pages))
+ return pages;
spin_lock(&i915->mm.obj_lock);
list_del(&obj->mm.link);
@@ -2454,17 +2454,16 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
return pages;
}
-void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
- enum i915_mm_subclass subclass)
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
+ enum i915_mm_subclass subclass)
{
struct sg_table *pages;
+ int ret = -EBUSY;
if (i915_gem_object_has_pinned_pages(obj))
- return;
+ return -EBUSY;
GEM_BUG_ON(obj->bind_count);
- if (!i915_gem_object_has_pages(obj))
- return;
Unrelated to this patch?
/* May be called by shrinker from within get_pages() (on another bo) */
mutex_lock_nested(&obj->mm.lock, subclass);
@@ -2477,11 +2476,16 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
* lists early.
*/
pages = __i915_gem_object_unset_pages(obj);
+ if (!pages && !i915_gem_object_needs_async_cancel(obj))
+ pages = ERR_PTR(-EINVAL);
(Hmm yeah, this path did not previously handle the case of pages already being NULL here.)
Please put a blurb in the commit message about the high- to medium-level design of the change.
if (!IS_ERR(pages))
obj->ops->put_pages(obj, pages);
+ ret = 0;
unlock:
mutex_unlock(&obj->mm.lock);
+
+ return ret;
}
bool i915_sg_trim(struct sg_table *orig_st)
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index a6dd7c46de0d..49ce797173b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -56,6 +56,7 @@ struct drm_i915_gem_object_ops {
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
#define I915_GEM_OBJECT_IS_PROXY BIT(2)
+#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
@@ -386,6 +387,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
return obj->ops->flags & I915_GEM_OBJECT_IS_PROXY;
}
+static inline bool
+i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
+}
+
static inline bool
i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
{
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2c9b284036d1..ab5ae426e27b 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -50,79 +50,84 @@ struct i915_mmu_notifier {
struct hlist_node node;
struct mmu_notifier mn;
struct rb_root_cached objects;
- struct workqueue_struct *wq;
+ struct i915_mm_struct *mm;
};
struct i915_mmu_object {
struct i915_mmu_notifier *mn;
struct drm_i915_gem_object *obj;
struct interval_tree_node it;
- struct list_head link;
- struct work_struct work;
- bool attached;
};
-static void cancel_userptr(struct work_struct *work)
-{
- struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
- struct drm_i915_gem_object *obj = mo->obj;
- struct work_struct *active;
-
- /* Cancel any active worker and force us to re-evaluate gup */
- mutex_lock(&obj->mm.lock);
- active = fetch_and_zero(&obj->userptr.work);
- mutex_unlock(&obj->mm.lock);
- if (active)
- goto out;
-
- i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
-
- mutex_lock(&obj->base.dev->struct_mutex);
-
- /* We are inside a kthread context and can't be interrupted */
- if (i915_gem_object_unbind(obj) == 0)
- __i915_gem_object_put_pages(obj, I915_MM_NORMAL);
- WARN_ONCE(i915_gem_object_has_pages(obj),
- "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_global=%d\n",
- obj->bind_count,
- atomic_read(&obj->mm.pages_pin_count),
- obj->pin_global);
-
- mutex_unlock(&obj->base.dev->struct_mutex);
-
-out:
- i915_gem_object_put(obj);
-}
-
static void add_object(struct i915_mmu_object *mo)
{
- if (mo->attached)
+ if (!RB_EMPTY_NODE(&mo->it.rb))
return;
interval_tree_insert(&mo->it, &mo->mn->objects);
- mo->attached = true;
}
static void del_object(struct i915_mmu_object *mo)
{
- if (!mo->attached)
+ if (RB_EMPTY_NODE(&mo->it.rb))
return;
interval_tree_remove(&mo->it, &mo->mn->objects);
- mo->attached = false;
+ RB_CLEAR_NODE(&mo->it.rb);
+}
+
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+ struct i915_mmu_object *mo = obj->userptr.mmu_object;
+
+ /*
+ * During mm_invalidate_range we need to cancel any userptr that
+ * overlaps the range being invalidated. Doing so requires the
+ * struct_mutex, and that risks recursion. In order to cause
+ * recursion, the user must alias the userptr address space with
+ * a GTT mmapping (possible with a MAP_FIXED) - then when we have
+ * to invalidate that mmaping, mm_invalidate_range is called with
+ * the userptr address *and* the struct_mutex held. To prevent that
+ * we set a flag under the i915_mmu_notifier spinlock to indicate
+ * whether this object is valid.
+ */
+ if (!mo)
+ return;
+
+ spin_lock(&mo->mn->lock);
+ if (value)
+ add_object(mo);
+ else
+ del_object(mo);
+ spin_unlock(&mo->mn->lock);
+}
+
+static struct mutex *__i915_mutex_lock_recursive(struct mutex *m)
+{
+ switch (mutex_trylock_recursive(m)) {
+ default:
+ case MUTEX_TRYLOCK_FAILED:
+ mutex_lock(m);
+ case MUTEX_TRYLOCK_SUCCESS:
+ return m;
+
+ case MUTEX_TRYLOCK_RECURSIVE:
+ return ERR_PTR(-EEXIST);
This suggests we will break the API requirement to always invalidate (never fail) in the blockable case.
+ }
}
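For reference, mutex_trylock_recursive() distinguishes three outcomes: the lock was free and is now ours (SUCCESS), it is held by another task (FAILED, in which case this helper blocks in mutex_lock() and falls through into the SUCCESS case), or it is already held by the current task (RECURSIVE). So the return value is presumably meant as an unlock token for the caller, roughly as in this sketch (not the patch itself):

        struct mutex *unlock;

        unlock = __i915_mutex_lock_recursive(&i915->drm.struct_mutex);

        /* ... unbind the affected objects and release their pages ... */

        /*
         * Only drop the lock if this call actually took it;
         * ERR_PTR(-EEXIST) means it was already held further up the stack.
         */
        if (!IS_ERR_OR_NULL(unlock))
                mutex_unlock(unlock);

Whether it is actually safe to carry on in the RECURSIVE case is another matter, see below.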
static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end,
+ bool blockable)
{
struct i915_mmu_notifier *mn =
container_of(_mn, struct i915_mmu_notifier, mn);
- struct i915_mmu_object *mo;
struct interval_tree_node *it;
- LIST_HEAD(cancelled);
+ struct mutex *unlock = NULL;
+ int ret = 0;
if (RB_EMPTY_ROOT(&mn->objects.rb_root))
return 0;
@@ -133,11 +138,15 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
spin_lock(&mn->lock);
it = interval_tree_iter_first(&mn->objects, start, end);
while (it) {
+ struct drm_i915_gem_object *obj;
+
if (!blockable) {
- spin_unlock(&mn->lock);
- return -EAGAIN;
+ ret = -EAGAIN;
+ break;
}
- /* The mmu_object is released late when destroying the
+
+ /*
+ * The mmu_object is released late when destroying the
* GEM object so it is entirely possible to gain a
* reference on an object in the process of being freed
* since our serialisation is via the spinlock and not
@@ -146,21 +155,33 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
* use-after-free we only acquire a reference on the
* object if it is not in the process of being destroyed.
*/
- mo = container_of(it, struct i915_mmu_object, it);
- if (kref_get_unless_zero(&mo->obj->base.refcount))
- queue_work(mn->wq, &mo->work);
-
- list_add(&mo->link, &cancelled);
- it = interval_tree_iter_next(it, start, end);
+ obj = container_of(it, struct i915_mmu_object, it)->obj;
+ if (!kref_get_unless_zero(&obj->base.refcount)) {
+ it = interval_tree_iter_next(it, start, end);
+ continue;
+ }
+ spin_unlock(&mn->lock);
+
+ if (!unlock)
+ unlock = __i915_mutex_lock_recursive(&mn->mm->i915->drm.struct_mutex);
Hmm... but we proceed regardless of the trylock result and don't even bother looking at it before doing the work. I don't get it. I'll stop here since it doesn't make sense to me at the moment.
Regards,
Tvrtko
+ ret = i915_gem_object_unbind(obj);
+ if (ret == 0)
+ ret = __i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
+ i915_gem_object_put(obj);
+ if (ret)
+ goto unlock;
+
+ spin_lock(&mn->lock);
+ it = interval_tree_iter_first(&mn->objects, start, end);
}
- list_for_each_entry(mo, &cancelled, link)
- del_object(mo);
spin_unlock(&mn->lock);
- if (!list_empty(&cancelled))
- flush_workqueue(mn->wq);
+unlock:
+ if (!IS_ERR_OR_NULL(unlock))
+ mutex_unlock(unlock);
+
+ return ret;
- return 0;
}
static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
@@ -168,7 +189,7 @@ static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
};
static struct i915_mmu_notifier *
-i915_mmu_notifier_create(struct mm_struct *mm)
+i915_mmu_notifier_create(struct i915_mm_struct *mm)
{
struct i915_mmu_notifier *mn;
@@ -179,13 +200,7 @@ i915_mmu_notifier_create(struct mm_struct *mm)
spin_lock_init(&mn->lock);
mn->mn.ops = &i915_gem_userptr_notifier;
mn->objects = RB_ROOT_CACHED;
- mn->wq = alloc_workqueue("i915-userptr-release",
- WQ_UNBOUND | WQ_MEM_RECLAIM,
- 0);
- if (mn->wq == NULL) {
- kfree(mn);
- return ERR_PTR(-ENOMEM);
- }
+ mn->mm = mm;
return mn;
}
@@ -195,16 +210,14 @@ i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
struct i915_mmu_object *mo;
- mo = obj->userptr.mmu_object;
- if (mo == NULL)
+ mo = fetch_and_zero(&obj->userptr.mmu_object);
+ if (!mo)
return;
spin_lock(&mo->mn->lock);
del_object(mo);
spin_unlock(&mo->mn->lock);
kfree(mo);
-
- obj->userptr.mmu_object = NULL;
}
static struct i915_mmu_notifier *
@@ -217,7 +230,7 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
if (mn)
return mn;
- mn = i915_mmu_notifier_create(mm->mm);
+ mn = i915_mmu_notifier_create(mm);
if (IS_ERR(mn))
err = PTR_ERR(mn);
@@ -240,10 +253,8 @@ i915_mmu_notifier_find(struct i915_mm_struct *mm)
mutex_unlock(&mm->i915->mm_lock);
up_write(&mm->mm->mmap_sem);
- if (mn && !IS_ERR(mn)) {
- destroy_workqueue(mn->wq);
+ if (mn && !IS_ERR(mn))
kfree(mn);
- }
return err ? ERR_PTR(err) : mm->mn;
}
@@ -266,14 +277,14 @@ i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj,
return PTR_ERR(mn);
mo = kzalloc(sizeof(*mo), GFP_KERNEL);
- if (mo == NULL)
+ if (!mo)
return -ENOMEM;
mo->mn = mn;
mo->obj = obj;
mo->it.start = obj->userptr.ptr;
mo->it.last = obj->userptr.ptr + obj->base.size - 1;
- INIT_WORK(&mo->work, cancel_userptr);
+ RB_CLEAR_NODE(&mo->it.rb);
obj->userptr.mmu_object = mo;
return 0;
@@ -287,12 +298,16 @@ i915_mmu_notifier_free(struct i915_mmu_notifier *mn,
return;
mmu_notifier_unregister(&mn->mn, mm);
- destroy_workqueue(mn->wq);
kfree(mn);
}
#else
+static void
+__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj, bool value)
+{
+}
+
static void
i915_gem_userptr_release__mmu_notifier(struct drm_i915_gem_object *obj)
{
@@ -461,42 +476,6 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
return st;
}
-static int
-__i915_gem_userptr_set_active(struct drm_i915_gem_object *obj,
- bool value)
-{
- int ret = 0;
-
- /* During mm_invalidate_range we need to cancel any userptr that
- * overlaps the range being invalidated. Doing so requires the
- * struct_mutex, and that risks recursion. In order to cause
- * recursion, the user must alias the userptr address space with
- * a GTT mmapping (possible with a MAP_FIXED) - then when we have
- * to invalidate that mmaping, mm_invalidate_range is called with
- * the userptr address *and* the struct_mutex held. To prevent that
- * we set a flag under the i915_mmu_notifier spinlock to indicate
- * whether this object is valid.
- */
-#if defined(CONFIG_MMU_NOTIFIER)
- if (obj->userptr.mmu_object == NULL)
- return 0;
-
- spin_lock(&obj->userptr.mmu_object->mn->lock);
- /* In order to serialise get_pages with an outstanding
- * cancel_userptr, we must drop the struct_mutex and try again.
- */
- if (!value)
- del_object(obj->userptr.mmu_object);
- else if (!work_pending(&obj->userptr.mmu_object->work))
- add_object(obj->userptr.mmu_object);
- else
- ret = -EAGAIN;
- spin_unlock(&obj->userptr.mmu_object->mn->lock);
-#endif
-
- return ret;
-}
-
static void
__i915_gem_userptr_get_pages_worker(struct work_struct *_work)
{
@@ -682,8 +661,11 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
struct sgt_iter sgt_iter;
struct page *page;
- BUG_ON(obj->userptr.work != NULL);
+ /* Cancel any inflight work and force them to restart their gup */
+ obj->userptr.work = NULL;
__i915_gem_userptr_set_active(obj, false);
+ if (!pages)
+ return;
if (obj->mm.madv != I915_MADV_WILLNEED)
obj->mm.dirty = false;
@@ -721,7 +703,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
- I915_GEM_OBJECT_IS_SHRINKABLE,
+ I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_ASYNC_CANCEL,
.get_pages = i915_gem_userptr_get_pages,
.put_pages = i915_gem_userptr_put_pages,
.dmabuf_export = i915_gem_userptr_dmabuf_export,