Re: [PATCH 27/39] drm/i915: Move vma lookup to its own lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 02/01/2019 09:41, Chris Wilson wrote:
Remove the struct_mutex requirement for looking up the vma for an
object.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_debugfs.c       |  6 +--
  drivers/gpu/drm/i915/i915_gem.c           | 33 +++++++------
  drivers/gpu/drm/i915/i915_gem_object.h    | 45 ++++++++++-------
  drivers/gpu/drm/i915/i915_vma.c           | 60 +++++++++++++++--------
  drivers/gpu/drm/i915/i915_vma.h           |  2 +-
  drivers/gpu/drm/i915/selftests/i915_vma.c |  4 +-
  6 files changed, 92 insertions(+), 58 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5e2d5c8d7e02..8059f6dd3886 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
  		   obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : "");
  	if (obj->base.name)
  		seq_printf(m, " (name: %d)", obj->base.name);
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
  		if (i915_vma_is_pinned(vma))
  			pin_count++;
  	}
  	seq_printf(m, " (pinned x %d)", pin_count);
  	if (obj->pin_global)
  		seq_printf(m, " (global)");
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
  		if (!drm_mm_node_allocated(&vma->node))
  			continue;
@@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data)
  	if (obj->base.name || obj->base.dma_buf)
  		stats->shared += obj->base.size;
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
  		if (!drm_mm_node_allocated(&vma->node))
  			continue;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a954e15c0315..e42ad20d6328 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -438,15 +438,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
  	if (ret)
  		return ret;
-	while ((vma = list_first_entry_or_null(&obj->vma_list,
-					       struct i915_vma,
-					       obj_link))) {
+	spin_lock(&obj->vma.lock);
+	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
+						       struct i915_vma,
+						       obj_link))) {
  		list_move_tail(&vma->obj_link, &still_in_list);
+		spin_unlock(&obj->vma.lock);
+
  		ret = i915_vma_unbind(vma);
-		if (ret)
-			break;
+
+		spin_lock(&obj->vma.lock);
  	}
-	list_splice(&still_in_list, &obj->vma_list);
+	list_splice(&still_in_list, &obj->vma.list);
+	spin_unlock(&obj->vma.lock);
  	return ret;
  }
@@ -3640,7 +3644,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
  	 * reading an invalid PTE on older architectures.
  	 */
  restart:
-	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+	list_for_each_entry(vma, &obj->vma.list, obj_link) {
  		if (!drm_mm_node_allocated(&vma->node))
  			continue;
@@ -3718,7 +3722,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
  			 */
  		}
-		list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		list_for_each_entry(vma, &obj->vma.list, obj_link) {
  			if (!drm_mm_node_allocated(&vma->node))
  				continue;
@@ -3728,7 +3732,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
  		}
  	}
-	list_for_each_entry(vma, &obj->vma_list, obj_link)
+	list_for_each_entry(vma, &obj->vma.list, obj_link)
  		vma->node.color = cache_level;
  	i915_gem_object_set_cache_coherency(obj, cache_level);
  	obj->cache_dirty = true; /* Always invalidate stale cachelines */
@@ -4304,7 +4308,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
  {
  	mutex_init(&obj->mm.lock);
-	INIT_LIST_HEAD(&obj->vma_list);
+	spin_lock_init(&obj->vma.lock);
+	INIT_LIST_HEAD(&obj->vma.list);
+
  	INIT_LIST_HEAD(&obj->lut_list);
  	INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -4470,14 +4476,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
  		mutex_lock(&i915->drm.struct_mutex);
  		GEM_BUG_ON(i915_gem_object_is_active(obj));
-		list_for_each_entry_safe(vma, vn,
-					 &obj->vma_list, obj_link) {
+		list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
  			GEM_BUG_ON(i915_vma_is_active(vma));
  			vma->flags &= ~I915_VMA_PIN_MASK;
  			i915_vma_destroy(vma);
  		}
-		GEM_BUG_ON(!list_empty(&obj->vma_list));
-		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree));
+		GEM_BUG_ON(!list_empty(&obj->vma.list));
+		GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree));
  		/* This serializes freeing with the shrinker. Since the free
  		 * is delayed, first by RCU then by the workqueue, we want the
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 49ce797173b5..151453f0f951 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -86,24 +86,33 @@ struct drm_i915_gem_object {
  	const struct drm_i915_gem_object_ops *ops;
-	/**
-	 * @vma_list: List of VMAs backed by this object
-	 *
-	 * The VMA on this list are ordered by type, all GGTT vma are placed
-	 * at the head and all ppGTT vma are placed at the tail. The different
-	 * types of GGTT vma are unordered between themselves, use the
-	 * @vma_tree (which has a defined order between all VMA) to find an
-	 * exact match.
-	 */
-	struct list_head vma_list;
-	/**
-	 * @vma_tree: Ordered tree of VMAs backed by this object
-	 *
-	 * All VMA created for this object are placed in the @vma_tree for
-	 * fast retrieval via a binary search in i915_vma_instance().
-	 * They are also added to @vma_list for easy iteration.
-	 */
-	struct rb_root vma_tree;
+	struct {
+		/**
+		 * @vma.lock: protect the list/tre of vmas

Typo: s/tre/tree/

+		 */
+		struct spinlock lock;
+
+		/**
+		 * @vma.list: List of VMAs backed by this object
+		 *
+		 * The VMA on this list are ordered by type, all GGTT vma are
+		 * placed at the head and all ppGTT vma are placed at the tail.
+		 * The different types of GGTT vma are unordered between
+		 * themselves, use the @vma.tree (which has a defined order
+		 * between all VMA) to quickly find an exact match.
+		 */
+		struct list_head list;
+
+		/**
+		 * @vma.tree: Ordered tree of VMAs backed by this object
+		 *
+		 * All VMA created for this object are placed in the @vma.tree
+		 * for fast retrieval via a binary search in
+		 * i915_vma_instance(). They are also added to @vma.list for
+		 * easy iteration.
+		 */
+		struct rb_root tree;
+	} vma;
  	/**
  	 * @lut_list: List of vma lookup entries in use for this object.
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ad76a3309830..55cabb162fe3 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -187,32 +187,47 @@ vma_create(struct drm_i915_gem_object *obj,
  								i915_gem_object_get_stride(obj));
  		GEM_BUG_ON(!is_power_of_2(vma->fence_alignment));
-		/*
-		 * We put the GGTT vma at the start of the vma-list, followed
-		 * by the ppGGTT vma. This allows us to break early when
-		 * iterating over only the GGTT vma for an object, see
-		 * for_each_ggtt_vma()
-		 */
  		vma->flags |= I915_VMA_GGTT;
-		list_add(&vma->obj_link, &obj->vma_list);
-	} else {
-		list_add_tail(&vma->obj_link, &obj->vma_list);
  	}
+	spin_lock(&obj->vma.lock);
+
  	rb = NULL;
-	p = &obj->vma_tree.rb_node;
+	p = &obj->vma.tree.rb_node;
  	while (*p) {
  		struct i915_vma *pos;
+		long cmp;
  		rb = *p;
  		pos = rb_entry(rb, struct i915_vma, obj_node);
-		if (i915_vma_compare(pos, vm, view) < 0)
+
+		cmp = i915_vma_compare(pos, vm, view);
+		if (cmp == 0) {
+			spin_unlock(&obj->vma.lock);
+			kmem_cache_free(vm->i915->vmas, vma);
+			return pos;

Single unlock & free would be possible with a goto here but perhaps this is clearer.

+		}
+
+		if (cmp < 0)

else if?

  			p = &rb->rb_right;
  		else
  			p = &rb->rb_left;
  	}
  	rb_link_node(&vma->obj_node, rb, p);
-	rb_insert_color(&vma->obj_node, &obj->vma_tree);
+	rb_insert_color(&vma->obj_node, &obj->vma.tree);
+
+	if (i915_vma_is_ggtt(vma))
+		/*
+		 * We put the GGTT vma at the start of the vma-list, followed
+		 * by the ppGGTT vma. This allows us to break early when
+		 * iterating over only the GGTT vma for an object, see
+		 * for_each_ggtt_vma()
+		 */
+		list_add(&vma->obj_link, &obj->vma.list);
+	else
+		list_add_tail(&vma->obj_link, &obj->vma.list);
+
+	spin_unlock(&obj->vma.lock);
  	mutex_lock(&vm->mutex);
  	list_add_tail(&vma->vm_link, &vm->vma_list);
@@ -232,7 +247,7 @@ vma_lookup(struct drm_i915_gem_object *obj,
  {
  	struct rb_node *rb;


An assertion that vma.lock is held would be good here.


-	rb = obj->vma_tree.rb_node;
+	rb = obj->vma.tree.rb_node;
  	while (rb) {
  		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
  		long cmp;
@@ -272,16 +287,17 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
  {
  	struct i915_vma *vma;
-	lockdep_assert_held(&obj->base.dev->struct_mutex);
  	GEM_BUG_ON(view && !i915_is_ggtt(vm));
  	GEM_BUG_ON(vm->closed);
+	spin_lock(&obj->vma.lock);
  	vma = vma_lookup(obj, vm, view);
-	if (!vma)
+	spin_unlock(&obj->vma.lock);
+
+	if (unlikely(!vma))
  		vma = vma_create(obj, vm, view);

lookup -> drop lock -> create is racy. It needs a comment explaining why that's okay, if it is okay. Otherwise a bit more refactoring is needed to make the sequence atomic.

  	GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view));
-	GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma);
  	return vma;
  }
@@ -803,14 +819,18 @@ static void __i915_vma_destroy(struct i915_vma *vma)
  	GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence));
-	list_del(&vma->obj_link);
-
  	mutex_lock(&vma->vm->mutex);
  	list_del(&vma->vm_link);
  	mutex_unlock(&vma->vm->mutex);
-	if (vma->obj)
-		rb_erase(&vma->obj_node, &vma->obj->vma_tree);
+	if (vma->obj) {

Perplexing... do we have vma->obj == NULL somewhere, or at some point?

+		struct drm_i915_gem_object *obj = vma->obj;
+
+		spin_lock(&obj->vma.lock);
+		list_del(&vma->obj_link);
+		rb_erase(&vma->obj_node, &vma->obj->vma.tree);
+		spin_unlock(&obj->vma.lock);
+	}
  	rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
  		GEM_BUG_ON(i915_gem_active_isset(&iter->base));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 4f7c1c7599f4..7252abc73d3e 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915);
   * or the list is empty ofc.
   */
  #define for_each_ggtt_vma(V, OBJ) \
-	list_for_each_entry(V, &(OBJ)->vma_list, obj_link)		\
+	list_for_each_entry(V, &(OBJ)->vma.list, obj_link)		\
  		for_each_until(!i915_vma_is_ggtt(V))
  #endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c
index ffa74290e054..f1008b07dfd2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_vma.c
+++ b/drivers/gpu/drm/i915/selftests/i915_vma.c
@@ -670,7 +670,7 @@ static int igt_vma_partial(void *arg)
  		}
  		count = 0;
-		list_for_each_entry(vma, &obj->vma_list, obj_link)
+		list_for_each_entry(vma, &obj->vma.list, obj_link)
  			count++;
  		if (count != nvma) {
  			pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n",
@@ -699,7 +699,7 @@ static int igt_vma_partial(void *arg)
  		i915_vma_unpin(vma);
  		count = 0;
-		list_for_each_entry(vma, &obj->vma_list, obj_link)
+		list_for_each_entry(vma, &obj->vma.list, obj_link)
  			count++;
  		if (count != nvma) {
  			pr_err("(%s) allocated an extra full vma!\n", p->name);


At which point in the series does a patch come along that actually stops taking struct_mutex on some path covered by this new lock?

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux