[PATCH] drm/i915: Optimise VMA lookup slightly

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

A few details to hopefully make a very hot function a tiny bit
more efficient:

 1. Cast VM pointers before subtraction to save the compiler
    doing a smart one which includes multiplication.

 2. Use smaller type for comparison since we only care about
    the sign.

 3. Prefer the ppgtt lookup branch and inline it, allowing the
    compiler to optimise out the second part of i915_vma_compare
    and save one call indirection.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 23 +++++++++++++++++------
 drivers/gpu/drm/i915/i915_vma.h     |  9 ++++++---
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ef00d36680c9..aa81945a608b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3338,17 +3338,17 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
 	i915_ggtt_flush(dev_priv);
 }
 
-struct i915_vma *
-i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
-		    struct i915_address_space *vm,
-		    const struct i915_ggtt_view *view)
+static inline struct i915_vma *
+__i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+		      struct i915_address_space *vm,
+		      const struct i915_ggtt_view *view)
 {
 	struct rb_node *rb;
 
 	rb = obj->vma_tree.rb_node;
 	while (rb) {
 		struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node);
-		long cmp;
+		int cmp;
 
 		cmp = i915_vma_compare(vma, vm, view);
 		if (cmp == 0)
@@ -3363,6 +3363,14 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 	return NULL;
 }
 
+noinline struct i915_vma *
+i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+		    struct i915_address_space *vm,
+		    const struct i915_ggtt_view *view)
+{
+	return __i915_gem_obj_to_vma(obj, vm, view);
+}
+
 struct i915_vma *
 i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 				  struct i915_address_space *vm,
@@ -3373,7 +3381,10 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 	lockdep_assert_held(&obj->base.dev->struct_mutex);
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
 
-	vma = i915_gem_obj_to_vma(obj, vm, view);
+	if (likely(!view))
+		vma = __i915_gem_obj_to_vma(obj, vm, NULL);
+	else
+		vma = i915_gem_obj_to_vma(obj, vm, view);
 	if (!vma) {
 		vma = i915_vma_create(obj, vm, view);
 		GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view));
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 21be74c61065..098f206c1a4d 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -178,15 +178,18 @@ static inline void i915_vma_put(struct i915_vma *vma)
 	i915_gem_object_put(vma->obj);
 }
 
-static inline long
+static inline int
 i915_vma_compare(struct i915_vma *vma,
 		 struct i915_address_space *vm,
 		 const struct i915_ggtt_view *view)
 {
+	long cmp;
+
 	GEM_BUG_ON(view && !i915_is_ggtt(vm));
 
-	if (vma->vm != vm)
-		return vma->vm - vm;
+	cmp = (unsigned long)vma->vm - (unsigned long)vm;
+	if (cmp)
+		return cmp;
 
 	if (!view)
 		return vma->ggtt_view.type;
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux