From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> A few details to hopefully make a very hot function a tiny bit more efficient: 1. Cast VM pointers before subtraction to save the compiler doing a smart one which includes multiplication. 2. Use smaller type for comparison since we only care about the sign. 3. Prefer the ppgtt lookup branch and inline it, allowing the compiler to optimise out the second part of i915_vma_compare and save one call indirection. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 +++++++++++++++++------ drivers/gpu/drm/i915/i915_vma.h | 9 ++++++--- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ef00d36680c9..aa81945a608b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3338,17 +3338,17 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_ggtt_flush(dev_priv); } -struct i915_vma * -i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) +static inline struct i915_vma * +__i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct rb_node *rb; rb = obj->vma_tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); - long cmp; + int cmp; cmp = i915_vma_compare(vma, vm, view); if (cmp == 0) @@ -3363,6 +3363,14 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, return NULL; } +noinline struct i915_vma * +i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + return __i915_gem_obj_to_vma(obj, vm, view); +} + struct i915_vma * i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -3373,7 +3381,10 
@@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); - vma = i915_gem_obj_to_vma(obj, vm, view); + if (likely(!view)) + vma = __i915_gem_obj_to_vma(obj, vm, NULL); + else + vma = i915_gem_obj_to_vma(obj, vm, view); if (!vma) { vma = i915_vma_create(obj, vm, view); GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 21be74c61065..098f206c1a4d 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -178,15 +178,18 @@ static inline void i915_vma_put(struct i915_vma *vma) i915_gem_object_put(vma->obj); } -static inline long +static inline int i915_vma_compare(struct i915_vma *vma, struct i915_address_space *vm, const struct i915_ggtt_view *view) { + long cmp; + GEM_BUG_ON(view && !i915_is_ggtt(vm)); - if (vma->vm != vm) - return vma->vm - vm; + cmp = (unsigned long)vma->vm - (unsigned long)vm; + if (cmp) + return cmp; if (!view) return vma->ggtt_view.type; -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx