Since the introduction of lockless object lookup (i915_gem_object_get_rcu()
in fbbd37b36fa5 ("drm/i915: Move object release to a freelist + worker")),
we no longer need to split the object/vma lookup into 3 phases and can
combine them into a much simpler single loop.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
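For review, a condensed sketch of the new lookup loop (identifiers as in the
diff below; declarations, error handling, __EXEC_OBJECT_VALIDATED handling and
the hash-resize bookkeeping are elided, so this is illustrative only and will
not compile standalone):

	for (i = 0; i < eb->buffer_count; i++) {
		u32 handle = eb->exec[i].handle;

		/* fast path: handle already cached in the per-context VMA hash */
		hlist_for_each_entry(vma, ht_head(lut, handle), ctx_node)
			if (vma->ctx_handle == handle)
				goto add_vma;

		/*
		 * slow path, now lockless: i915_gem_object_lookup() takes its
		 * reference under RCU, so no file->table_lock/GFP_ATOMIC dance
		 * and no second, INTERMEDIATE-tagged pass over the exec list
		 */
		obj = i915_gem_object_lookup(eb->file, handle);
		vma = i915_vma_instance(obj, eb->vm, NULL);
		/* first use in this context: hash the vma, transferring the ref */

add_vma:
		err = eb_add_vma(eb, i, vma, flags);
	}

With the object reference now taken locklessly per handle, the old three-phase
structure (hash walk, idr lookup under file->table_lock, then a second pass to
instantiate the VMAs) collapses into a single pass, and the error unwind
reduces to the err_obj/err_vma labels.
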
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 140 +++++++++--------------------
 1 file changed, 42 insertions(+), 98 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 9da0d209dd38..18d6581bc6c5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -47,16 +47,16 @@ enum {
 #define DBG_FORCE_RELOC 0 /* choose one of the above! */
 };
 
-#define __EXEC_OBJECT_HAS_REF		BIT(31)
-#define __EXEC_OBJECT_HAS_PIN		BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE		BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP		BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS	BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_VALIDATED		BIT(31)
+#define __EXEC_OBJECT_HAS_REF		BIT(30)
+#define __EXEC_OBJECT_HAS_PIN		BIT(29)
+#define __EXEC_OBJECT_HAS_FENCE		BIT(28)
+#define __EXEC_OBJECT_NEEDS_MAP		BIT(27)
+#define __EXEC_OBJECT_NEEDS_BIAS	BIT(26)
+#define __EXEC_OBJECT_INTERNAL_FLAGS	(~0u << 26) /* all of the above */
 #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
 
 #define __EXEC_HAS_RELOC	BIT(31)
-#define __EXEC_VALIDATED	BIT(30)
 #define UPDATE			PIN_OFFSET_FIXED
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -429,14 +429,16 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+	   unsigned int i, struct i915_vma *vma,
+	   unsigned int flags)
 {
 	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
 	int err;
 
 	GEM_BUG_ON(i915_vma_is_closed(vma));
 
-	if (!(eb->args->flags & __EXEC_VALIDATED)) {
+	if (!(flags & __EXEC_OBJECT_VALIDATED)) {
 		err = eb_validate_vma(eb, entry, vma);
 		if (unlikely(err))
 			return err;
@@ -471,7 +473,7 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
 	 * to find the right target VMA when doing relocations.
 	 */
 	eb->vma[i] = vma;
-	eb->flags[i] = entry->flags;
+	eb->flags[i] = entry->flags | flags;
 	vma->exec_flags = &eb->flags[i];
 
 	err = 0;
@@ -680,15 +682,11 @@ static int eb_select_context(struct i915_execbuffer *eb)
 	return 0;
 }
 
-static int eb_lookup_vmas(struct i915_execbuffer *eb)
+static int eb_lookup_vmas(struct i915_execbuffer *eb, unsigned int flags)
 {
-#define INTERMEDIATE BIT(0)
-	const unsigned int count = eb->buffer_count;
 	struct i915_gem_context_vma_lut *lut = &eb->ctx->vma_lut;
-	struct i915_vma *vma;
-	struct idr *idr;
+	struct drm_i915_gem_object *uninitialized_var(obj);
 	unsigned int i;
-	int slow_pass = -1;
 	int err;
 
 	INIT_LIST_HEAD(&eb->relocs);
@@ -698,85 +696,39 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 	flush_work(&lut->resize);
 	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
 
-	for (i = 0; i < count; i++) {
-		hlist_for_each_entry(vma,
-				     ht_head(lut, eb->exec[i].handle),
-				     ctx_node) {
-			if (vma->ctx_handle != eb->exec[i].handle)
-				continue;
-
-			err = eb_add_vma(eb, i, vma);
-			if (unlikely(err))
-				return err;
-			goto next_vma;
-		}
-
-		if (slow_pass < 0)
-			slow_pass = i;
-
-		eb->vma[i] = NULL;
-
-next_vma: ;
-	}
+	for (i = 0; i < eb->buffer_count; i++) {
+		u32 handle = eb->exec[i].handle;
+		struct hlist_head *hl = ht_head(lut, handle);
+		struct i915_vma *vma;
 
-	if (slow_pass < 0)
-		goto out;
+		hlist_for_each_entry(vma, hl, ctx_node) {
+			GEM_BUG_ON(vma->ctx != eb->ctx);
 
-	spin_lock(&eb->file->table_lock);
-	/*
-	 * Grab a reference to the object and release the lock so we can lookup
-	 * or create the VMA without using GFP_ATOMIC
-	 */
-	idr = &eb->file->object_idr;
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
+			if (vma->ctx_handle != handle)
+				continue;
 
-		if (eb->vma[i])
-			continue;
+			goto add_vma;
+		}
 
-		obj = to_intel_bo(idr_find(idr, eb->exec[i].handle));
+		obj = i915_gem_object_lookup(eb->file, handle);
 		if (unlikely(!obj)) {
-			spin_unlock(&eb->file->table_lock);
-			DRM_DEBUG("Invalid object handle %d at index %d\n",
-				  eb->exec[i].handle, i);
 			err = -ENOENT;
-			goto err;
+			goto err_vma;
 		}
 
-		eb->vma[i] =
-			(struct i915_vma *)ptr_pack_bits(obj, INTERMEDIATE, 1);
-	}
-	spin_unlock(&eb->file->table_lock);
-
-	for (i = slow_pass; i < count; i++) {
-		struct drm_i915_gem_object *obj;
-		unsigned int is_obj;
-
-		obj = (typeof(obj))ptr_unpack_bits(eb->vma[i], &is_obj, 1);
-		if (!is_obj)
-			continue;
+		flags |= __EXEC_OBJECT_HAS_REF;
 
-		/*
-		 * NOTE: We can leak any vmas created here when something fails
-		 * later on. But that's no issue since vma_unbind can deal with
-		 * vmas which are not actually bound. And since only
-		 * lookup_or_create exists as an interface to get at the vma
-		 * from the (obj, vm) we don't run the risk of creating
-		 * duplicated vmas for the same vm.
-		 */
 		vma = i915_vma_instance(obj, eb->vm, NULL);
 		if (unlikely(IS_ERR(vma))) {
-			DRM_DEBUG("Failed to lookup VMA\n");
 			err = PTR_ERR(vma);
-			goto err;
+			goto err_obj;
 		}
 
 		/* First come, first served */
 		if (!vma->ctx) {
 			vma->ctx = eb->ctx;
-			vma->ctx_handle = eb->exec[i].handle;
-			hlist_add_head(&vma->ctx_node,
-				       ht_head(lut, eb->exec[i].handle));
+			vma->ctx_handle = handle;
+			hlist_add_head(&vma->ctx_node, hl);
 			lut->ht_count++;
 			lut->ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
 			if (i915_vma_is_ggtt(vma)) {
@@ -784,18 +736,14 @@ next_vma: ;
 				obj->vma_hashed = vma;
 			}
 
-			i915_vma_get(vma);
+			/* transfer ref to ctx */
+			flags &= ~__EXEC_OBJECT_HAS_REF;
 		}
 
-		err = eb_add_vma(eb, i, vma);
+add_vma:
+		err = eb_add_vma(eb, i, vma, flags);
 		if (unlikely(err))
-			goto err;
-
-		/* Only after we validated the user didn't use our bits */
-		if (vma->ctx != eb->ctx) {
-			i915_vma_get(vma);
-			eb->flags[i] |= __EXEC_OBJECT_HAS_REF;
-		}
+			goto err_vma;
 	}
 
 	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS) {
@@ -805,7 +753,6 @@ next_vma: ;
 		lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	}
 
-out:
 	/* take note of the batch buffer before we might reorder the lists */
 	i = eb_batch_index(eb);
 	eb->batch = eb->vma[i];
@@ -825,17 +772,14 @@ next_vma: ;
 		if (eb->reloc_cache.has_fence)
 			eb->exec[i].flags |= EXEC_OBJECT_NEEDS_FENCE;
 
-	eb->args->flags |= __EXEC_VALIDATED;
 	return eb_reserve(eb);
 
-err:
-	for (i = slow_pass; i < count; i++) {
-		if (ptr_unmask_bits(eb->vma[i], 1))
-			eb->vma[i] = NULL;
-	}
+err_obj:
+	i915_gem_object_put(obj);
+err_vma:
+	eb->vma[i] = NULL;
 	lut->ht_size &= ~I915_CTX_RESIZE_IN_PROGRESS;
 	return err;
-#undef INTERMEDIATE
 }
 
 static struct i915_vma *
@@ -868,7 +812,7 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
 		unsigned int flags = eb->flags[i];
 
 		if (!vma)
-			continue;
+			break;
 
 		GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
 		vma->exec_flags = NULL;
@@ -1714,7 +1658,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 	}
 
 	/* reacquire the objects */
-	err = eb_lookup_vmas(eb);
+	err = eb_lookup_vmas(eb, __EXEC_OBJECT_VALIDATED);
 	if (err)
 		goto err;
@@ -1768,7 +1712,7 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
 
 static int eb_relocate(struct i915_execbuffer *eb)
 {
-	if (eb_lookup_vmas(eb))
+	if (eb_lookup_vmas(eb, 0))
 		goto slow;
 
 	/* The objects are in their final locations, apply the relocations. */
-- 
2.11.0