Remove the accumulated optimisations that we have for i915_vma_retire and reduce it to the bare essential of tracking the active object reference. This allows us to only use atomic operations, and so will be able to avoid the struct_mutex requirement. The principal loss here is the shrinker MRU bumping, so now if we have to shrink, we will do so in much more random order and more likely to try and shrink recently used objects. That is a nuisance, but shrinking active objects is a second step we try to avoid and will always be a system-wide performance issue. The other loss is here is in the automatic pruning of the reservation_object when idling. This is not as large an issue as upon reservation_object introduction as now adding new fences into the object replaces already signaled fences, keeping the array compact. But we do lose the auto-expiration of stale fences and unused arrays. That may be a noticeable problem for which we need to re-implement autopruning. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 - drivers/gpu/drm/i915/gem/i915_gem_object.h | 6 --- .../gpu/drm/i915/gem/i915_gem_object_types.h | 1 - drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 5 +- .../drm/i915/gem/selftests/i915_gem_mman.c | 9 ---- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 1 - drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 32 +++++------ drivers/gpu/drm/i915/i915_debugfs.c | 8 +-- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 42 ++++++--------- drivers/gpu/drm/i915/i915_vma.c | 54 ++++--------------- 11 files changed, 47 insertions(+), 116 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 36b76c6a0a9d..01528553d4cc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -189,7 +189,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); - GEM_BUG_ON(i915_gem_object_is_active(obj)); list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index 7cb1871d7128..454bfb498001 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -158,12 +158,6 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL; } -static inline bool -i915_gem_object_is_active(const struct drm_i915_gem_object *obj) -{ - return READ_ONCE(obj->active_count); -} - static inline bool i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5b05698619ce..c299fed2c6b1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -156,7 +156,6 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ atomic_t bind_count; - unsigned int active_count; /** Count of how many global VMA are currently pinned for use by HW */ unsigned int pin_global; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index e15f37bef36a..7445e6f662ac 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -235,8 +235,9 @@ i915_gem_shrink(struct drm_i915_private *i915, continue; if (!(shrink & I915_SHRINK_ACTIVE) && - (i915_gem_object_is_active(obj) || - i915_gem_object_is_framebuffer(obj))) + (i915_gem_object_is_framebuffer(obj) || + reservation_object_test_signaled_rcu(obj->resv, + true))) continue; if (!(shrink & I915_SHRINK_BOUND) && diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b92809418729..aac093b1b0b2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -474,15 +474,6 @@ static int igt_mmap_offset_exhaustion(void *arg) pr_err("[loop %d] Failed to busy the object\n", loop); goto err_obj; } - - /* NB we rely on the _active_ reference to access obj now */ - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - err = create_mmap_offset(obj); - if (err) { - pr_err("[loop %d] create_mmap_offset failed with err=%d\n", - loop, err); - goto out; - } } out: diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bbbdc63906c6..cd4cf4d0b30c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1509,9 +1509,7 @@ static void execlists_submit_request(struct i915_request *request) static void __execlists_context_fini(struct intel_context *ce) { intel_ring_put(ce->ring); - - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); - i915_gem_object_put(ce->state->obj); + i915_vma_put(ce->state); } static void execlists_context_destroy(struct kref *kref) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 019bf039f616..5ccf61806d9e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1309,7 +1309,6 @@ void intel_ring_free(struct kref *ref) static void __ring_context_fini(struct intel_context *ce) { - GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj)); i915_gem_object_put(ce->state->obj); } diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 45379a63e013..8f2f07cae7bd 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -129,33 +129,29 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct drm_i915_private *i915 = h->i915; struct i915_address_space *vm = h->ctx->vm ?: &i915->ggtt.vm; + struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; unsigned int flags; + void *vaddr; u32 *batch; int err; - if (i915_gem_object_is_active(h->obj)) { - struct drm_i915_gem_object *obj; - void *vaddr; - - obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); - vaddr = i915_gem_object_pin_map(obj, - i915_coherent_map_type(h->i915)); - if (IS_ERR(vaddr)) { - i915_gem_object_put(obj); - return ERR_CAST(vaddr); - } + vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(h->i915)); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } - i915_gem_object_unpin_map(h->obj); - i915_gem_object_put(h->obj); + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); - h->obj = obj; - h->batch = vaddr; - } + h->obj = obj; + h->batch = vaddr; vma = i915_vma_instance(h->obj, vm, NULL); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 323863504111..d8fe75f5e0d6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -74,11 +74,6 @@ static int i915_capabilities(struct seq_file *m, void *data) return 0; } -static char get_active_flag(struct drm_i915_gem_object *obj) -{ - return i915_gem_object_is_active(obj) ? '*' : ' '; -} - static char get_pin_flag(struct drm_i915_gem_object *obj) { return obj->pin_global ? 'p' : ' '; @@ -143,9 +138,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) unsigned int frontbuffer_bits; int pin_count = 0; - seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s", + seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s", &obj->base, - get_active_flag(obj), get_pin_flag(obj), get_tiling_flag(obj), get_global_flag(obj), diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 56adfdcaed3e..1b7595e2ac21 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -94,34 +94,26 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, list = &pool->cache_list[n]; list_for_each_entry(obj, list, batch_pool_link) { + struct reservation_object *resv = obj->resv; + /* The batches are strictly LRU ordered */ - if (i915_gem_object_is_active(obj)) { - struct reservation_object *resv = obj->resv; - - if (!reservation_object_test_signaled_rcu(resv, true)) - break; - - i915_retire_requests(pool->engine->i915); - GEM_BUG_ON(i915_gem_object_is_active(obj)); - - /* - * The object is now idle, clear the array of shared - * fences before we add a new request. Although, we - * remain on the same engine, we may be on a different - * timeline and so may continually grow the array, - * trapping a reference to all the old fences, rather - * than replace the existing fence. - */ - if (rcu_access_pointer(resv->fence)) { - reservation_object_lock(resv, NULL); - reservation_object_add_excl_fence(resv, NULL); - reservation_object_unlock(resv); - } + if (!reservation_object_test_signaled_rcu(resv, true)) + break; + + /* + * The object is now idle, clear the array of shared + * fences before we add a new request. Although, we + * remain on the same engine, we may be on a different + * timeline and so may continually grow the array, + * trapping a reference to all the old fences, rather + * than replace the existing fence. + */ + if (rcu_access_pointer(resv->fence)) { + reservation_object_lock(resv, NULL); + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); } - GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->resv, - true)); - if (obj->base.size >= size) goto found; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index cb341e4acf99..f782919ff19b 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -77,43 +77,11 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) #endif -static void obj_bump_mru(struct drm_i915_gem_object *obj) -{ - struct drm_i915_private *i915 = to_i915(obj->base.dev); - unsigned long flags; - - spin_lock_irqsave(&i915->mm.obj_lock, flags); - list_move_tail(&obj->mm.link, &i915->mm.shrink_list); - spin_unlock_irqrestore(&i915->mm.obj_lock, flags); - - obj->mm.dirty = true; /* be paranoid */ -} - static void __i915_vma_retire(struct i915_active *ref) { struct i915_vma *vma = container_of(ref, typeof(*vma), active); - struct drm_i915_gem_object *obj = vma->obj; - - GEM_BUG_ON(!i915_gem_object_is_active(obj)); - if (--obj->active_count) - return; - - /* Prune the shared fence arrays iff completely idle (inc. external) */ - if (reservation_object_trylock(obj->resv)) { - if (reservation_object_test_signaled_rcu(obj->resv, true)) - reservation_object_add_excl_fence(obj->resv, NULL); - reservation_object_unlock(obj->resv); - } - /* - * Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) - */ - if (i915_gem_object_is_shrinkable(obj)) - obj_bump_mru(obj); - - i915_gem_object_put(obj); /* and drop the active reference */ + i915_vma_put(vma); } static struct i915_vma * @@ -921,6 +889,7 @@ int i915_vma_move_to_active(struct i915_vma *vma, unsigned int flags) { struct drm_i915_gem_object *obj = vma->obj; + int err; assert_vma_held(vma); assert_object_held(obj); @@ -934,17 +903,13 @@ int i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!vma->active.count && !obj->active_count++) - i915_gem_object_get(obj); /* once more for the active ref */ - - if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) { - if (!vma->active.count && !--obj->active_count) - i915_gem_object_put(obj); - return -ENOMEM; - } + if (i915_active_acquire(&vma->active)) + i915_vma_get(vma); - GEM_BUG_ON(!i915_vma_is_active(vma)); - GEM_BUG_ON(!obj->active_count); + err = i915_active_ref(&vma->active, rq->fence.context, rq); + i915_active_release(&vma->active); + if (unlikely(err)) + return err; obj->write_domain = 0; if (flags & EXEC_OBJECT_WRITE) { @@ -956,11 +921,14 @@ int i915_vma_move_to_active(struct i915_vma *vma, obj->read_domains = 0; } obj->read_domains |= I915_GEM_GPU_DOMAINS; + obj->mm.dirty = true; if (flags & EXEC_OBJECT_NEEDS_FENCE) __i915_active_request_set(&vma->last_fence, rq); export_fence(vma, rq, flags); + + GEM_BUG_ON(!i915_vma_is_active(vma)); return 0; } -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx