Pull the individual acquisition of the context objects (state, ring, timeline) under a common i915_acquire_ctx in preparation to allow the context to evict memory (or rather the i915_acquire_ctx on its behalf). The context objects maintain their semi-permanent status; that is they are assumed to be accessible by the HW at all times until we receive a signal from the HW that they are no longer in use. Currently, we generate such a signal ourselves from the context switch following the final use of the objects. This means that they will remain on the HW for an indefinite amount of time, and we retain the use of pinning to keep them in the same place. As they are pinned, they can be processed outside of the working set for the requests within the context. This is useful, as the context share some global state causing it to incur a global lock via its objects. By only requiring that lock as the context is activated, it is both reduced in frequency and reduced in duration (as compared to execbuf). Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Reviewed-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_context.c | 113 ++++++++++++++--- drivers/gpu/drm/i915/gt/intel_ring.c | 17 ++- drivers/gpu/drm/i915/gt/intel_ring.h | 5 +- .../gpu/drm/i915/gt/intel_ring_submission.c | 119 +++++++++++------- drivers/gpu/drm/i915/gt/intel_timeline.c | 14 ++- drivers/gpu/drm/i915/gt/intel_timeline.h | 8 +- drivers/gpu/drm/i915/gt/mock_engine.c | 2 + drivers/gpu/drm/i915/gt/selftest_timeline.c | 30 ++++- 8 files changed, 240 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index cde356c7754d..ff3f7580d1ca 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -6,6 +6,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" +#include "mm/i915_acquire_ctx.h" #include "i915_drv.h" #include "i915_globals.h" @@ -30,12 +31,12 @@ void intel_context_free(struct intel_context *ce) kmem_cache_free(global.slab_ce, ce); } -static int __context_pin_state(struct i915_vma *vma) +static int __context_active_locked(struct i915_vma *vma) { unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS; int err; - err = i915_ggtt_pin(vma, 0, bias | PIN_HIGH); + err = i915_ggtt_pin_locked(vma, 0, bias | PIN_HIGH); if (err) return err; @@ -57,18 +58,18 @@ static int __context_pin_state(struct i915_vma *vma) return err; } -static void __context_unpin_state(struct i915_vma *vma) +static void __context_retire_state(struct i915_vma *vma) { i915_vma_make_shrinkable(vma); i915_active_release(&vma->active); __i915_vma_unpin(vma); } -static int __ring_active(struct intel_ring *ring) +static int __ring_active_locked(struct intel_ring *ring) { int err; - err = intel_ring_pin(ring); + err = intel_ring_pin_locked(ring); if (err) return err; @@ -100,7 +101,7 @@ static void __intel_context_retire(struct i915_active *active) set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) - __context_unpin_state(ce->state); + __context_retire_state(ce->state); intel_timeline_unpin(ce->timeline); __ring_retire(ce->ring); @@ -108,27 +109,53 @@ static void __intel_context_retire(struct i915_active *active) intel_context_put(ce); } -static int __intel_context_active(struct i915_active *active) +static int +__intel_context_acquire_lock(struct intel_context *ce, + struct i915_acquire_ctx *ctx) +{ + return i915_acquire_ctx_lock(ctx, ce->state->obj); +} + +static int +intel_context_acquire_lock(struct intel_context *ce, + struct i915_acquire_ctx *ctx) { - struct intel_context *ce = container_of(active, typeof(*ce), active); int err; - CE_TRACE(ce, "active\n"); + err = intel_ring_acquire_lock(ce->ring, ctx); + if (err) + return err; - intel_context_get(ce); + if (ce->state) { + err = __intel_context_acquire_lock(ce, ctx); + if (err) + return err; + } - err = __ring_active(ce->ring); + /* Note that the timeline will migrate as the seqno wrap around */ + err = intel_timeline_acquire_lock(ce->timeline, ctx); if (err) - goto err_put; + return err; + + return 0; +} - err = intel_timeline_pin(ce->timeline); +static int intel_context_active_locked(struct intel_context *ce) +{ + int err; + + err = __ring_active_locked(ce->ring); + if (err) + return err; + + err = intel_timeline_pin_locked(ce->timeline); if (err) goto err_ring; if (!ce->state) return 0; - err = __context_pin_state(ce->state); + err = __context_active_locked(ce->state); if (err) goto err_timeline; @@ -138,7 +165,37 @@ static int __intel_context_active(struct i915_active *active) intel_timeline_unpin(ce->timeline); err_ring: __ring_retire(ce->ring); -err_put: + return err; +} + +static int __intel_context_active(struct i915_active *active) +{ + struct intel_context *ce = container_of(active, typeof(*ce), active); + struct i915_acquire_ctx acquire; + int err; + + CE_TRACE(ce, "active\n"); + + intel_context_get(ce); + i915_acquire_ctx_init(&acquire); + + err = intel_context_acquire_lock(ce, &acquire); + if (err) + goto err; + + err = i915_acquire_mm(&acquire); + if (err) + goto err; + + err = intel_context_active_locked(ce); + if (err) + goto err; + + i915_acquire_ctx_fini(&acquire); + return 0; + +err: + i915_acquire_ctx_fini(&acquire); intel_context_put(ce); return err; } @@ -265,6 +322,27 @@ static void intel_context_active_release(struct intel_context *ce) i915_active_release(&ce->active); } +static int __intel_context_sync(struct intel_context *ce) +{ + int err; + + err = i915_vma_wait_for_bind(ce->ring->vma); + if (err) + return err; + + err = i915_vma_wait_for_bind(ce->timeline->hwsp_ggtt); + if (err) + return err; + + if (ce->state) { + err = i915_vma_wait_for_bind(ce->state); + if (err) + return err; + } + + return 0; +} + int __intel_context_do_pin(struct intel_context *ce) { int err; @@ -290,6 +368,10 @@ int __intel_context_do_pin(struct intel_context *ce) } if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) { + err = __intel_context_sync(ce); + if (unlikely(err)) + goto out_unlock; + err = intel_context_active_acquire(ce); if (unlikely(err)) goto out_unlock; @@ -337,6 +419,7 @@ void intel_context_unpin(struct intel_context *ce) intel_context_active_release(ce); rcu_read_unlock(); } + void intel_context_fini(struct intel_context *ce) { if (ce->timeline) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index bdb324167ef3..1c21f5725731 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -5,6 +5,8 @@ */ #include "gem/i915_gem_object.h" +#include "mm/i915_acquire_ctx.h" + #include "i915_drv.h" #include "i915_vma.h" #include "intel_engine.h" @@ -21,9 +23,16 @@ unsigned int intel_ring_update_space(struct intel_ring *ring) return space; } -int intel_ring_pin(struct intel_ring *ring) +int intel_ring_acquire_lock(struct intel_ring *ring, + struct i915_acquire_ctx *ctx) +{ + return i915_acquire_ctx_lock(ctx, ring->vma->obj); +} + +int intel_ring_pin_locked(struct intel_ring *ring) { struct i915_vma *vma = ring->vma; + enum i915_map_type type; unsigned int flags; void *addr; int ret; @@ -39,15 +48,15 @@ int intel_ring_pin(struct intel_ring *ring) else flags |= PIN_HIGH; - ret = i915_ggtt_pin(vma, 0, flags); + ret = i915_ggtt_pin_locked(vma, 0, flags); if (unlikely(ret)) goto err_unpin; + type = i915_coherent_map_type(vma->vm->i915); if (i915_vma_is_map_and_fenceable(vma)) addr = (void __force *)i915_vma_pin_iomap(vma); else - addr = i915_gem_object_pin_map(vma->obj, - i915_coherent_map_type(vma->vm->i915)); + addr = __i915_gem_object_pin_map_locked(vma->obj, type); if (IS_ERR(addr)) { ret = PTR_ERR(addr); goto err_ring; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.h b/drivers/gpu/drm/i915/gt/intel_ring.h index cc0ebca65167..34134a0b80b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.h +++ b/drivers/gpu/drm/i915/gt/intel_ring.h @@ -11,6 +11,7 @@ #include "i915_request.h" #include "intel_ring_types.h" +struct i915_acquire_ctx; struct intel_engine_cs; struct intel_ring * @@ -21,7 +22,9 @@ int intel_ring_cacheline_align(struct i915_request *rq); unsigned int intel_ring_update_space(struct intel_ring *ring); -int intel_ring_pin(struct intel_ring *ring); +int intel_ring_acquire_lock(struct intel_ring *ring, + struct i915_acquire_ctx *ctx); +int intel_ring_pin_locked(struct intel_ring *ring); void intel_ring_unpin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a3b10f3c83eb..f545fe5ade60 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -27,6 +27,8 @@ * */ +#include "mm/i915_acquire_ctx.h" + #include "gen2_engine_cs.h" #include "gen6_engine_cs.h" #include "gen6_ppgtt.h" @@ -1010,6 +1012,15 @@ static void gen6_bsd_set_default_submission(struct intel_engine_cs *engine) engine->submit_request = gen6_bsd_submit_request; } +static void ring_release_global_timeline(struct intel_engine_cs *engine) +{ + intel_ring_unpin(engine->legacy.ring); + intel_ring_put(engine->legacy.ring); + + intel_timeline_unpin(engine->legacy.timeline); + intel_timeline_put(engine->legacy.timeline); +} + static void ring_release(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1024,11 +1035,7 @@ static void ring_release(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); } - intel_ring_unpin(engine->legacy.ring); - intel_ring_put(engine->legacy.ring); - - intel_timeline_unpin(engine->legacy.timeline); - intel_timeline_put(engine->legacy.timeline); + ring_release_global_timeline(engine); } static void setup_irq(struct intel_engine_cs *engine) @@ -1227,12 +1234,70 @@ static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine) return err; } -int intel_ring_submission_setup(struct intel_engine_cs *engine) +static int ring_setup_global_timeline(struct intel_engine_cs *engine) { + struct i915_acquire_ctx acquire; struct intel_timeline *timeline; struct intel_ring *ring; int err; + timeline = intel_timeline_create_from_engine(engine, + I915_GEM_HWS_SEQNO_ADDR); + if (IS_ERR(timeline)) + return PTR_ERR(timeline); + GEM_BUG_ON(timeline->has_initial_breadcrumb); + + ring = intel_engine_create_ring(engine, SZ_16K); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err_timeline; + } + + i915_acquire_ctx_init(&acquire); + + err = intel_ring_acquire_lock(ring, &acquire); + if (err) + goto err_acquire; + + err = intel_timeline_acquire_lock(timeline, &acquire); + if (err) + goto err_acquire; + + err = i915_acquire_mm(&acquire); + if (err) + goto err_acquire; + + err = intel_timeline_pin_locked(timeline); + if (err) + goto err_acquire; + + err = intel_ring_pin_locked(ring); + if (err) + goto err_timeline_unpin; + + i915_acquire_ctx_fini(&acquire); + + GEM_BUG_ON(engine->legacy.ring); + engine->legacy.ring = ring; + engine->legacy.timeline = timeline; + + GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + return 0; + +err_timeline_unpin: + intel_timeline_unpin(timeline); +err_acquire: + i915_acquire_ctx_fini(&acquire); + intel_ring_put(ring); +err_timeline: + intel_timeline_put(timeline); + return err; +} + +int intel_ring_submission_setup(struct intel_engine_cs *engine) +{ + int err; + setup_common(engine); switch (engine->class) { @@ -1253,38 +1318,14 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return -ENODEV; } - timeline = intel_timeline_create_from_engine(engine, - I915_GEM_HWS_SEQNO_ADDR); - if (IS_ERR(timeline)) { - err = PTR_ERR(timeline); - goto err; - } - GEM_BUG_ON(timeline->has_initial_breadcrumb); - - err = intel_timeline_pin(timeline); + err = ring_setup_global_timeline(engine); if (err) - goto err_timeline; - - ring = intel_engine_create_ring(engine, SZ_16K); - if (IS_ERR(ring)) { - err = PTR_ERR(ring); - goto err_timeline_unpin; - } - - err = intel_ring_pin(ring); - if (err) - goto err_ring; - - GEM_BUG_ON(engine->legacy.ring); - engine->legacy.ring = ring; - engine->legacy.timeline = timeline; - - GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); + goto err_common; if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) - goto err_ring_unpin; + goto err_global; } /* Finally, take ownership and responsibility for cleanup! */ @@ -1292,15 +1333,9 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) return 0; -err_ring_unpin: - intel_ring_unpin(ring); -err_ring: - intel_ring_put(ring); -err_timeline_unpin: - intel_timeline_unpin(timeline); -err_timeline: - intel_timeline_put(timeline); -err: +err_global: + ring_release_global_timeline(engine); +err_common: intel_engine_cleanup_common(engine); return err; } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 2baecf414acb..8b8aef24848c 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -4,9 +4,10 @@ * Copyright © 2016-2018 Intel Corporation */ -#include "i915_drv.h" +#include "mm/i915_acquire_ctx.h" #include "i915_active.h" +#include "i915_drv.h" #include "i915_syncmap.h" #include "intel_gt.h" #include "intel_ring.h" @@ -317,14 +318,21 @@ __intel_timeline_create(struct intel_gt *gt, return timeline; } -int intel_timeline_pin(struct intel_timeline *tl) +int +intel_timeline_acquire_lock(struct intel_timeline *tl, + struct i915_acquire_ctx *ctx) +{ + return i915_acquire_ctx_lock(ctx, tl->hwsp_ggtt->obj); +} + +int intel_timeline_pin_locked(struct intel_timeline *tl) { int err; if (atomic_add_unless(&tl->pin_count, 1, 0)) return 0; - err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH); + err = i915_ggtt_pin_locked(tl->hwsp_ggtt, 0, PIN_HIGH); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 06bd06c6595f..e0afa12bac9e 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -31,6 +31,8 @@ #include "i915_syncmap.h" #include "intel_timeline_types.h" +struct i915_acquire_ctx; + struct intel_timeline * __intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp, @@ -88,7 +90,11 @@ static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); } -int intel_timeline_pin(struct intel_timeline *tl); +int +intel_timeline_acquire_lock(struct intel_timeline *tl, + struct i915_acquire_ctx *ctx); +int intel_timeline_pin_locked(struct intel_timeline *tl); + void intel_timeline_enter(struct intel_timeline *tl); int intel_timeline_get_seqno(struct intel_timeline *tl, struct i915_request *rq, diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 79764305b8ec..b1db6be0c1c0 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -67,6 +67,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) __set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma)); __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags); ring->vma->node.size = sz; + ring->vma->obj = i915_gem_object_create_internal(engine->i915, 4096); intel_ring_update_space(ring); @@ -75,6 +76,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) static void mock_ring_free(struct intel_ring *ring) { + i915_gem_object_put(ring->vma->obj); i915_active_fini(&ring->vma->active); i915_vma_free(ring->vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 98ceac2fb77d..9db0fc82bbd2 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -6,6 +6,8 @@ #include <linux/prime_numbers.h> +#include "mm/i915_acquire_ctx.h" + #include "intel_context.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" @@ -449,13 +451,37 @@ static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) return 0; } +static int tl_pin(struct intel_timeline *tl) +{ + struct i915_acquire_ctx acquire; + int err; + + i915_acquire_ctx_init(&acquire); + + err = intel_timeline_acquire_lock(tl, &acquire); + if (err) + goto out; + + err = i915_acquire_mm(&acquire); + if (err) + goto out; + + err = intel_timeline_pin_locked(tl); + if (err == 0) + err = i915_vma_wait_for_bind(tl->hwsp_ggtt); + +out: + i915_acquire_ctx_fini(&acquire); + return err; +} + static struct i915_request * tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) { struct i915_request *rq; int err; - err = intel_timeline_pin(tl); + err = tl_pin(tl); if (err) { rq = ERR_PTR(err); goto out; @@ -667,7 +693,7 @@ static int live_hwsp_wrap(void *arg) if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) goto out_free; - err = intel_timeline_pin(tl); + err = tl_pin(tl); if (err) goto out_free; -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx