Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Quoting Mika Kuoppala (2019-06-12 14:29:48) >> Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: >> >> > We need to keep the context image pinned in memory until after the GPU >> > has finished writing into it. Since it continues to write as we signal >> > the final breadcrumb, we need to keep it pinned until the request after >> > it is complete. Currently we know the order in which requests execute on >> > each engine, and so to remove that presumption we need to identify a >> > request/context-switch we know must occur after our completion. Any >> > request queued after the signal must imply a context switch, for >> > simplicity we use a fresh request from the kernel context. >> > >> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> > --- >> > drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 ++---- >> > drivers/gpu/drm/i915/gem/i915_gem_context.h | 1 - >> > drivers/gpu/drm/i915/gem/i915_gem_pm.c | 20 ++++- >> > drivers/gpu/drm/i915/gt/intel_context.c | 80 ++++++++++++++++--- >> > drivers/gpu/drm/i915/gt/intel_context.h | 3 + >> > drivers/gpu/drm/i915/gt/intel_context_types.h | 6 +- >> > drivers/gpu/drm/i915/gt/intel_engine.h | 2 - >> > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 23 +----- >> > drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 + >> > drivers/gpu/drm/i915/gt/intel_engine_types.h | 13 +-- >> > drivers/gpu/drm/i915/gt/intel_lrc.c | 62 ++------------ >> > drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 44 +--------- >> > drivers/gpu/drm/i915/gt/mock_engine.c | 11 +-- >> > drivers/gpu/drm/i915/i915_active.c | 80 ++++++++++++++++++- >> > drivers/gpu/drm/i915/i915_active.h | 5 ++ >> > drivers/gpu/drm/i915/i915_active_types.h | 3 + >> > drivers/gpu/drm/i915/i915_gem.c | 4 - >> > drivers/gpu/drm/i915/i915_request.c | 15 ---- >> > .../gpu/drm/i915/selftests/mock_gem_device.c | 1 - >> > 19 files changed, 214 insertions(+), 185 deletions(-) >> > >> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c >> > index c86ca9f21532..6200060aef05 100644 >> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c >> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c >> > @@ -692,17 +692,6 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv) >> > return 0; >> > } >> > >> > -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) >> > -{ >> > - struct intel_engine_cs *engine; >> > - enum intel_engine_id id; >> > - >> > - lockdep_assert_held(&dev_priv->drm.struct_mutex); >> > - >> > - for_each_engine(engine, dev_priv, id) >> > - intel_engine_lost_context(engine); >> > -} >> > - >> > void i915_gem_contexts_fini(struct drm_i915_private *i915) >> > { >> > lockdep_assert_held(&i915->drm.struct_mutex); >> > @@ -1203,10 +1192,6 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) >> > if (ret) >> > goto out_add; >> > >> > - ret = gen8_emit_rpcs_config(rq, ce, sseu); >> > - if (ret) >> > - goto out_add; >> > - >> > /* >> > * Guarantee context image and the timeline remains pinned until the >> > * modifying request is retired by setting the ce activity tracker. >> > @@ -1214,9 +1199,12 @@ gen8_modify_rpcs(struct intel_context *ce, struct intel_sseu sseu) >> > * But we only need to take one pin on the account of it. Or in other >> > * words transfer the pinned ce object to tracked active request. >> > */ >> > - if (!i915_active_request_isset(&ce->active_tracker)) >> > - __intel_context_pin(ce); >> > - __i915_active_request_set(&ce->active_tracker, rq); >> > + GEM_BUG_ON(i915_active_is_idle(&ce->active)); >> > + ret = i915_active_ref(&ce->active, rq->fence.context, rq); >> >> >> Why the place to keep the context alive is this function? > > This is a special case where we have one context (the kernel context) > writing into the context state object of another. To keep the target > context state pinned, we mark the entire context as active. > >> In other words, if the sseu state is not changed, we bail out early >> and don't setup the tracker and thus fail in promise for keeping it alive. > > As we don't need to keep it alive for an access that never happened. > >> > + if (ret) >> > + goto out_add; >> > + >> > + ret = gen8_emit_rpcs_config(rq, ce, sseu); >> > >> > out_add: >> > i915_request_add(rq); >> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h >> > index 630392c77e48..9691dd062f72 100644 >> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h >> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h >> > @@ -134,7 +134,6 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) >> > >> > /* i915_gem_context.c */ >> > int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); >> > -void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); >> > void i915_gem_contexts_fini(struct drm_i915_private *dev_priv); >> > >> > int i915_gem_context_open(struct drm_i915_private *i915, >> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c >> > index f40f13c0b8b7..59b6d45b1936 100644 >> > --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c >> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c >> > @@ -10,6 +10,22 @@ >> > #include "i915_drv.h" >> > #include "i915_globals.h" >> > >> > +static void call_idle_barriers(struct intel_engine_cs *engine) >> > +{ >> > + struct llist_node *node, *next; >> > + >> > + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) { >> > + struct i915_active_request *active = >> > + container_of((struct list_head *)node, >> > + typeof(*active), link); >> > + >> > + INIT_LIST_HEAD(&active->link); >> > + RCU_INIT_POINTER(active->request, NULL); >> > + >> > + active->retire(active, NULL); >> > + } >> > +} >> > + >> > static void i915_gem_park(struct drm_i915_private *i915) >> > { >> > struct intel_engine_cs *engine; >> > @@ -17,8 +33,10 @@ static void i915_gem_park(struct drm_i915_private *i915) >> > >> > lockdep_assert_held(&i915->drm.struct_mutex); >> > >> > - for_each_engine(engine, i915, id) >> > + for_each_engine(engine, i915, id) { >> > + call_idle_barriers(engine); /* cleanup after wedging */ >> > i915_gem_batch_pool_fini(&engine->batch_pool); >> > + } >> > >> > i915_timelines_park(i915); >> > i915_vma_parked(i915); >> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c >> > index c78ec0b58e77..c10eb4904264 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_context.c >> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c >> > @@ -61,7 +61,6 @@ int __intel_context_do_pin(struct intel_context *ce) >> > >> > i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ >> > >> > - intel_context_get(ce); >> > smp_mb__before_atomic(); /* flush pin before it is visible */ >> > } >> > >> > @@ -89,20 +88,45 @@ void intel_context_unpin(struct intel_context *ce) >> > ce->ops->unpin(ce); >> > >> > i915_gem_context_put(ce->gem_context); >> > - intel_context_put(ce); >> > + intel_context_inactive(ce); >> > } >> > >> > mutex_unlock(&ce->pin_mutex); >> > intel_context_put(ce); >> > } >> > >> > -static void intel_context_retire(struct i915_active_request *active, >> > - struct i915_request *rq) >> > +static int __context_pin_state(struct i915_vma *vma, unsigned long flags) >> > { >> > - struct intel_context *ce = >> > - container_of(active, typeof(*ce), active_tracker); >> > + int err; >> > >> > - intel_context_unpin(ce); >> > + err = i915_vma_pin(vma, 0, 0, flags | PIN_GLOBAL); >> > + if (err) >> > + return err; >> > + >> > + /* >> > + * And mark it as a globally pinned object to let the shrinker know >> > + * it cannot reclaim the object until we release it. >> > + */ >> > + vma->obj->pin_global++; >> > + vma->obj->mm.dirty = true; >> > + >> > + return 0; >> > +} >> > + >> > +static void __context_unpin_state(struct i915_vma *vma) >> > +{ >> > + vma->obj->pin_global--; >> > + __i915_vma_unpin(vma); >> > +} >> > + >> > +static void intel_context_retire(struct i915_active *active) >> > +{ >> > + struct intel_context *ce = container_of(active, typeof(*ce), active); >> > + >> > + if (ce->state) >> > + __context_unpin_state(ce->state); >> > + >> > + intel_context_put(ce); >> > } >> > >> > void >> > @@ -125,8 +149,46 @@ intel_context_init(struct intel_context *ce, >> > >> > mutex_init(&ce->pin_mutex); >> > >> > - i915_active_request_init(&ce->active_tracker, >> > - NULL, intel_context_retire); >> > + i915_active_init(ctx->i915, &ce->active, intel_context_retire); >> > +} >> > + >> > +int intel_context_active(struct intel_context *ce, unsigned long flags) >> >> >> I can digest this but was missing the verb in this and thought >> intel_context_activate|deactivate. > > You will never make me write activ8! Other than inserting mark or make, > I don't have a better idea and have grown quite used to over the last > several months. I think the intent here is reasonably clear, this is to > operate on the ce->active. > mark_active was also in my mind. > Maybe, intel_context_active_acquire() and intel_context_active_release()? On your note that it is the ce->active we operate on, not the context, the current naming starts to fit. > >> > +{ >> > + int err; >> > + >> > + if (!i915_active_acquire(&ce->active)) >> > + return 0; >> > + >> > + intel_context_get(ce); >> > + >> > + if (!ce->state) >> > + return 0; >> > + >> > + err = __context_pin_state(ce->state, flags); >> > + if (err) { >> > + i915_active_cancel(&ce->active); >> > + intel_context_put(ce); >> > + return err; >> > + } >> > + >> > + /* Preallocate tracking nodes */ >> > + if (!i915_gem_context_is_kernel(ce->gem_context)) { >> > + err = i915_active_acquire_preallocate_barrier(&ce->active, >> > + ce->engine); >> > + if (err) { >> > + i915_active_release(&ce->active); >> > + return err; >> > + } >> > + } >> > + >> > + return 0; >> > +} >> > + >> > +void intel_context_inactive(struct intel_context *ce) >> > +{ >> > + /* Nodes preallocated in intel_context_active() */ >> > + i915_active_acquire_barrier(&ce->active); >> > + i915_active_release(&ce->active); >> > } >> > >> > static void i915_global_context_shrink(void) >> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h >> > index 6d5453ba2c1e..4de4ba2df7d4 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_context.h >> > +++ b/drivers/gpu/drm/i915/gt/intel_context.h >> > @@ -102,6 +102,9 @@ static inline void intel_context_exit(struct intel_context *ce) >> > ce->ops->exit(ce); >> > } >> > >> > +int intel_context_active(struct intel_context *ce, unsigned long flags); >> > +void intel_context_inactive(struct intel_context *ce); >> > + >> > static inline struct intel_context *intel_context_get(struct intel_context *ce) >> > { >> > kref_get(&ce->ref); >> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h >> > index 825fcf0ac9c4..e95be4be9612 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h >> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h >> > @@ -56,10 +56,10 @@ struct intel_context { >> > intel_engine_mask_t saturated; /* submitting semaphores too late? */ >> > >> > /** >> > - * active_tracker: Active tracker for the external rq activity >> > - * on this intel_context object. >> > + * active: Active tracker for the rq activity (inc. external) on this >> > + * intel_context object. >> > */ >> > - struct i915_active_request active_tracker; >> > + struct i915_active active; >> > >> > const struct intel_context_ops *ops; >> > >> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h >> > index 201bbd2a4faf..b9fd88f21609 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_engine.h >> > +++ b/drivers/gpu/drm/i915/gt/intel_engine.h >> > @@ -466,8 +466,6 @@ static inline void intel_engine_reset(struct intel_engine_cs *engine, >> > bool intel_engine_is_idle(struct intel_engine_cs *engine); >> > bool intel_engines_are_idle(struct drm_i915_private *dev_priv); >> > >> > -void intel_engine_lost_context(struct intel_engine_cs *engine); >> > - >> > void intel_engines_reset_default_submission(struct drm_i915_private *i915); >> > unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); >> > >> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >> > index c0d986db5a75..5a08036ae774 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c >> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >> > @@ -611,6 +611,8 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) >> > { >> > int err; >> > >> > + init_llist_head(&engine->barrier_tasks); >> > + >> > err = init_status_page(engine); >> > if (err) >> > return err; >> > @@ -870,6 +872,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) >> > if (engine->preempt_context) >> > intel_context_unpin(engine->preempt_context); >> > intel_context_unpin(engine->kernel_context); >> > + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); >> > >> > i915_timeline_fini(&engine->timeline); >> > >> > @@ -1201,26 +1204,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) >> > engine->set_default_submission(engine); >> > } >> > >> > -/** >> > - * intel_engine_lost_context: called when the GPU is reset into unknown state >> > - * @engine: the engine >> > - * >> > - * We have either reset the GPU or otherwise about to lose state tracking of >> > - * the current GPU logical state (e.g. suspend). On next use, it is therefore >> > - * imperative that we make no presumptions about the current state and load >> > - * from scratch. >> > - */ >> > -void intel_engine_lost_context(struct intel_engine_cs *engine) >> > -{ >> > - struct intel_context *ce; >> > - >> > - lockdep_assert_held(&engine->i915->drm.struct_mutex); >> > - >> > - ce = fetch_and_zero(&engine->last_retired_context); >> > - if (ce) >> > - intel_context_unpin(ce); >> > -} >> > - >> > bool intel_engine_can_store_dword(struct intel_engine_cs *engine) >> > { >> > switch (INTEL_GEN(engine->i915)) { >> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c >> > index ccf034764741..3c448a061abd 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c >> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c >> > @@ -88,6 +88,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) >> > >> > /* Check again on the next retirement. */ >> > engine->wakeref_serial = engine->serial + 1; >> > + >> > + i915_request_add_barriers(rq); >> > __i915_request_commit(rq); >> > >> > return false; >> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h >> > index 01223864237a..33a31aa2d2ae 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h >> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h >> > @@ -11,6 +11,7 @@ >> > #include <linux/irq_work.h> >> > #include <linux/kref.h> >> > #include <linux/list.h> >> > +#include <linux/llist.h> >> > #include <linux/types.h> >> > >> > #include "i915_gem.h" >> > @@ -288,6 +289,7 @@ struct intel_engine_cs { >> > struct intel_ring *buffer; >> > >> > struct i915_timeline timeline; >> > + struct llist_head barrier_tasks; >> > >> > struct intel_context *kernel_context; /* pinned */ >> > struct intel_context *preempt_context; /* pinned; optional */ >> > @@ -435,17 +437,6 @@ struct intel_engine_cs { >> > >> > struct intel_engine_execlists execlists; >> > >> > - /* Contexts are pinned whilst they are active on the GPU. The last >> > - * context executed remains active whilst the GPU is idle - the >> > - * switch away and write to the context object only occurs on the >> > - * next execution. Contexts are only unpinned on retirement of the >> > - * following request ensuring that we can always write to the object >> > - * on the context switch even after idling. Across suspend, we switch >> > - * to the kernel context and trash it as the save may not happen >> > - * before the hardware is powered down. >> > - */ >> > - struct intel_context *last_retired_context; >> > - >> > /* status_notifier: list of callbacks for context-switch changes */ >> > struct atomic_notifier_head context_status_notifier; >> > >> > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >> > index b8f5592da18f..05524489615c 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >> > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >> > @@ -1422,60 +1422,11 @@ static void execlists_context_destroy(struct kref *kref) >> > intel_context_free(ce); >> > } >> > >> > -static int __context_pin(struct i915_vma *vma) >> > -{ >> > - unsigned int flags; >> > - int err; >> > - >> > - flags = PIN_GLOBAL | PIN_HIGH; >> > - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); >> > - >> > - err = i915_vma_pin(vma, 0, 0, flags); >> > - if (err) >> > - return err; >> > - >> > - vma->obj->pin_global++; >> > - vma->obj->mm.dirty = true; >> > - >> > - return 0; >> > -} >> > - >> > -static void __context_unpin(struct i915_vma *vma) >> > -{ >> > - vma->obj->pin_global--; >> > - __i915_vma_unpin(vma); >> > -} >> > - >> > static void execlists_context_unpin(struct intel_context *ce) >> > { >> > - struct intel_engine_cs *engine; >> > - >> > - /* >> > - * The tasklet may still be using a pointer to our state, via an >> > - * old request. However, since we know we only unpin the context >> > - * on retirement of the following request, we know that the last >> > - * request referencing us will have had a completion CS interrupt. >> > - * If we see that it is still active, it means that the tasklet hasn't >> > - * had the chance to run yet; let it run before we teardown the >> > - * reference it may use. >> > - */ >> > - engine = READ_ONCE(ce->inflight); >> > - if (unlikely(engine)) { >> > - unsigned long flags; >> > - >> > - spin_lock_irqsave(&engine->timeline.lock, flags); >> > - process_csb(engine); >> > - spin_unlock_irqrestore(&engine->timeline.lock, flags); >> > - >> > - GEM_BUG_ON(READ_ONCE(ce->inflight)); >> > - } >> > - >> > i915_gem_context_unpin_hw_id(ce->gem_context); >> > - >> > - intel_ring_unpin(ce->ring); >> > - >> > i915_gem_object_unpin_map(ce->state->obj); >> > - __context_unpin(ce->state); >> > + intel_ring_unpin(ce->ring); >> > } >> > >> > static void >> > @@ -1512,7 +1463,10 @@ __execlists_context_pin(struct intel_context *ce, >> > goto err; >> > GEM_BUG_ON(!ce->state); >> > >> > - ret = __context_pin(ce->state); >> > + ret = intel_context_active(ce, >> > + engine->i915->ggtt.pin_bias | >> > + PIN_OFFSET_BIAS | >> > + PIN_HIGH); >> > if (ret) >> > goto err; >> > >> > @@ -1521,7 +1475,7 @@ __execlists_context_pin(struct intel_context *ce, >> > I915_MAP_OVERRIDE); >> > if (IS_ERR(vaddr)) { >> > ret = PTR_ERR(vaddr); >> > - goto unpin_vma; >> > + goto unpin_active; >> > } >> > >> > ret = intel_ring_pin(ce->ring); >> > @@ -1542,8 +1496,8 @@ __execlists_context_pin(struct intel_context *ce, >> > intel_ring_unpin(ce->ring); >> > unpin_map: >> > i915_gem_object_unpin_map(ce->state->obj); >> > -unpin_vma: >> > - __context_unpin(ce->state); >> > +unpin_active: >> > + intel_context_inactive(ce); >> > err: >> > return ret; >> > } >> > diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c >> > index c834d016c965..7ab28b6f62a1 100644 >> > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c >> > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c >> > @@ -1349,45 +1349,9 @@ static void __context_unpin_ppgtt(struct i915_gem_context *ctx) >> > gen6_ppgtt_unpin(i915_vm_to_ppgtt(vm)); >> > } >> > >> > -static int __context_pin(struct intel_context *ce) >> > -{ >> > - struct i915_vma *vma; >> > - int err; >> > - >> > - vma = ce->state; >> > - if (!vma) >> > - return 0; >> > - >> > - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); >> > - if (err) >> > - return err; >> > - >> > - /* >> > - * And mark is as a globally pinned object to let the shrinker know >> > - * it cannot reclaim the object until we release it. >> > - */ >> > - vma->obj->pin_global++; >> > - vma->obj->mm.dirty = true; >> > - >> > - return 0; >> > -} >> > - >> > -static void __context_unpin(struct intel_context *ce) >> > -{ >> > - struct i915_vma *vma; >> > - >> > - vma = ce->state; >> > - if (!vma) >> > - return; >> > - >> > - vma->obj->pin_global--; >> > - i915_vma_unpin(vma); >> > -} >> > - >> > static void ring_context_unpin(struct intel_context *ce) >> > { >> > __context_unpin_ppgtt(ce->gem_context); >> > - __context_unpin(ce); >> > } >> > >> > static struct i915_vma * >> > @@ -1477,18 +1441,18 @@ static int ring_context_pin(struct intel_context *ce) >> > ce->state = vma; >> > } >> > >> > - err = __context_pin(ce); >> > + err = intel_context_active(ce, PIN_HIGH); >> > if (err) >> > return err; >> > >> > err = __context_pin_ppgtt(ce->gem_context); >> > if (err) >> > - goto err_unpin; >> > + goto err_active; >> > >> > return 0; >> > >> > -err_unpin: >> > - __context_unpin(ce); >> > +err_active: >> > + intel_context_inactive(ce); >> > return err; >> > } >> > >> > diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c >> > index 6d7562769eb2..b7675ef18523 100644 >> > --- a/drivers/gpu/drm/i915/gt/mock_engine.c >> > +++ b/drivers/gpu/drm/i915/gt/mock_engine.c >> > @@ -146,12 +146,18 @@ static void mock_context_destroy(struct kref *ref) >> > >> > static int mock_context_pin(struct intel_context *ce) >> > { >> > + int ret; >> > + >> > if (!ce->ring) { >> > ce->ring = mock_ring(ce->engine); >> > if (!ce->ring) >> > return -ENOMEM; >> > } >> > >> > + ret = intel_context_active(ce, PIN_HIGH); >> > + if (ret) >> > + return ret; >> > + >> > mock_timeline_pin(ce->ring->timeline); >> > return 0; >> > } >> > @@ -328,14 +334,9 @@ void mock_engine_free(struct intel_engine_cs *engine) >> > { >> > struct mock_engine *mock = >> > container_of(engine, typeof(*mock), base); >> > - struct intel_context *ce; >> > >> > GEM_BUG_ON(timer_pending(&mock->hw_delay)); >> > >> > - ce = fetch_and_zero(&engine->last_retired_context); >> > - if (ce) >> > - intel_context_unpin(ce); >> > - >> > intel_context_unpin(engine->kernel_context); >> > >> > intel_engine_fini_breadcrumbs(engine); >> > diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c >> > index 863ae12707ba..100e40afc9d6 100644 >> > --- a/drivers/gpu/drm/i915/i915_active.c >> > +++ b/drivers/gpu/drm/i915/i915_active.c >> > @@ -100,7 +100,7 @@ active_instance(struct i915_active *ref, u64 idx) >> > parent = *p; >> > >> > node = rb_entry(parent, struct active_node, node); >> > - if (node->timeline == idx) >> > + if (node->timeline == idx && !IS_ERR(node->base.request)) >> >> Is this related change? > > It once was (in the next chunk). I used to insert the freshly preallocated > node into the tree before it had a valid request. It appears that is no > longer the case and the ERR_PTR is kept safely in a list until ready. > >> -Mika >> >> > goto replace; >> > >> > if (node->timeline < idx) >> > @@ -157,6 +157,7 @@ void i915_active_init(struct drm_i915_private *i915, >> > ref->retire = retire; >> > ref->tree = RB_ROOT; >> > i915_active_request_init(&ref->last, NULL, last_retire); >> > + init_llist_head(&ref->barriers); >> > ref->count = 0; >> > } >> > >> > @@ -263,6 +264,83 @@ void i915_active_fini(struct i915_active *ref) >> > } >> > #endif >> > >> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref, >> > + struct intel_engine_cs *engine) >> > +{ >> > + struct drm_i915_private *i915 = engine->i915; >> > + unsigned long tmp; >> > + int err = 0; >> > + >> > + GEM_BUG_ON(!engine->mask); >> > + for_each_engine_masked(engine, i915, engine->mask, tmp) { >> > + struct intel_context *kctx = engine->kernel_context; >> > + struct active_node *node; >> > + >> > + node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); >> > + if (unlikely(!node)) { >> > + err = -ENOMEM; >> > + break; >> > + } >> > + >> > + i915_active_request_init(&node->base, >> > + (void *)engine, node_retire); In commit you promise that you will queue a request for kernel context. But in here, you seem to use (abuse!?) the active request to make a shadow of a request and use it to call the idle barriers. So either the commit message needs tweaking or I don't have a slightest idea yet where we are flying in here :) -Mika >> > + node->timeline = kctx->ring->timeline->fence_context; >> > + node->ref = ref; >> > + ref->count++; >> > + >> > + llist_add((struct llist_node *)&node->base.link, >> > + &ref->barriers); >> > + } >> > + >> > + return err; >> > +} >> > + >> > +void i915_active_acquire_barrier(struct i915_active *ref) >> > +{ >> > + struct llist_node *pos, *next; >> > + >> > + i915_active_acquire(ref); >> > + >> > + llist_for_each_safe(pos, next, llist_del_all(&ref->barriers)) { >> > + struct intel_engine_cs *engine; >> > + struct active_node *node; >> > + struct rb_node **p, *parent; >> > + >> > + node = container_of((struct list_head *)pos, >> > + typeof(*node), base.link); >> > + >> > + engine = (void *)rcu_access_pointer(node->base.request); >> > + RCU_INIT_POINTER(node->base.request, ERR_PTR(-EAGAIN)); >> > + >> > + parent = NULL; >> > + p = &ref->tree.rb_node; >> > + while (*p) { >> > + parent = *p; >> > + if (rb_entry(parent, >> > + struct active_node, >> > + node)->timeline < node->timeline) >> > + p = &parent->rb_right; >> > + else >> > + p = &parent->rb_left; >> > + } >> > + rb_link_node(&node->node, parent, p); >> > + rb_insert_color(&node->node, &ref->tree); >> > + >> > + llist_add((struct llist_node *)&node->base.link, >> > + &engine->barrier_tasks); >> > + } >> > + i915_active_release(ref); >> > +} >> > + >> > +void i915_request_add_barriers(struct i915_request *rq) >> > +{ >> > + struct intel_engine_cs *engine = rq->engine; >> > + struct llist_node *node, *next; >> > + >> > + llist_for_each_safe(node, next, llist_del_all(&engine->barrier_tasks)) >> > + list_add_tail((struct list_head *)node, &rq->active_list); >> > +} >> > + >> > int i915_active_request_set(struct i915_active_request *active, >> > struct i915_request *rq) >> > { >> > diff --git a/drivers/gpu/drm/i915/i915_active.h b/drivers/gpu/drm/i915/i915_active.h >> > index 7d758719ce39..d55d37673944 100644 >> > --- a/drivers/gpu/drm/i915/i915_active.h >> > +++ b/drivers/gpu/drm/i915/i915_active.h >> > @@ -406,4 +406,9 @@ void i915_active_fini(struct i915_active *ref); >> > static inline void i915_active_fini(struct i915_active *ref) { } >> > #endif >> > >> > +int i915_active_acquire_preallocate_barrier(struct i915_active *ref, >> > + struct intel_engine_cs *engine); >> > +void i915_active_acquire_barrier(struct i915_active *ref); >> > +void i915_request_add_barriers(struct i915_request *rq); >> > + >> > #endif /* _I915_ACTIVE_H_ */ >> > diff --git a/drivers/gpu/drm/i915/i915_active_types.h b/drivers/gpu/drm/i915/i915_active_types.h >> > index b679253b53a5..c025991b9233 100644 >> > --- a/drivers/gpu/drm/i915/i915_active_types.h >> > +++ b/drivers/gpu/drm/i915/i915_active_types.h >> > @@ -7,6 +7,7 @@ >> > #ifndef _I915_ACTIVE_TYPES_H_ >> > #define _I915_ACTIVE_TYPES_H_ >> > >> > +#include <linux/llist.h> >> > #include <linux/rbtree.h> >> > #include <linux/rcupdate.h> >> > >> > @@ -31,6 +32,8 @@ struct i915_active { >> > unsigned int count; >> > >> > void (*retire)(struct i915_active *ref); >> > + >> > + struct llist_head barriers; >> > }; >> > >> > #endif /* _I915_ACTIVE_TYPES_H_ */ >> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> > index e980c1ee3dcf..0663f2df65d6 100644 >> > --- a/drivers/gpu/drm/i915/i915_gem.c >> > +++ b/drivers/gpu/drm/i915/i915_gem.c >> > @@ -1197,10 +1197,6 @@ void i915_gem_sanitize(struct drm_i915_private *i915) >> > >> > intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL); >> > intel_runtime_pm_put(i915, wakeref); >> > - >> > - mutex_lock(&i915->drm.struct_mutex); >> > - i915_gem_contexts_lost(i915); >> > - mutex_unlock(&i915->drm.struct_mutex); >> > } >> > >> > void i915_gem_init_swizzling(struct drm_i915_private *dev_priv) >> > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c >> > index e9b59eea4f10..9eff9de7fa10 100644 >> > --- a/drivers/gpu/drm/i915/i915_request.c >> > +++ b/drivers/gpu/drm/i915/i915_request.c >> > @@ -213,18 +213,6 @@ static void __retire_engine_request(struct intel_engine_cs *engine, >> > spin_unlock(&rq->lock); >> > >> > local_irq_enable(); >> > - >> > - /* >> > - * The backing object for the context is done after switching to the >> > - * *next* context. Therefore we cannot retire the previous context until >> > - * the next context has already started running. However, since we >> > - * cannot take the required locks at i915_request_submit() we >> > - * defer the unpinning of the active context to now, retirement of >> > - * the subsequent request. >> > - */ >> > - if (engine->last_retired_context) >> > - intel_context_unpin(engine->last_retired_context); >> > - engine->last_retired_context = rq->hw_context; >> > } >> > >> > static void __retire_engine_upto(struct intel_engine_cs *engine, >> > @@ -759,9 +747,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) >> > >> > rq->infix = rq->ring->emit; /* end of header; start of user payload */ >> > >> > - /* Keep a second pin for the dual retirement along engine and ring */ >> > - __intel_context_pin(ce); >> > - >> > intel_context_mark_active(ce); >> > return rq; >> > >> > diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c >> > index b7f3fbb4ae89..a96d0c012d46 100644 >> > --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c >> > +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c >> > @@ -56,7 +56,6 @@ static void mock_device_release(struct drm_device *dev) >> > >> > mutex_lock(&i915->drm.struct_mutex); >> > mock_device_flush(i915); >> > - i915_gem_contexts_lost(i915); >> > mutex_unlock(&i915->drm.struct_mutex); >> > >> > flush_work(&i915->gem.idle_work); >> > -- >> > 2.20.1 >> > >> > _______________________________________________ >> > Intel-gfx mailing list >> > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx >> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx >> _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx