Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Not all mediated environments yet support HW semaphores, so we should > avoid using one for our preempt-to-busy barrier and instead request that > the CS be paused and not advance on to the next execlist. > > There's a natural advantage with the register writes being serialised > with the writes to the ELSP register that should allow the barrier to be > much more constrained. On the other hand, we can no longer track the > extents of the barrier and so must be a lot more lenient in accepting > early CS events. > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Zhenyu Wang <zhenyuw@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 +++ > drivers/gpu/drm/i915/gt/intel_lrc.c | 69 ++++++++------------ > 2 files changed, 35 insertions(+), 42 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h > index 7e056114344e..1353df264236 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h > @@ -166,6 +166,8 @@ struct intel_engine_execlists { > */ > bool no_priolist; > > + u16 ring_mode; > + > /** > * @submit_reg: gen-specific execlist submission register > * set to the ExecList Submission Port (elsp) register pre-Gen11 and to > @@ -179,6 +181,12 @@ struct intel_engine_execlists { > */ > u32 __iomem *ctrl_reg; > > + /** > + * @ring_mode: the engine control register, used to freeze the command > + * streamer around preempt-to-busy > + */ > + u32 __iomem *ring_reg; > + > #define EXECLIST_MAX_PORTS 2 > /** > * @active: the currently known context executing on HW > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 28685ba91a2c..c2aaab4b743e 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -224,24 +224,18 @@ static void execlists_init_reg_state(u32 *reg_state, > struct 
intel_engine_cs *engine, > struct intel_ring *ring); > > -static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) > +static inline void ring_set_paused(struct intel_engine_cs *engine, u32 state) > { > - return (i915_ggtt_offset(engine->status_page.vma) + > - I915_GEM_HWS_PREEMPT_ADDR); > -} > + u16 x; > > -static inline void > -ring_set_paused(const struct intel_engine_cs *engine, int state) > -{ > - /* > - * We inspect HWS_PREEMPT with a semaphore inside > - * engine->emit_fini_breadcrumb. If the dword is true, > - * the ring is paused as the semaphore will busywait > - * until the dword is false. > - */ > - engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; > - if (state) > - wmb(); > + x = engine->execlists.ring_mode; > + x &= ~(state >> 16); > + x |= state; > + if (x == engine->execlists.ring_mode) > + return; Do you want to cache the state to avoid redundant register writes? I would like ring_freeze() and ring_thaw() helpers, with the state parameter removed, as I don't see the value in having the callsites construct the masked bits themselves. > + > + engine->execlists.ring_mode = x; > + writel(state, engine->execlists.ring_reg); > } > > static inline struct i915_priolist *to_priolist(struct rb_node *rb) > @@ -981,7 +975,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > * as we unwind (and until we resubmit) so that we do > * not accidentally tell it to go backwards. 
> */ > - ring_set_paused(engine, 1); > + ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING)); > > /* > * Note that we have not stopped the GPU at this point, > @@ -1010,7 +1004,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > last->sched.attr.priority, > execlists->queue_priority_hint); > > - ring_set_paused(engine, 1); > + ring_set_paused(engine, _MASKED_BIT_ENABLE(STOP_RING)); > defer_active(engine); > > /* > @@ -1244,9 +1238,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) > *port = execlists_schedule_in(last, port - execlists->pending); > memset(port + 1, 0, (last_port - port) * sizeof(*port)); > execlists_submit_ports(engine); > - } else { > - ring_set_paused(engine, 0); > } > + > + if (!inject_preempt_hang(execlists)) > + ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING)); This is just a superset of the previous unpausing now? > } > > void > @@ -1355,9 +1350,6 @@ static void process_csb(struct intel_engine_cs *engine) > > if (enable_timeslice(engine)) > mod_timer(&execlists->timer, jiffies + 1); > - > - if (!inject_preempt_hang(execlists)) > - ring_set_paused(engine, 0); > } else if (status & GEN8_CTX_STATUS_PREEMPTED) { > struct i915_request * const *port = execlists->active; > > @@ -1378,8 +1370,15 @@ static void process_csb(struct intel_engine_cs *engine) > * ordered, that the breadcrumb write is > * coherent (visible from the CPU) before the > * user interrupt and CSB is processed. > + * > + * The caveat here applies when we are injecting > + * a completion event via manipulation of the > + * RING_MI_MODE; this may occur before the > + * request is completed and appears as a > + * normal context-switch (0x14). As in we unpause and get a completion event? 
-Mika > */ > - GEM_BUG_ON(!i915_request_completed(rq)); > + GEM_BUG_ON(!i915_request_completed(rq) && > + !execlists->pending[0]); > execlists_schedule_out(rq); > > GEM_BUG_ON(execlists->active - execlists->inflight > > @@ -2133,7 +2132,7 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) > struct intel_engine_execlists * const execlists = &engine->execlists; > const unsigned int reset_value = execlists->csb_size - 1; > > - ring_set_paused(engine, 0); > + ring_set_paused(engine, _MASKED_BIT_DISABLE(STOP_RING)); > > /* > * After a reset, the HW starts writing into CSB entry [0]. We > @@ -2581,19 +2580,6 @@ static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) > return cs; > } > > -static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) > -{ > - *cs++ = MI_SEMAPHORE_WAIT | > - MI_SEMAPHORE_GLOBAL_GTT | > - MI_SEMAPHORE_POLL | > - MI_SEMAPHORE_SAD_EQ_SDD; > - *cs++ = 0; > - *cs++ = intel_hws_preempt_address(request->engine); > - *cs++ = 0; > - > - return cs; > -} > - > static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) > { > cs = gen8_emit_ggtt_write(cs, > @@ -2601,9 +2587,7 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) > request->timeline->hwsp_offset, > 0); > *cs++ = MI_USER_INTERRUPT; > - > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > - cs = emit_preempt_busywait(request, cs); > > request->tail = intel_ring_offset(request, cs); > assert_ring_tail_valid(request->ring, request->tail); > @@ -2625,9 +2609,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > PIPE_CONTROL_CS_STALL, > 0); > *cs++ = MI_USER_INTERRUPT; > - > *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > - cs = emit_preempt_busywait(request, cs); > > request->tail = intel_ring_offset(request, cs); > assert_ring_tail_valid(request->ring, request->tail); > @@ -2807,6 +2789,9 @@ int intel_execlists_submission_init(struct intel_engine_cs *engine) > execlists->csb_write = > 
&engine->status_page.addr[intel_hws_csb_write_index(i915)]; > > + execlists->ring_reg = > + uncore->regs + i915_mmio_reg_offset(RING_MI_MODE(base)); > + > if (INTEL_GEN(i915) < 11) > execlists->csb_size = GEN8_CSB_ENTRIES; > else > -- > 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx