Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > On Tigerlake, MI_SEMAPHORE_WAIT grew an extra dword, so be sure to > update the length field and emit that extra parameter and any padding > noop as required. > > v2: Define the token shift while we are adding the updated MI_SEMAPHORE_WAIT > > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > Cc: Michal Winiarski <michal.winiarski@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 2 + > drivers/gpu/drm/i915/gt/intel_lrc.c | 71 ++++++++++++++++++-- > drivers/gpu/drm/i915/i915_pci.c | 1 - > drivers/gpu/drm/i915/i915_request.c | 21 ++++-- > 4 files changed, 83 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > index fbad403ab7ac..da2025bc332c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h > @@ -112,6 +112,7 @@ > #define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ > #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) > #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ > +#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */ > #define MI_SEMAPHORE_POLL (1 << 15) > #define MI_SEMAPHORE_SAD_GT_SDD (0 << 12) > #define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12) > @@ -119,6 +120,7 @@ > #define MI_SEMAPHORE_SAD_LTE_SDD (3 << 12) > #define MI_SEMAPHORE_SAD_EQ_SDD (4 << 12) > #define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12) > +#define MI_SEMAPHORE_TOKEN_SHIFT 5 Do we need a mask too? > #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) > #define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) > #define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 64fa2db5905f..c74fc75e4980 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -3237,6 +3237,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > return gen8_emit_fini_breadcrumb_footer(request, cs); > } > > +static u32 * > +gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > +{ > + cs = gen8_emit_ggtt_write_rcs(cs, > + request->fence.seqno, > + request->timeline->hwsp_offset, > + PIPE_CONTROL_CS_STALL | > + PIPE_CONTROL_TILE_CACHE_FLUSH | > + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | > + PIPE_CONTROL_DEPTH_CACHE_FLUSH | > + PIPE_CONTROL_DC_FLUSH_ENABLE | > + PIPE_CONTROL_FLUSH_ENABLE); > + > + return gen8_emit_fini_breadcrumb_footer(request, cs); > +} > + > /* > * Note that the CS instruction pre-parser will not stall on the breadcrumb > * flush and will continue pre-fetching the instructions after it before the > @@ -3255,8 +3271,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > * All the above applies only to the instructions themselves. Non-inline data > * used by the instructions is not pre-fetched. > */ > -static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, > - u32 *cs) > + > +static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) > +{ > + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | > + MI_SEMAPHORE_GLOBAL_GTT | > + MI_SEMAPHORE_POLL | > + MI_SEMAPHORE_SAD_EQ_SDD; > + *cs++ = 0; > + *cs++ = intel_hws_preempt_address(request->engine); This is supposed to be in canonical form. > + *cs++ = 0; > + *cs++ = 0; > + *cs++ = MI_NOOP; > + > + return cs; > +} > + > +static __always_inline u32* > +gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) > +{ > + *cs++ = MI_USER_INTERRUPT; > + > + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; > + if (intel_engine_has_semaphores(request->engine)) > + cs = gen12_emit_preempt_busywait(request, cs); > + > + request->tail = intel_ring_offset(request, cs); > + assert_ring_tail_valid(request->ring, request->tail); > + > + return gen8_emit_wa_tail(request, cs); > +} > + > +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) > +{ > + cs = gen8_emit_ggtt_write(cs, > + request->fence.seqno, > + request->timeline->hwsp_offset, > + 0); > + > + return gen12_emit_fini_breadcrumb_footer(request, cs); > +} > + > +static u32 * > +gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) > { > cs = gen8_emit_ggtt_write_rcs(cs, > request->fence.seqno, > @@ -3268,7 +3325,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, > PIPE_CONTROL_DC_FLUSH_ENABLE | > PIPE_CONTROL_FLUSH_ENABLE); > > - return gen8_emit_fini_breadcrumb_footer(request, cs); > + return gen12_emit_fini_breadcrumb_footer(request, cs); > } > > static void execlists_park(struct intel_engine_cs *engine) > @@ -3297,9 +3354,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) > engine->flags |= I915_ENGINE_HAS_PREEMPTION; > } > > - if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */ > - engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES; > - > if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 11) this is not against tip :) > engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; > } > @@ -3329,6 +3383,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) > engine->emit_flush = gen8_emit_flush; > engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; > engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; > + if (INTEL_GEN(engine->i915) >= 12) > + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; > > engine->set_default_submission = intel_execlists_set_default_submission; > > @@ -3374,6 +3430,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine) > { > switch (INTEL_GEN(engine->i915)) { > case 12: > + engine->emit_flush = gen11_emit_flush_render; > + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; > + break; > case 11: > engine->emit_flush = gen11_emit_flush_render; > engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index ee9a7959204c..f0e6172ccd97 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -798,7 +798,6 @@ static const struct intel_device_info intel_tigerlake_12_info = { > .engine_mask = > BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2), > .has_rc6 = false, /* XXX disabled for debugging */ > - .has_logical_ring_preemption = false, /* XXX disabled for debugging */ > .engine_mask = BIT(RCS0), /* XXX reduced for debugging */ > }; > > diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c > index 754a78364a63..a967afff5e51 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to, > struct i915_request *from, > gfp_t gfp) > { > + bool has_token = INTEL_GEN(to->i915) >= 12; > u32 hwsp_offset; > + int len; > u32 *cs; > int err; > > @@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to, > if (err) > return err; > > - cs = intel_ring_begin(to, 4); > + len = 4; > + if (has_token) > + len += 2; > + > + cs = intel_ring_begin(to, len); > if (IS_ERR(cs)) > return PTR_ERR(cs); > > @@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to, > * (post-wrap) values than they were expecting (and so wait > * forever). > */ > - *cs++ = MI_SEMAPHORE_WAIT | > - MI_SEMAPHORE_GLOBAL_GTT | > - MI_SEMAPHORE_POLL | > - MI_SEMAPHORE_SAD_GTE_SDD; > + *cs++ = (MI_SEMAPHORE_WAIT | > + MI_SEMAPHORE_GLOBAL_GTT | > + MI_SEMAPHORE_POLL | > + MI_SEMAPHORE_SAD_GTE_SDD) + > + has_token; Pls change to int :O Also, should we just pass the token in anticipation that it will be used in future? -Mika > *cs++ = from->fence.seqno; > *cs++ = hwsp_offset; > *cs++ = 0; > + if (has_token) { > + *cs++ = 0; > + *cs++ = MI_NOOP; > + } > > intel_ring_advance(to, cs); > to->sched.semaphores |= from->engine->mask; > -- > 2.23.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx