We're currently doing one workaround where we're using scratch as a temporary storage place, while we're overwriting the value of one register with some known constant value in order to perform a workaround. While we could just do similar thing with CS_GPR register and MI_LOAD_REGISTER_REG instead of scratch, since we would use CS_GPR anyways, let's just drop the constant values and do the bitops using MI_MATH. Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_engine.h | 53 ++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 3 -- drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++--------- 3 files changed, 58 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d3c6993f4f46..2dfe0b23af1d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -400,6 +400,59 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } +/* + * We're using CS_GPR registers for the MI_MATH ops. + * Note that user contexts are free to use those registers, which means that we + * should only use this this function during context initialization, before + * context restore (WA batch) or inside i915-owned contexts. + */ +static inline u32 * +gen8_emit_bitop_reg_mask(struct intel_engine_cs *engine, + u32 *cs, u32 op, i915_reg_t reg, u32 mask) +{ + const u32 base = engine->mmio_base; + + GEM_BUG_ON(op != MI_MATH_OR && op != MI_MATH_AND); + + *cs++ = MI_LOAD_REGISTER_REG; + *cs++ = i915_mmio_reg_offset(reg); + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0)); + *cs++ = MI_NOOP; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 1)); + *cs++ = mask; + *cs++ = MI_NOOP; + + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(0)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(1)); + *cs++ = op; + *cs++ = MI_MATH_STORE(MI_MATH_REG(0), MI_MATH_REG_ACCU); + *cs++ = MI_NOOP; + + *cs++ = MI_LOAD_REGISTER_REG; + *cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(base, 0)); + *cs++ = i915_mmio_reg_offset(reg); + *cs++ = MI_NOOP; + + return cs; +} + +static inline u32 * +gen8_emit_set_register(struct intel_engine_cs *engine, + u32 *cs, i915_reg_t reg, u32 mask) +{ + return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_OR, reg, mask); +} + +static inline u32 * +gen8_emit_clear_register(struct intel_engine_cs *engine, + u32 *cs, i915_reg_t reg, u32 mask) +{ + return gen8_emit_bitop_reg_mask(engine, cs, MI_MATH_AND, reg, ~mask); +} + static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index e64db4c13df6..d9f25ac520a8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -92,9 +92,6 @@ enum intel_gt_scratch_field { /* 8 bytes */ INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH = 128, - /* 8 bytes */ - INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256, - }; #endif /* __INTEL_GT_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index fa385218ce92..089c17599ca4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2269,13 +2269,7 @@ static int execlists_request_alloc(struct i915_request *request) * PIPE_CONTROL instruction. This is required for the flush to happen correctly * but there is a slight complication as this is applied in WA batch where the * values are only initialized once so we cannot take register value at the - * beginning and reuse it further; hence we save its value to memory, upload a - * constant value with bit21 set and then we restore it back with the saved value. - * To simplify the WA, a constant value is formed by using the default value - * of this register. This shouldn't be a problem because we are only modifying - * it for a short period and this batch in non-premptible. We can ofcourse - * use additional instructions that read the actual value of the register - * at that time and set our bit of interest but it makes the WA complicated. + * beginning and reuse it further; * * This WA is also required for Gen9 so extracting as a function avoids * code duplication. @@ -2283,27 +2277,16 @@ static int execlists_request_alloc(struct i915_request *request) static u32 * gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { - /* NB no one else is allowed to scribble over scratch + 256! */ - *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); - *batch++ = 0; - - *batch++ = MI_LOAD_REGISTER_IMM(1); - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; + batch = gen8_emit_set_register(engine, batch, GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); batch = gen8_emit_pipe_control(batch, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE, 0); - *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; - *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); - *batch++ = intel_gt_scratch_offset(engine->gt, - INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA); - *batch++ = 0; + batch = gen8_emit_clear_register(engine, batch, GEN8_L3SQCREG4, + GEN8_LQSC_FLUSH_COHERENT_LINES); return batch; } -- 2.21.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx