Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > Quoting Mika Kuoppala (2019-08-12 17:01:07) >> Add tile cache flushing for gen11. To relive us from the >> burden of previous obsolete workarounds, make a dedicated >> flush/invalidate callback for gen11. >> >> To fortify an independent single flush, do post >> sync op as there are indications that without it >> we don't flush everything. This should also make this >> callback more readily usable in tgl (see l3 fabric flush). >> >> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> >> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> >> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> >> --- >> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + >> drivers/gpu/drm/i915/gt/intel_lrc.c | 63 +++++++++++++++++++- >> 2 files changed, 63 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h >> index 6a0879c27d14..929a17e54f2c 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h >> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h >> @@ -208,6 +208,7 @@ >> #define DISPLAY_PLANE_A (0<<20) >> #define DISPLAY_PLANE_B (1<<20) >> #define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) >> +#define PIPE_CONTROL_TILE_CACHE_FLUSH (1<<28) /* gen11+ */ > Ack. > >> #define PIPE_CONTROL_FLUSH_L3 (1<<27) >> #define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ >> #define PIPE_CONTROL_MMIO_WRITE (1<<23) >> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c >> index b97047d58d3d..891db3c933c9 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >> @@ -2655,6 +2655,64 @@ static int gen8_emit_flush_render(struct i915_request *request, >> return 0; >> } >> >> +static int gen11_emit_flush_render(struct i915_request *request, >> + u32 mode) >> +{ >> + struct intel_engine_cs *engine = request->engine; >> + const u32 scratch_addr = >> + intel_gt_scratch_offset(engine->gt, >> + INTEL_GT_SCRATCH_FIELD_RENDER_FLUSH); >> + >> + if (mode & EMIT_FLUSH) { >> + u32 *cs; >> + u32 flags = 0; >> + >> + flags |= PIPE_CONTROL_CS_STALL; >> + >> + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; >> + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; >> + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; >> + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; >> + flags |= PIPE_CONTROL_FLUSH_ENABLE; >> + flags |= PIPE_CONTROL_QW_WRITE; >> + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > > Ok. > >> + >> + cs = intel_ring_begin(request, 6); >> + if (IS_ERR(cs)) >> + return PTR_ERR(cs); >> + >> + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); >> + > > Personally, I wouldn't leave a newline here. Will remove. Pipe control length could be also defined but meh. >> + intel_ring_advance(request, cs); >> + } >> + >> + if (mode & EMIT_INVALIDATE) { >> + u32 *cs; >> + u32 flags = 0; >> + >> + flags |= PIPE_CONTROL_CS_STALL; >> + >> + flags |= PIPE_CONTROL_TLB_INVALIDATE; >> + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; >> + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; >> + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; >> + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; >> + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; >> + flags |= PIPE_CONTROL_QW_WRITE; >> + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; > > Ok. > >> + >> + cs = intel_ring_begin(request, 6); >> + if (IS_ERR(cs)) >> + return PTR_ERR(cs); >> + >> + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); >> + >> + intel_ring_advance(request, cs); >> + } >> + >> + return 0; >> +} >> + >> /* >> * Reserve space for 2 NOOPs at the end of each request to be >> * used as a workaround for not being allowed to do lite >> @@ -2829,7 +2887,10 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) >> logical_ring_default_irqs(engine); >> >> if (engine->class == RENDER_CLASS) { >> - engine->emit_flush = gen8_emit_flush_render; >> + if (INTEL_GEN(engine->i915) >= 11) >> + engine->emit_flush = gen11_emit_flush_render; >> + else >> + engine->emit_flush = gen8_emit_flush_render; >> engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; > > No fini breadcrumb flush? Well there are room for cleaning there for sure. I admit to being a bit coward. I don't have an icelake and changing that would warrant quite amount of gem_concurrent_blt cycles. If you can convince me that ci/bat will catch my mistakes, I could reconsider. -Mika _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx