From: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>

We want this so we can preempt performance queries and keep the system
responsive even when long-running queries are ongoing. We avoid doing
it for all contexts.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 23 +++++---
 drivers/gpu/drm/i915/gt/intel_lrc.h |  3 +
 drivers/gpu/drm/i915/i915_perf.c    | 89 +++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9d8eaf8edaab..1bab671dc7a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1218,6 +1218,19 @@ static bool can_merge_rq(const struct i915_request *prev,
 	return true;
 }
 
+u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine)
+{
+	u32 value =
+		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
+		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
+
+	if (INTEL_GEN(engine->i915) < 11)
+		value |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
+					     CTX_CTRL_RS_CTX_ENABLE);
+
+	return value;
+}
+
 static void virtual_update_register_offsets(u32 *regs,
 					    struct intel_engine_cs *engine)
 {
@@ -2124,6 +2137,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
 
 		i915_oa_init_reg_state(ce, engine);
 	}
+	regs[CTX_CONTEXT_CONTROL] |= intel_lrc_make_ctx_control(engine);
 }
 
 static int
@@ -3628,14 +3642,7 @@ static void init_common_reg_state(u32 * const regs,
 				  const struct intel_engine_cs *engine,
 				  const struct intel_ring *ring)
 {
-	regs[CTX_CONTEXT_CONTROL] =
-		_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH);
-	if (INTEL_GEN(engine->i915) < 11)
-		regs[CTX_CONTEXT_CONTROL] |=
-			_MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT |
-					    CTX_CTRL_RS_CTX_ENABLE);
-
+	regs[CTX_CONTEXT_CONTROL] = intel_lrc_make_ctx_control(engine);
 	regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID;
 	regs[CTX_BB_STATE] = RING_BB_PPGTT;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 99dc576a4e25..6b2b196f09e7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@ struct intel_engine_cs;
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define	  CTX_CTRL_RS_CTX_ENABLE		(1 << 1)
 #define	  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT	(1 << 2)
+#define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	(1 << 8)
 #define RING_CONTEXT_STATUS_PTR(base)		_MMIO((base) + 0x3a0)
 #define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
 #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
@@ -145,4 +146,6 @@ struct intel_engine_cs *
 intel_virtual_engine_get_sibling(struct intel_engine_cs *engine,
 				 unsigned int sibling);
 
+u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine);
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index df3b6976ba5b..d0ddd37dc078 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2208,6 +2208,70 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
+static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+{
+	struct i915_request *rq;
+	u32 *cs, offset;
+	int err = 0;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs)) {
+		err = PTR_ERR(cs);
+		goto out;
+	}
+
+	offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+	*cs++ = offset + CTX_CONTEXT_CONTROL * sizeof(u32);
+	*cs++ = 0;
+	*cs++ = intel_lrc_make_ctx_control(ce->engine) |
+		(enable ?
+		 _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) :
+		 _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE));
+
+	intel_ring_advance(rq, cs);
+
+out:
+	i915_request_add(rq);
+
+	return err;
+}
+
+static int gen12_configure_context_oar(struct i915_gem_context *ctx,
+				       bool enable)
+{
+	struct i915_gem_engines_iter it;
+	struct intel_context *ce;
+	int err = 0;
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		GEM_BUG_ON(ce == ce->engine->kernel_context);
+
+		if (ce->engine->class != RENDER_CLASS)
+			continue;
+
+		err = intel_context_lock_pinned(ce);
+		if (err)
+			break;
+
+		/* Otherwise OA settings will be set upon first use */
+		if (intel_context_is_pinned(ce))
+			err = gen12_emit_oar_config(ce, enable);
+
+		intel_context_unlock_pinned(ce);
+		if (err)
+			break;
+	}
+	i915_gem_context_unlock_engines(ctx);
+
+	return err;
+}
+
 /*
  * Manages updating the per-context aspects of the OA stream
  * configuration across all contexts.
@@ -2313,6 +2377,17 @@
 			return err;
 		}
 
+		/*
+		 * For Gen12, performance counters are context
+		 * saved/restored. Only enable it for the context that
+		 * requested this.
+		 */
+		if (ctx == stream->ctx && IS_GEN(i915, 12)) {
+			err = gen12_configure_context_oar(ctx, oa_config != NULL);
+			if (err)
+				return err;
+		}
+
 		spin_lock(&i915->gem.contexts.lock);
 		list_safe_reset_next(ctx, cn, link);
 		i915_gem_context_put(ctx);
@@ -2782,6 +2857,20 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
 	stream = engine->i915->perf.exclusive_stream;
 	if (stream)
 		gen8_update_reg_state_unlocked(ce, stream);
+
+	/*
+	 * Enable context save & restore of performance counters for
+	 * the OAR unit only on the context selected for performance
+	 * queries.
+	 */
+	if (IS_GEN(engine->i915, 12)) {
+		u32 *regs = ce->lrc_reg_state;
+
+		regs[CTX_CONTEXT_CONTROL] |=
+			(stream && ce->gem_context == stream->ctx) ?
+			_MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) :
+			_MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE);
+	}
 }
 
 /**
-- 
2.20.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
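
For readers following along: the OAR bit above is only applied to the GEM
context that the perf stream was opened on (stream->ctx). A context becomes
stream->ctx when userspace passes DRM_I915_PERF_PROP_CTX_HANDLE to
DRM_IOCTL_I915_PERF_OPEN. Below is a minimal userspace sketch of that, using
the existing i915 perf uAPI; the helper name open_oa_stream_for_ctx and the
drm_fd/ctx_handle/metrics_set_id/oa_exponent parameters are illustrative
assumptions, and the OA report format shown is just one plausible choice:

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <i915_drm.h>	/* from libdrm; adjust the include path as needed */

  static int open_oa_stream_for_ctx(int drm_fd, uint32_t ctx_handle,
  				    uint64_t metrics_set_id,
  				    uint64_t oa_exponent)
  {
  	/*
  	 * Property/value pairs; PROP_CTX_HANDLE is what makes this GEM
  	 * context the stream->ctx that the kernel code above filters on.
  	 */
  	uint64_t properties[] = {
  		DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
  		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
  		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
  		DRM_I915_PERF_PROP_OA_FORMAT,
  			I915_OA_FORMAT_A32u40_A4u32_B8_C8,
  		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
  	};
  	struct drm_i915_perf_open_param param = {
  		.flags = I915_PERF_FLAG_FD_CLOEXEC,
  		/* num_properties counts key/value pairs, not u64s */
  		.num_properties = sizeof(properties) /
  				  (2 * sizeof(uint64_t)),
  		.properties_ptr = (uintptr_t)properties,
  	};

  	/* On success the ioctl returns a stream fd to read() OA reports from */
  	return ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
  }

If DRM_I915_PERF_PROP_CTX_HANDLE is omitted, the stream is system-wide and
stream->ctx stays NULL, so with this patch no user context ends up with
GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE set.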