If some of the contexts submitting workloads to the GPU have been
configured to shut down slices/subslices, we might lose the NOA
configurations written in the NOA muxes. We need to reprogram them at
context switch.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 ++
 drivers/gpu/drm/i915/i915_perf.c | 77 ++++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.c | 64 ++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 4 files changed, 140 insertions(+), 4 deletions(-)
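
A note for reviewers on the sizing math in i915_oa_get_perctx_bb_size()
below, with a small standalone userspace sketch (illustration only, not
part of the patch; the helper name and register counts are made up):
each chunk of up to MAX_LRI_SIZE (125) register/value pairs costs one
4-byte MI_LOAD_REGISTER_IMM header plus 8 bytes per register, which is
where the "* 4 + 4" and "* 8" terms in its return expression come from.

#include <stdio.h>
#include <stdint.h>

#define MAX_LRI_SIZE 125U

/* Mirrors the patch's size formula: one 4-byte LRI header per chunk of
 * up to 125 registers (over-reserving one header when the count is an
 * exact multiple), plus offset + value (8 bytes) per register.
 */
static uint32_t perctx_bb_size(uint32_t n_mux_regs)
{
	return (n_mux_regs / MAX_LRI_SIZE) * 4 + 4 + n_mux_regs * 8;
}

int main(void)
{
	printf("100 regs -> %u bytes\n", perctx_bb_size(100)); /* 1 LRI,  804 */
	printf("300 regs -> %u bytes\n", perctx_bb_size(300)); /* 3 LRIs, 2412 */
	return 0;
}

The result is then rounded up to a whole number of pages (together with
the 200 reserved dwords) when the workaround BO is allocated in
lrc_setup_wa_ctx().
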
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0003b46b6840..d4b3e5da9009 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3685,6 +3685,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    uint32_t *reg_state);
+u32 i915_oa_get_perctx_bb_size(struct drm_i915_private *dev_priv);
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 94185d610673..b74ffbb47879 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1687,6 +1687,74 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+#define MAX_LRI_SIZE (125U)
+
+u32 i915_oa_get_perctx_bb_size(struct drm_i915_private *dev_priv)
+{
+	struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	/* Perf not supported. */
+	if (!dev_priv->perf.initialized)
+		return 0;
+
+	/* OA not currently configured. */
+	if (!stream)
+		return 0;
+
+	/* Very unlikely but possible that we have no muxes to configure. */
+	if (!stream->oa_config->mux_regs_len)
+		return 0;
+
+	/* Return the size of MI_LOAD_REGISTER_IMMs. */
+	return (stream->oa_config->mux_regs_len / MAX_LRI_SIZE) * 4 + 4 +
+		stream->oa_config->mux_regs_len * 8;
+}
+
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+	u32 n_lri, n_mux_regs;
+	u32 i;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	/* We only care about RCS. */
+	if (engine->id != RCS)
+		return batch;
+
+	/* Perf not supported. */
+	if (!dev_priv->perf.initialized)
+		return batch;
+
+	/* OA not currently configured. */
+	if (!stream)
+		return batch;
+
+	/* It's very unlikely, but possible that we're dealing with a config
+	 * with no mux to configure.
+	 */
+	if (!stream->oa_config->mux_regs_len)
+		return batch;
+
+	n_mux_regs = stream->oa_config->mux_regs_len;
+	n_lri = (n_mux_regs / MAX_LRI_SIZE) + ((n_mux_regs % MAX_LRI_SIZE) != 0);
+
+	for (i = 0; i < n_mux_regs; i++) {
+		if ((i % MAX_LRI_SIZE) == 0) {
+			n_lri = min(n_mux_regs - i, MAX_LRI_SIZE);
+			*batch++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+
+		*batch++ = i915_mmio_reg_offset(stream->oa_config->mux_regs[i].addr);
+		*batch++ = stream->oa_config->mux_regs[i].value;
+	}
+
+	return batch;
+}
+
 static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
 						 const struct i915_oa_config *oa_config)
 {
@@ -1793,6 +1861,15 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	if (ret)
 		goto out;
 
+	/*
+	 * Reload the workaround batchbuffer to include NOA muxes
+	 * reprogramming on context-switch, so we don't lose configurations
+	 * after switching away from a context with disabled slices/subslices.
+	 */
+	ret = logical_render_ring_reload_wa_bb(dev_priv->engine[RCS]);
+	if (ret)
+		return ret;
+
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
 		struct intel_context *ce = &ctx->engine[RCS];
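
For reviewers, a standalone sketch (illustration only, not kernel code;
the helper names and register counts are made up) of how the emission
loop in i915_oa_emit_perctx_bb() above splits the mux writes into
MI_LOAD_REGISTER_IMM packets: a new header is emitted every
MAX_LRI_SIZE registers, sized to whatever remains.

#include <stdio.h>
#include <stdint.h>

#define MAX_LRI_SIZE 125U

static uint32_t min_u32(uint32_t a, uint32_t b)
{
	return a < b ? a : b;
}

/* Print the LRI packet sizes the loop above would emit for n_mux_regs. */
static void show_chunks(uint32_t n_mux_regs)
{
	uint32_t i;

	printf("%3u registers:", n_mux_regs);
	for (i = 0; i < n_mux_regs; i++) {
		if ((i % MAX_LRI_SIZE) == 0)
			printf(" LRI(%u)", min_u32(n_mux_regs - i, MAX_LRI_SIZE));
	}
	printf("\n");
}

int main(void)
{
	show_chunks(100);	/* LRI(100) */
	show_chunks(300);	/* LRI(125) LRI(125) LRI(50) */
	return 0;
}

In the batch itself each header is followed by the offset/value pairs
for that chunk, as emitted by the loop above.
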
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c7e7c355b0a7..60639624045b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -217,6 +217,8 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
+static void execlists_init_reg_state_wa_bb(u32 *reg_state,
+					    struct intel_engine_cs *engine);
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -1055,6 +1057,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
  */
 static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	batch = i915_oa_emit_perctx_bb(engine, batch);
+
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
 	*batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
 	*batch++ = MI_BATCH_BUFFER_END;
@@ -1118,21 +1122,27 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 
 static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	batch = i915_oa_emit_perctx_bb(engine, batch);
+
 	*batch++ = MI_BATCH_BUFFER_END;
 
 	return batch;
 }
 
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+/* Reserve 200 dwords for indirect & per-ctx bb */
+#define CTX_WA_BB_MIN_DWORDS (200)
 
 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine,
 			    struct i915_ctx_workarounds *wa_ctx)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	u32 size = DIV_ROUND_UP(i915_oa_get_perctx_bb_size(engine->i915) +
+				4 * CTX_WA_BB_MIN_DWORDS,
+				PAGE_SIZE) * PAGE_SIZE;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create(engine->i915, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1142,7 +1152,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine,
 		goto err;
 	}
 
-	err = i915_vma_pin(vma, 0, CTX_WA_BB_OBJ_SIZE, PIN_GLOBAL | PIN_HIGH);
+	err = i915_vma_pin(vma, 0, size, PIN_GLOBAL | PIN_HIGH);
 	if (err)
 		goto err;
 
@@ -1215,7 +1225,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine,
 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
 	}
 
-	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
+	BUG_ON(batch_ptr - batch > wa_ctx->vma->obj->base.size);
 
 	kunmap_atomic(batch);
 	if (ret)
@@ -1844,6 +1854,52 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_ctx_workarounds new_wa_ctx;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (WARN_ON(engine->id != RCS))
+		return -EINVAL;
+
+	memset(&new_wa_ctx, 0, sizeof(new_wa_ctx));
+	ret = intel_init_workaround_bb(engine, &new_wa_ctx);
+	if (ret)
+		return ret;
+
+	if (engine->wa_ctx.vma)
+		lrc_destroy_wa_ctx(engine);
+
+	memcpy(&engine->wa_ctx, &new_wa_ctx, sizeof(engine->wa_ctx));
+
+	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct intel_context *ce = &ctx->engine[RCS];
+		u32 *regs;
+
+		/* Settings will be set upon first use. */
+		if (!ce->state)
+			continue;
+
+		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+		if (IS_ERR(regs)) {
+			ret = PTR_ERR(regs);
+			break;
+		}
+
+		ce->state->obj->mm.dirty = true;
+		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+
+		if (engine->wa_ctx.vma)
+			execlists_init_reg_state_wa_bb(regs, engine);
+
+		i915_gem_object_unpin_map(ce->state->obj);
+	}
+
+	return ret;
+}
+
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
 {
 	logical_ring_setup(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ef6a6143f5d..bbaf9f563ad5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -65,6 +65,7 @@ enum {
 /* Logical Rings */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
 int logical_render_ring_init(struct intel_engine_cs *engine);
+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine);
 int logical_xcs_ring_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
-- 
2.14.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx