If some of the contexts submitting workloads to the GPU have been
configured to shut down slices/subslices, we might lose the NOA
configurations written in the NOA muxes. We need to reprogram them at
context switch.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 +
 drivers/gpu/drm/i915/i915_perf.c | 92 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_lrc.c | 71 +++++++++++++++++++++---
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 4 files changed, 153 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fdf71164ee24..b15f1fa4453a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3263,6 +3263,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    uint32_t *reg_state);
+u32 i915_oa_get_perctx_bb_size(struct intel_engine_cs *engine);
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2fc9c85a0d99..d1598f54e63b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1752,6 +1752,71 @@ static int gen8_emit_oa_config(struct i915_request *rq,
 	return 0;
 }
 
+#define MAX_LRI_SIZE (125U)
+
+u32 i915_oa_get_perctx_bb_size(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+	struct i915_oa_config *oa_config;
+	u32 n_lri;
+
+	/* We only care about RCS. */
+	if (engine->id != RCS)
+		return 0;
+
+	/* Perf not supported. */
+	if (!dev_priv->perf.initialized)
+		return 0;
+
+	/* OA not currently configured. */
+	if (!stream)
+		return 0;
+
+	oa_config = stream->oa_config;
+
+	/* Very unlikely but possible that we have no muxes to configure. */
+	if (!oa_config->mux_regs_len)
+		return 0;
+
+	n_lri = (oa_config->mux_regs_len / MAX_LRI_SIZE) +
+		((oa_config->mux_regs_len % MAX_LRI_SIZE) != 0);
+
+	/* Return the size of MI_LOAD_REGISTER_IMMs + PIPE_CONTROL. */
+	return n_lri * 4 + oa_config->mux_regs_len * 8 + 24;
+}
+
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_oa_config *oa_config;
+	u32 i, n_loaded_regs;
+
+	if (i915_oa_get_perctx_bb_size(engine) == 0)
+		return batch;
+
+	oa_config = dev_priv->perf.oa.exclusive_stream->oa_config;
+
+	n_loaded_regs = 0;
+	for (i = 0; i < oa_config->mux_regs_len; i++) {
+		if ((n_loaded_regs % MAX_LRI_SIZE) == 0) {
+			u32 n_lri = min(oa_config->mux_regs_len - n_loaded_regs,
+					MAX_LRI_SIZE);
+			*batch++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+
+		*batch++ = i915_mmio_reg_offset(oa_config->mux_regs[i].addr);
+		*batch++ = oa_config->mux_regs[i].value;
+		n_loaded_regs++;
+	}
+
+	batch = gen8_emit_pipe_control(batch,
+				       PIPE_CONTROL_MMIO_WRITE,
+				       0);
+
+	return batch;
+}
+
 static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
 						 const struct i915_oa_config *oa_config)
 {
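A note on the sizing math above, since the rounding is easy to get wrong:
+ binds tighter than !=, so the modulo test needs its own parentheses or
n_lri collapses to 0 or 1. Here is a minimal standalone sketch of the
intended computation (plain userspace C, not kernel code; the 4-byte LRI
header, 8-byte register/value pair and 24-byte PIPE_CONTROL figures are
assumed from the hunk above):

	#include <stdio.h>

	#define MAX_LRI_SIZE 125U

	static unsigned int perctx_bb_size(unsigned int mux_regs_len)
	{
		/* One MI_LOAD_REGISTER_IMM header per batch of up to 125 pairs. */
		unsigned int n_lri = (mux_regs_len / MAX_LRI_SIZE) +
				     ((mux_regs_len % MAX_LRI_SIZE) != 0);

		/* LRI headers + register/value pairs + trailing PIPE_CONTROL. */
		return n_lri * 4 + mux_regs_len * 8 + 24;
	}

	int main(void)
	{
		/* 200 mux registers -> 2 LRIs: 2 * 4 + 200 * 8 + 24 = 1632 bytes. */
		printf("%u\n", perctx_bb_size(200));
		return 0;
	}

The MAX_LRI_SIZE cap mirrors the limit the patch assumes for the number of
register/value pairs a single MI_LOAD_REGISTER_IMM packet may carry.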
@@ -1829,7 +1894,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	/* Switch away from any user context. */
 	ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
 	if (ret)
-		goto out;
+		return ret;
 
 	/*
 	 * The OA register config is setup through the context image. This image
@@ -1846,7 +1911,16 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	 */
 	ret = i915_gem_wait_for_idle(dev_priv, wait_flags);
 	if (ret)
-		goto out;
+		return ret;
+
+	/*
+	 * Reload the workaround batchbuffer to include NOA muxes
+	 * reprogramming on context-switch, so we don't lose configurations
+	 * after switching away from a context with disabled slices/subslices.
+	 */
+	ret = logical_render_ring_reload_wa_bb(dev_priv->engine[RCS]);
+	if (ret)
+		return ret;
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
@@ -1858,10 +1932,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 			continue;
 
 		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
-		if (IS_ERR(regs)) {
-			ret = PTR_ERR(regs);
-			goto out;
-		}
+		if (IS_ERR(regs))
+			return PTR_ERR(regs);
 
 		ce->state->obj->mm.dirty = true;
 		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
@@ -1871,7 +1943,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 		i915_gem_object_unpin_map(ce->state->obj);
 	}
 
- out:
 	return ret;
 }
 
@@ -2213,6 +2284,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	dev_priv->perf.oa.exclusive_stream = stream;
 
+	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
+						      stream->oa_config);
+	if (ret)
+		goto err_enable;
+
+	stream->ops = &i915_oa_stream_ops;
+
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7a5efab3e4fb..a0f72fcda0d9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -169,6 +169,8 @@ static void execlists_init_reg_state(u32 *reg_state,
 				     struct i915_gem_context *ctx,
 				     struct intel_engine_cs *engine,
 				     struct intel_ring *ring);
+static void execlists_init_reg_state_wa_bb(u32 *reg_state,
+					   struct intel_engine_cs *engine);
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
@@ -1584,16 +1586,28 @@ gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	return batch;
 }
 
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+static u32 *gen_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	batch = i915_oa_emit_perctx_bb(engine, batch);
+	*batch++ = MI_BATCH_BUFFER_END;
+
+	return batch;
+}
+
+/* Reserve a minimum of 200 dwords for indirect bb */
+#define CTX_WA_BB_MIN_DWORDS (200)
 
 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine,
 			    struct i915_ctx_workarounds *wa_ctx)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	u32 n_pages = DIV_ROUND_UP(i915_oa_get_perctx_bb_size(engine) +
+				   4 * CTX_WA_BB_MIN_DWORDS,
+				   PAGE_SIZE);
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create(engine->i915, n_pages * PAGE_SIZE);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1639,15 +1653,15 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine,
 	switch (INTEL_GEN(engine->i915)) {
 	case 10:
 		wa_bb_fn[0] = gen10_init_indirectctx_bb;
-		wa_bb_fn[1] = NULL;
+		wa_bb_fn[1] = gen_init_perctx_bb;
 		break;
 	case 9:
 		wa_bb_fn[0] = gen9_init_indirectctx_bb;
-		wa_bb_fn[1] = NULL;
+		wa_bb_fn[1] = gen_init_perctx_bb;
 		break;
 	case 8:
 		wa_bb_fn[0] = gen8_init_indirectctx_bb;
-		wa_bb_fn[1] = NULL;
+		wa_bb_fn[1] = gen_init_perctx_bb;
 		break;
 	default:
 		MISSING_CASE(INTEL_GEN(engine->i915));
@@ -1680,7 +1694,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine,
 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
 	}
 
-	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
+	BUG_ON(batch_ptr - batch > wa_ctx->vma->obj->base.size);
 
 	kunmap_atomic(batch);
 	if (ret)
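The object sizing in lrc_setup_wa_ctx() above can be checked the same way:
the per-context BB size plus the 200 reserved dwords is rounded up to whole
pages. A minimal standalone sketch of that rounding (userspace C; PAGE_SIZE
assumed to be 4096 and the 1632-byte per-context BB size carried over from
the earlier example):

	#include <stdio.h>

	#define PAGE_SIZE 4096U
	#define CTX_WA_BB_MIN_DWORDS 200U
	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned int perctx_bb_size = 1632;	/* e.g. 200 mux registers */
		unsigned int n_pages = DIV_ROUND_UP(perctx_bb_size +
						    4 * CTX_WA_BB_MIN_DWORDS,
						    PAGE_SIZE);

		/* 1632 + 800 = 2432 bytes -> 1 page. */
		printf("%u page(s)\n", n_pages);
		return 0;
	}

Sizing the object from the current OA config is also why the BUG_ON below
now compares against the actual object size rather than a fixed
one-page constant.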
@@ -2321,6 +2335,51 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_ctx_workarounds new_wa_ctx;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (WARN_ON(engine->id != RCS))
+		return -EINVAL;
+
+	memset(&new_wa_ctx, 0, sizeof(new_wa_ctx));
+	ret = intel_init_workaround_bb(engine, &new_wa_ctx);
+	if (ret)
+		return ret;
+
+	if (engine->wa_ctx.vma)
+		lrc_destroy_wa_ctx(engine);
+
+	memcpy(&engine->wa_ctx, &new_wa_ctx, sizeof(engine->wa_ctx));
+
+	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct intel_context *ce = &ctx->engine[RCS];
+		u32 *regs;
+
+		/* Settings will be set upon first use. */
+		if (!ce->state)
+			continue;
+
+		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+		if (IS_ERR(regs)) {
+			ret = PTR_ERR(regs);
+			break;
+		}
+
+		ce->state->obj->mm.dirty = true;
+		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+
+		execlists_init_reg_state_wa_bb(regs, engine);
+
+		i915_gem_object_unpin_map(ce->state->obj);
+	}
+
+	return ret;
+}
+
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
 {
 	logical_ring_setup(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86012e9..d91d69a17206 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -71,6 +71,7 @@ enum {
 /* Logical Rings */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
 int logical_render_ring_init(struct intel_engine_cs *engine);
+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine);
 int logical_xcs_ring_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
-- 
2.17.0
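As a postscript, the LRI batching that i915_oa_emit_perctx_bb() performs can
be exercised on its own. This is a sketch under stated assumptions, not
kernel code: the MI_LOAD_REGISTER_IMM header encoding mirrors i915's
MI_INSTR(0x22, 2*n - 1), and the NOA mux addresses/values are made up:

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_LRI_SIZE 125U

	/* Assumed header encoding, mirroring i915's MI_INSTR(0x22, 2*(n)-1). */
	#define MI_LOAD_REGISTER_IMM(n) (0x22u << 23 | (2u * (n) - 1))

	struct reg_write {
		uint32_t addr;
		uint32_t value;
	};

	/* Emit register writes in batches of at most MAX_LRI_SIZE per packet. */
	static uint32_t *emit_lri(uint32_t *batch, const struct reg_write *regs,
				  uint32_t count)
	{
		uint32_t i;

		for (i = 0; i < count; i++) {
			/* Start a fresh LRI header every MAX_LRI_SIZE writes. */
			if ((i % MAX_LRI_SIZE) == 0) {
				uint32_t n = count - i < MAX_LRI_SIZE ?
					     count - i : MAX_LRI_SIZE;

				*batch++ = MI_LOAD_REGISTER_IMM(n);
			}
			*batch++ = regs[i].addr;
			*batch++ = regs[i].value;
		}

		return batch;
	}

	int main(void)
	{
		/* Made-up NOA mux writes; real values come from the OA config. */
		const struct reg_write regs[] = {
			{ 0x9888, 0x15340000 },
			{ 0x9888, 0x00000140 },
			{ 0x9888, 0x00000000 },
		};
		uint32_t batch[16];
		uint32_t *end = emit_lri(batch, regs, 3);

		/* 1 LRI header + 3 register/value pairs = 7 dwords. */
		printf("emitted %td dwords\n", end - batch);
		return 0;
	}

Emitting a fresh header per batch of up to 125 pairs is exactly what lets
the kernel-side size computation count one 4-byte LRI header per
MAX_LRI_SIZE register writes.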