If some of the contexts submitting workloads to the GPU have been
configured to shut down slices/subslices, we might lose the NOA
configurations written in the NOA muxes. We need to reprogram them at
context switch.

v2: Do reprogramming in indirect-ctx batchbuffer (Chris)
    Simplify emission by reusing i915_oa_get_perctx_bb_size() (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h  |   2 +
 drivers/gpu/drm/i915/i915_perf.c | 130 ++++++++++++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_lrc.c |  61 +++++++++++++++++-
 drivers/gpu/drm/i915/intel_lrc.h |   1 +
 4 files changed, 156 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 43d83ffae2d3..5157bf68323e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3706,6 +3706,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    uint32_t *reg_state);
+u32 i915_oa_get_perctx_bb_size(struct intel_engine_cs *engine);
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1b753c53abfa..0e049567c7a1 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1688,6 +1688,67 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
 	return 0;
 }
 
+#define MAX_LRI_SIZE (125U)
+
+u32 i915_oa_get_perctx_bb_size(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+	struct i915_oa_config *oa_config;
+	u32 n_lri;
+
+	/* We only care about RCS. */
+	if (engine->id != RCS)
+		return 0;
+
+	/* Perf not supported. */
+	if (!dev_priv->perf.initialized)
+		return 0;
+
+	/* OA not currently configured. */
+	if (!stream)
+		return 0;
+
+	oa_config = stream->oa_config;
+
+	/* Very unlikely but possible that we have no muxes to configure. */
+	if (!oa_config->mux_regs_len)
+		return 0;
+
+	n_lri = (oa_config->mux_regs_len / MAX_LRI_SIZE) +
+		((oa_config->mux_regs_len % MAX_LRI_SIZE) != 0);
+
+	/* Return the size of MI_LOAD_REGISTER_IMMs. */
+	return n_lri * 4 + oa_config->mux_regs_len * 8;
+}
+
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_oa_config *oa_config;
+	u32 i, n_loaded_regs;
+
+	if (i915_oa_get_perctx_bb_size(engine) == 0)
+		return batch;
+
+	oa_config = dev_priv->perf.oa.exclusive_stream->oa_config;
+
+	n_loaded_regs = 0;
+	for (i = 0; i < oa_config->mux_regs_len; i++) {
+		if ((n_loaded_regs % MAX_LRI_SIZE) == 0) {
+			u32 n_lri = min(oa_config->mux_regs_len - n_loaded_regs,
+					MAX_LRI_SIZE);
+			*batch++ = MI_LOAD_REGISTER_IMM(n_lri);
+		}
+
+		*batch++ = i915_mmio_reg_offset(oa_config->mux_regs[i].addr);
+		*batch++ = oa_config->mux_regs[i].value;
+		n_loaded_regs++;
+	}
+
+	return batch;
+}
+
 static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
 						 const struct i915_oa_config *oa_config)
 {
@@ -1754,28 +1815,17 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr
  *
  * Note: it's only the RCS/Render context that has any OA state.
  */
-static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
-				       const struct i915_oa_config *oa_config,
-				       bool interruptible)
+static int gen8_configure_all_contexts_unlocked(struct drm_i915_private *dev_priv,
+						const struct i915_oa_config *oa_config,
+						unsigned int wait_flags)
 {
 	struct i915_gem_context *ctx;
 	int ret;
-	unsigned int wait_flags = I915_WAIT_LOCKED;
-
-	if (interruptible) {
-		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-		if (ret)
-			return ret;
-
-		wait_flags |= I915_WAIT_INTERRUPTIBLE;
-	} else {
-		mutex_lock(&dev_priv->drm.struct_mutex);
-	}
 
 	/* Switch away from any user context. */
 	ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
 	if (ret)
-		goto out;
+		return ret;
 
 	/*
 	 * The OA register config is setup through the context image. This image
@@ -1792,7 +1842,16 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 	 */
 	ret = i915_gem_wait_for_idle(dev_priv, wait_flags);
 	if (ret)
-		goto out;
+		return ret;
+
+	/*
+	 * Reload the workaround batchbuffer to include NOA muxes
+	 * reprogramming on context-switch, so we don't lose configurations
+	 * after switching away from a context with disabled slices/subslices.
+	 */
+	ret = logical_render_ring_reload_wa_bb(dev_priv->engine[RCS]);
+	if (ret)
+		return ret;
 
 	/* Update all contexts now that we've stalled the submission. */
 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
@@ -1804,10 +1863,8 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 			continue;
 
 		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
-		if (IS_ERR(regs)) {
-			ret = PTR_ERR(regs);
-			goto out;
-		}
+		if (IS_ERR(regs))
+			return PTR_ERR(regs);
 
 		ce->state->obj->mm.dirty = true;
 		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
@@ -1817,9 +1874,6 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
 		i915_gem_object_unpin_map(ce->state->obj);
 	}
 
- out:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
 	return ret;
 }
 
@@ -1864,7 +1918,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = gen8_configure_all_contexts(dev_priv, oa_config, true);
+	ret = gen8_configure_all_contexts_unlocked(dev_priv, oa_config,
+						   I915_WAIT_LOCKED | I915_WAIT_INTERRUPTIBLE);
 	if (ret)
 		return ret;
 
@@ -1879,7 +1934,11 @@
 static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
 {
 	/* Reset all contexts' slices/subslices configurations. */
-	gen8_configure_all_contexts(dev_priv, NULL, false);
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
+	gen8_configure_all_contexts_unlocked(dev_priv, NULL, I915_WAIT_LOCKED);
+
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
 				      ~GT_NOA_ENABLE));
@@ -2129,13 +2188,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	if (ret)
 		goto err_oa_buf_alloc;
 
-	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
-						      stream->oa_config);
-	if (ret)
-		goto err_enable;
-
-	stream->ops = &i915_oa_stream_ops;
-
 	/* Lock device for exclusive_stream access late because
 	 * enable_metric_set() might lock as well on gen8+.
 	 */
@@ -2145,14 +2197,22 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	dev_priv->perf.oa.exclusive_stream = stream;
 
+	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
+						      stream->oa_config);
+	if (ret)
+		goto err_enable;
+
+	stream->ops = &i915_oa_stream_ops;
+
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	return 0;
 
-err_lock:
-	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
-
 err_enable:
+	dev_priv->perf.oa.exclusive_stream = NULL;
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+err_lock:
 	free_oa_buffer(dev_priv);
 
 err_oa_buf_alloc:
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 1cb67f8c0ff9..f8aaa59eca86 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -217,6 +217,8 @@ static void execlists_init_reg_state(u32 *reg_state,
 					struct i915_gem_context *ctx,
 					struct intel_engine_cs *engine,
 					struct intel_ring *ring);
+static void execlists_init_reg_state_wa_bb(u32 *reg_state,
+					   struct intel_engine_cs *engine);
 
 /**
  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -1014,6 +1016,8 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
  */
 static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	batch = i915_oa_emit_perctx_bb(engine, batch);
+
 	/* WaDisableCtxRestoreArbitration:bdw,chv */
 	*batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
 
@@ -1064,6 +1068,8 @@ static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 
 static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 {
+	batch = i915_oa_emit_perctx_bb(engine, batch);
+
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
@@ -1123,16 +1129,20 @@ static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	return batch;
 }
 
-#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE)
+/* Reserve 200 dwords for indirect & per-ctx bb */
+#define CTX_WA_BB_MIN_DWORDS (200)
 
 static int lrc_setup_wa_ctx(struct intel_engine_cs *engine,
 			    struct i915_ctx_workarounds *wa_ctx)
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	u32 size = DIV_ROUND_UP(i915_oa_get_perctx_bb_size(engine) +
+				4 * CTX_WA_BB_MIN_DWORDS,
+				PAGE_SIZE) * PAGE_SIZE;
 	int err;
 
-	obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE);
+	obj = i915_gem_object_create(engine->i915, size);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -1215,7 +1225,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine,
 		wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset);
 	}
 
-	BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE);
+	BUG_ON(batch_ptr - batch > wa_ctx->vma->obj->base.size);
 
 	kunmap_atomic(batch);
 	if (ret)
@@ -1844,6 +1854,51 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	return logical_ring_init(engine);
 }
 
+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *dev_priv = engine->i915;
+	struct i915_ctx_workarounds new_wa_ctx;
+	struct i915_gem_context *ctx;
+	int ret;
+
+	if (WARN_ON(engine->id != RCS))
+		return -EINVAL;
+
+	memset(&new_wa_ctx, 0, sizeof(new_wa_ctx));
+	ret = intel_init_workaround_bb(engine, &new_wa_ctx);
+	if (ret)
+		return ret;
+
+	if (engine->wa_ctx.vma)
+		lrc_destroy_wa_ctx(engine);
+
+	memcpy(&engine->wa_ctx, &new_wa_ctx, sizeof(engine->wa_ctx));
+
+	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
+		struct intel_context *ce = &ctx->engine[RCS];
+		u32 *regs;
+
+		/* Settings will be set upon first use. */
+		if (!ce->state)
+			continue;
+
+		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
+		if (IS_ERR(regs)) {
+			ret = PTR_ERR(regs);
+			break;
+		}
+
+		ce->state->obj->mm.dirty = true;
+		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+
+		execlists_init_reg_state_wa_bb(regs, engine);
+
+		i915_gem_object_unpin_map(ce->state->obj);
+	}
+
+	return ret;
+}
+
 int logical_xcs_ring_init(struct intel_engine_cs *engine)
 {
 	logical_ring_setup(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4ef6a6143f5d..bbaf9f563ad5 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -65,6 +65,7 @@ enum {
 /* Logical Rings */
 void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
 int logical_render_ring_init(struct intel_engine_cs *engine);
+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine);
 int logical_xcs_ring_init(struct intel_engine_cs *engine);
 
 /* Logical Ring Contexts */
-- 
2.14.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
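
For readers less familiar with the sizing math in i915_oa_get_perctx_bb_size(),
here is a small stand-alone sketch (plain user-space C, not kernel code) of how
the batch size is derived. The constants come from the patch above: at most 125
register writes per MI_LOAD_REGISTER_IMM, one dword (4 bytes) per LRI header and
two dwords (offset + value, 8 bytes) per register write. The helper name
perctx_bb_size and the 300-register example are purely illustrative.

#include <stdio.h>

/* Max register writes per MI_LOAD_REGISTER_IMM packet, as in the patch. */
#define MAX_LRI_SIZE 125U

/* Bytes of batch space needed to reprogram n_regs NOA mux registers. */
static unsigned int perctx_bb_size(unsigned int n_regs)
{
	unsigned int n_lri;

	if (n_regs == 0)
		return 0;

	/* One LRI header per group of up to MAX_LRI_SIZE writes. */
	n_lri = n_regs / MAX_LRI_SIZE + (n_regs % MAX_LRI_SIZE != 0);

	/* 4 bytes per LRI header + 8 bytes (offset + value) per register. */
	return n_lri * 4 + n_regs * 8;
}

int main(void)
{
	/* A hypothetical config with 300 mux registers needs 3 LRI packets:
	 * 3 * 4 + 300 * 8 = 2412 bytes of batch space.
	 */
	printf("%u\n", perctx_bb_size(300));
	return 0;
}

Under these assumptions a config that fits in a single LRI (up to 125 mux
writes) costs 4 + 8 * n bytes, which matches the return expression in
i915_oa_get_perctx_bb_size().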