Re: [RFC PATCH 4/4] drm/i915: reprogram NOA muxes on context switch when using perf

Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> · Wed, 30 Aug 2017 20:33:37 +0100

On 30/08/17 20:15, Chris Wilson wrote:
Quoting Lionel Landwerlin (2017-08-30 19:20:06)
If some of the contexts submitting workloads to the GPU have been
configured to shutdown slices/subslices, we might loose the NOA
configurations written in the NOA muxes. We need to reprogram then at
context switch.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
  drivers/gpu/drm/i915/i915_drv.h  |  2 ++
  drivers/gpu/drm/i915/i915_perf.c | 77 ++++++++++++++++++++++++++++++++++++++++
  drivers/gpu/drm/i915/intel_lrc.c | 64 ++++++++++++++++++++++++++++++---
  drivers/gpu/drm/i915/intel_lrc.h |  1 +
  4 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0003b46b6840..d4b3e5da9009 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3685,6 +3685,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
                             struct i915_gem_context *ctx,
                             uint32_t *reg_state);
+u32 i915_oa_get_perctx_bb_size(struct drm_i915_private *dev_priv);
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch);
  
  /* i915_gem_evict.c */
  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 94185d610673..b74ffbb47879 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1687,6 +1687,74 @@ static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
         return 0;
  }
  
+#define MAX_LRI_SIZE (125U)
+
+u32 i915_oa_get_perctx_bb_size(struct drm_i915_private *dev_priv)
+{
+       struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
Still not happy by this coupling to struct_mutex. :-p

Missed RCS check.

+
+       /* Perf not supported. */
+       if (!dev_priv->perf.initialized)
+               return 0;
+
+       /* OA not currently configured. */
+       if (!stream)
+               return 0;
+
+       /* Very unlikely but possible that we have no muxes to configure. */
+       if (!stream->oa_config->mux_regs_len)
+               return 0;
+
+       /* Return the size of MI_LOAD_REGISTER_IMMs. */
+       return (stream->oa_config->mux_regs_len / MAX_LRI_SIZE) * 4 + 4 +
+               stream->oa_config->mux_regs_len * 8;
+}
+
+u32 *i915_oa_emit_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
+{
+       struct drm_i915_private *dev_priv = engine->i915;
+       struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;
+       u32 n_lri, n_mux_regs;
+       u32 i;
+
+       lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+       /* We only care about RCS. */
+       if (engine->id != RCS)
+               return batch;
+
+       /* Perf not supported. */
+       if (!dev_priv->perf.initialized)
+               return batch;
+
+       /* OA not currently configured. */
+       if (!stream)
+               return batch;
+
+       /* It's very unlikely, but possible that we're dealing with a config
+        * with no mux to configure.
+        */
+       if (!stream->oa_config->mux_regs_len)
+               return batch;
The above could be condensed into
if (i915_oa_get_perctx_bb_size() == 0)
	return;

+
+       n_mux_regs = stream->oa_config->mux_regs_len;
+       n_lri = (n_mux_regs / MAX_LRI_SIZE) + (n_mux_regs % MAX_LRI_SIZE) != 0;
+
+       for (i = 0; i < n_mux_regs; i++) {
+               if ((i % MAX_LRI_SIZE) == 0) {
+                       n_lri = min(n_mux_regs - i, MAX_LRI_SIZE);
+                       *batch++ = MI_LOAD_REGISTER_IMM(n_lri);
+               }
+
+               *batch++ = i915_mmio_reg_offset(stream->oa_config->mux_regs[i].addr);
+               *batch++ = stream->oa_config->mux_regs[i].value;
+       }
I would have personally used a double loop. But at least kill that first
n_lri, that was a moment of confusion spent trying to work out what you
were using it for.

+
+       return batch;
+}
  /**
   * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
@@ -1055,6 +1057,8 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
   */
  static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
  {
+       batch = i915_oa_emit_perctx_bb(engine, batch);
+
         /* WaDisableCtxRestoreArbitration:bdw,chv */
         *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
         *batch++ = MI_BATCH_BUFFER_END;
@@ -1118,21 +1122,27 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
  
  static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch)
  {
+       batch = i915_oa_emit_perctx_bb(engine, batch);
Wrong wa_bb. This is emitted at the start of every bb, you want
indirectctx_bb which is emitted after a context switch. Or at least I
hope you don't need such a heavy handed approach. If you do, can you
convince me that you are really not in conflict with the application?
(Earlier you said it only needs ctx switch.)

Thanks, I'll rerun my tests with indirectctx_bb.


+int logical_render_ring_reload_wa_bb(struct intel_engine_cs *engine)
intel_lrc_update_wa_bb().

+{
+       struct drm_i915_private *dev_priv = engine->i915;
+       struct i915_ctx_workarounds new_wa_ctx;
+       struct i915_gem_context *ctx;
+       int ret;
+
+       if (WARN_ON(engine->id != RCS))
+               return -EINVAL;
+
+       memset(&new_wa_ctx, 0, sizeof(new_wa_ctx));
+       ret = intel_init_workaround_bb(engine, &new_wa_ctx);
+       if (ret)
+               return ret;
+
+       if (engine->wa_ctx.vma)
+               lrc_destroy_wa_ctx(engine);
Couldn't we at least try to reuse the existing vma first?


_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx