Dynamic slices/subslices shutdown will effectivelly loose the NOA configuration uploaded in the slices/subslices. Here we introduce a new parameter to configure the i915 perf driver when userspace wants to monitor parts of the GPU within slices/subslices and it knows that some of the workloads running on the system will disable slices/subslices. This new configuration option will force the i915 driver to reemit NOA configurations between context switches. v2: Make sure we handle configs with more register writes than the max MI_LOAD_REGISTER_IMM can do (Lionel) v3: Introduce new parameter to the perf driver to make reprogramming optional (Lionel) Fix off by one calculation of number of required MI_LOAD_REGISTER_IMM (Lionel) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 15 +++++ drivers/gpu/drm/i915/i915_perf.c | 100 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 3 + include/uapi/drm/i915_drm.h | 10 ++++ 4 files changed, 128 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d53419e9bfa2..7ef1908e3a98 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1964,6 +1964,12 @@ struct i915_perf_stream { struct i915_gem_context *ctx; /** + * @noa_restore: Whether the user who opened the stream requested NOA + * config to be restored between context switches. + */ + bool noa_restore; + + /** * @enabled: Whether the stream is currently enabled, considering * whether the stream was opened in a disabled state and based * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. @@ -2401,6 +2407,7 @@ struct drm_i915_private { const struct i915_oa_reg *mux_regs[6]; int mux_regs_lens[6]; int n_mux_configs; + int total_n_mux_regs; const struct i915_oa_reg *b_counter_regs; int b_counter_regs_len; @@ -2493,6 +2500,13 @@ struct drm_i915_private { struct i915_oa_ops ops; const struct i915_oa_format *oa_formats; int n_builtin_sets; + + /** + * Whether the user has requested the NOA + * configuration to be reprogrammed between context + * switches. + */ + atomic_t noa_restore; } oa; } perf; @@ -3543,6 +3557,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, uint32_t *reg_state); +int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 5222dac1ee5b..3f49ce69641b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -350,6 +350,8 @@ struct perf_open_properties { u64 single_context:1; u64 ctx_handle; + bool noa_restore; + /* OA sampling state */ int metrics_set; int oa_format; @@ -1451,11 +1453,24 @@ static void config_oa_regs(struct drm_i915_private *dev_priv, } } +static void count_total_mux_regs(struct drm_i915_private *dev_priv) +{ + int i; + + dev_priv->perf.oa.total_n_mux_regs = 0; + for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { + dev_priv->perf.oa.total_n_mux_regs += + dev_priv->perf.oa.mux_regs_lens[i]; + } +} + static int hsw_enable_metric_set(struct drm_i915_private *dev_priv) { int ret = i915_oa_select_metric_set_hsw(dev_priv); int i; + count_total_mux_regs(dev_priv); + if (ret) return ret; @@ -1777,6 +1792,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv) int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv); int i; + count_total_mux_regs(dev_priv); + if (ret) return ret; @@ -2065,6 +2082,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, return ret; } + if (props->noa_restore) { + stream->noa_restore = true; + atomic_inc(&dev_priv->perf.oa.noa_restore); + } + ret = alloc_oa_buffer(dev_priv); if (ret) goto err_oa_buf_alloc; @@ -2121,6 +2143,74 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine, gen8_update_reg_state_unlocked(ctx, reg_state); } +int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req) +{ + struct drm_i915_private *dev_priv = req->i915; + int max_load = 125; + int n_lri, n_registers, n_loaded_register; + int i, j; + u32 *cs; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + /* Perf not supported. */ + if (!dev_priv->perf.initialized) + return 0; + + /* + * We do not expect dynamic slice/subslice configuration to change + * across contexts prior to Gen8. + */ + if (INTEL_GEN(dev_priv) < 8) + return 0; + + /* Has the user requested that NOA configuration be restored? */ + if (!atomic_read(&dev_priv->perf.oa.noa_restore)) + return 0; + + n_registers = dev_priv->perf.oa.total_n_mux_regs; + n_lri = (n_registers / max_load) + (n_registers % max_load) != 0; + + cs = intel_ring_begin(req, + 3 * 2 + /* MI_LOAD_REGISTER_IMM for chicken registers */ + n_lri + /* MI_LOAD_REGISTER_IMM for mux registers */ + n_registers * 2 + /* offset & value for mux registers*/ + 1 /* NOOP */); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS); + *cs++ = 0xA0; + + n_loaded_register = 0; + for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) { + const struct i915_oa_reg *mux_regs = + dev_priv->perf.oa.mux_regs[i]; + const int mux_regs_len = dev_priv->perf.oa.mux_regs_lens[i]; + + for (j = 0; j < mux_regs_len; j++) { + if ((n_loaded_register % max_load) == 0) { + n_lri = min(n_registers - n_loaded_register, max_load); + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + + *cs++ = i915_mmio_reg_offset(mux_regs[j].addr); + *cs++ = mux_regs[j].value; + n_loaded_register++; + } + } + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS); + *cs++ = 0x80; + + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + return 0; +} + /** * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation * @stream: An i915 perf stream @@ -2433,9 +2523,14 @@ static long i915_perf_ioctl(struct file *file, */ static void i915_perf_destroy_locked(struct i915_perf_stream *stream) { + struct drm_i915_private *dev_priv = stream->dev_priv; + if (stream->enabled) i915_perf_disable_locked(stream); + if (stream->noa_restore) + atomic_dec(&dev_priv->perf.oa.noa_restore); + if (stream->ops->destroy) stream->ops->destroy(stream); @@ -2769,6 +2864,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->oa_periodic = true; props->oa_period_exponent = value; break; + case DRM_I915_PERF_PROP_NOA_RESTORE: + props->noa_restore = true; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; @@ -3129,6 +3227,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->perf.lock); spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock); + atomic_set(&dev_priv->perf.oa.noa_restore, 0); + oa_sample_rate_hard_limit = dev_priv->perf.oa.timestamp_frequency / 2; dev_priv->perf.sysctl_header = register_sysctl_table(dev_root); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index acd1da9b62a3..67aaaebb194b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1874,6 +1874,9 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req, !(dispatch_flags & I915_DISPATCH_SECURE); u32 *cs; + /* Emit NOA config */ + i915_oa_emit_noa_config_locked(req); + cs = intel_ring_begin(req, 4); if (IS_ERR(cs)) return PTR_ERR(cs); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 15bc9f78ba4d..27cc72a85d07 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1366,6 +1366,16 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_OA_EXPONENT, + /** + * Specifying this property will alterate the behavior of the i915 + * driver, by forcing the driver to restore the NOA configuration of + * the OA unit between context switches. You should only use this flag + * if you think that that at least one application running on the + * system is using a SSEU configurations with disabled + * slices/subslices. + */ + DRM_I915_PERF_PROP_NOA_RESTORE, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; -- 2.11.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx