On 5 June 2017 at 15:48, Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> wrote:
> Dynamic slices/subslices shutdown will effectivelly loose the NOA
> configuration uploaded in the slices/subslices.
>
> Here we introduce a new parameter to configure the i915 perf driver
> when userspace wants to monitor parts of the GPU within
> slices/subslices and it knows that some of the workloads running on
> the system will disable slices/subslices.
>
> This new configuration option will force the i915 driver to reemit NOA
> configurations between context switches.
>
> v2: Make sure we handle configs with more register writes than the max
>     MI_LOAD_REGISTER_IMM can do (Lionel)
>
> v3: Introduce new parameter to the perf driver to make reprogramming
>     optional (Lionel)
>     Fix off by one calculation of number of required
>     MI_LOAD_REGISTER_IMM (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  15 +++++
>  drivers/gpu/drm/i915/i915_perf.c        | 100 ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   3 +
>  include/uapi/drm/i915_drm.h             |  10 ++++
>  4 files changed, 128 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d53419e9bfa2..7ef1908e3a98 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1964,6 +1964,12 @@ struct i915_perf_stream {
>          struct i915_gem_context *ctx;
>
>          /**
> +         * @noa_restore: Whether the user who opened the stream requested NOA
> +         * config to be restored between context switches.
> +         */
> +        bool noa_restore;
> +
> +        /**
>           * @enabled: Whether the stream is currently enabled, considering
>           * whether the stream was opened in a disabled state and based
>           * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
> @@ -2401,6 +2407,7 @@ struct drm_i915_private {
>                          const struct i915_oa_reg *mux_regs[6];
>                          int mux_regs_lens[6];
>                          int n_mux_configs;
> +                        int total_n_mux_regs;
>
>                          const struct i915_oa_reg *b_counter_regs;
>                          int b_counter_regs_len;
> @@ -2493,6 +2500,13 @@ struct drm_i915_private {
>                          struct i915_oa_ops ops;
>                          const struct i915_oa_format *oa_formats;
>                          int n_builtin_sets;
> +
> +                        /**
> +                         * Whether the user has requested the NOA
> +                         * configuration to be reprogrammed between context
> +                         * switches.
> +                         */
> +                        atomic_t noa_restore;
>                  } oa;
>          } perf;
>
> @@ -3543,6 +3557,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
>  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>                              struct i915_gem_context *ctx,
>                              uint32_t *reg_state);
> +int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
>
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 5222dac1ee5b..3f49ce69641b 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -350,6 +350,8 @@ struct perf_open_properties {
>          u64 single_context:1;
>          u64 ctx_handle;
>
> +        bool noa_restore;

We need kernel doc for this, as pointed out by kbuild.
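Something along these lines would do, mirroring the member comment this patch
already adds to i915_perf_stream (wording is only a suggestion, and it could
equally be an @noa_restore: line in the struct's existing kernel-doc block):

        /**
         * @noa_restore: Whether the user requested the NOA configuration to
         * be re-emitted between context switches.
         */
        bool noa_restore;
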
> +
>          /* OA sampling state */
>          int metrics_set;
>          int oa_format;
> @@ -1451,11 +1453,24 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
>          }
>  }
>
> +static void count_total_mux_regs(struct drm_i915_private *dev_priv)
> +{
> +        int i;
> +
> +        dev_priv->perf.oa.total_n_mux_regs = 0;
> +        for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
> +                dev_priv->perf.oa.total_n_mux_regs +=
> +                        dev_priv->perf.oa.mux_regs_lens[i];
> +        }
> +}
> +
>  static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
>  {
>          int ret = i915_oa_select_metric_set_hsw(dev_priv);
>          int i;
>
> +        count_total_mux_regs(dev_priv);
> +
>          if (ret)
>                  return ret;
>
> @@ -1777,6 +1792,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
>          int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
>          int i;
>
> +        count_total_mux_regs(dev_priv);
> +
>          if (ret)
>                  return ret;
>
> @@ -2065,6 +2082,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>                          return ret;
>          }
>
> +        if (props->noa_restore) {
> +                stream->noa_restore = true;
> +                atomic_inc(&dev_priv->perf.oa.noa_restore);

You need to undo this in the onion teardown.

> +        }
> +
>          ret = alloc_oa_buffer(dev_priv);
>          if (ret)
>                  goto err_oa_buf_alloc;
>
> @@ -2121,6 +2143,74 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
>          gen8_update_reg_state_unlocked(ctx, reg_state);
>  }
>
> +int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
> +{
> +        struct drm_i915_private *dev_priv = req->i915;
> +        int max_load = 125;

Why 125?

> +        int n_lri, n_registers, n_loaded_register;
> +        int i, j;
> +        u32 *cs;
> +
> +        lockdep_assert_held(&dev_priv->drm.struct_mutex);
> +
> +        /* Perf not supported. */
> +        if (!dev_priv->perf.initialized)
> +                return 0;
> +
> +        /*
> +         * We do not expect dynamic slice/subslice configuration to change
> +         * across contexts prior to Gen8.
> +         */
> +        if (INTEL_GEN(dev_priv) < 8)
> +                return 0;
> +
> +        /* Has the user requested that NOA configuration be restored? */
> +        if (!atomic_read(&dev_priv->perf.oa.noa_restore))
> +                return 0;
> +
> +        n_registers = dev_priv->perf.oa.total_n_mux_regs;
> +        n_lri = (n_registers / max_load) + (n_registers % max_load) != 0;
> +
> +        cs = intel_ring_begin(req,
> +                              3 * 2 + /* MI_LOAD_REGISTER_IMM for chicken registers */
> +                              n_lri + /* MI_LOAD_REGISTER_IMM for mux registers */
> +                              n_registers * 2 + /* offset & value for mux registers*/
> +                              1 /* NOOP */);
> +        if (IS_ERR(cs))
> +                return PTR_ERR(cs);

Do we want to silently fail here? Shouldn't we be screaming and shouting?

Also, do we have to re-upload the NOA config unconditionally? Is it not
feasible to only do so when there is a change in the slice/subslice
configuration between contexts?
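On the teardown and error handling points, two untested sketches. For the
unwind, a matching decrement at the bottom of the existing onion would do,
assuming the later error labels all cascade down to err_oa_buf_alloc in the
usual fashion:

err_oa_buf_alloc:
        /* ... existing unwind for this label ... */

        /* drop the count taken in the props->noa_restore block above */
        if (stream->noa_restore)
                atomic_dec(&dev_priv->perf.oa.noa_restore);

For the silent failure, gen8_emit_bb_start() further down ignores the return
value altogether, so the PTR_ERR() above is simply lost; the caller could
propagate it instead:

        int err;

        /* fail the submission if the NOA config cannot be emitted */
        err = i915_oa_emit_noa_config_locked(req);
        if (err)
                return err;
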
> +
> +        *cs++ = MI_LOAD_REGISTER_IMM(1);
> +        *cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
> +        *cs++ = 0xA0;
> +
> +        n_loaded_register = 0;
> +        for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
> +                const struct i915_oa_reg *mux_regs =
> +                        dev_priv->perf.oa.mux_regs[i];
> +                const int mux_regs_len = dev_priv->perf.oa.mux_regs_lens[i];
> +
> +                for (j = 0; j < mux_regs_len; j++) {
> +                        if ((n_loaded_register % max_load) == 0) {
> +                                n_lri = min(n_registers - n_loaded_register, max_load);
> +                                *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
> +                        }
> +
> +                        *cs++ = i915_mmio_reg_offset(mux_regs[j].addr);
> +                        *cs++ = mux_regs[j].value;
> +                        n_loaded_register++;
> +                }
> +        }
> +
> +        *cs++ = MI_LOAD_REGISTER_IMM(1);
> +        *cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
> +        *cs++ = 0x80;
> +
> +        *cs++ = MI_NOOP;
> +        intel_ring_advance(req, cs);
> +
> +        return 0;
> +}
> +
>  /**
>   * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
>   * @stream: An i915 perf stream
> @@ -2433,9 +2523,14 @@ static long i915_perf_ioctl(struct file *file,
>   */
>  static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
>  {
> +        struct drm_i915_private *dev_priv = stream->dev_priv;
> +
>          if (stream->enabled)
>                  i915_perf_disable_locked(stream);
>
> +        if (stream->noa_restore)
> +                atomic_dec(&dev_priv->perf.oa.noa_restore);
> +
>          if (stream->ops->destroy)
>                  stream->ops->destroy(stream);
>
> @@ -2769,6 +2864,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>                          props->oa_periodic = true;
>                          props->oa_period_exponent = value;
>                          break;
> +                case DRM_I915_PERF_PROP_NOA_RESTORE:
> +                        props->noa_restore = true;
> +                        break;
>                  case DRM_I915_PERF_PROP_MAX:
>                          MISSING_CASE(id);
>                          return -EINVAL;
> @@ -3129,6 +3227,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>          mutex_init(&dev_priv->perf.lock);
>          spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
>
> +        atomic_set(&dev_priv->perf.oa.noa_restore, 0);
> +
>          oa_sample_rate_hard_limit =
>                  dev_priv->perf.oa.timestamp_frequency / 2;
>          dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index acd1da9b62a3..67aaaebb194b 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -1874,6 +1874,9 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req,
>                  !(dispatch_flags & I915_DISPATCH_SECURE);
>          u32 *cs;
>
> +        /* Emit NOA config */
> +        i915_oa_emit_noa_config_locked(req);
> +
>          cs = intel_ring_begin(req, 4);
>          if (IS_ERR(cs))
>                  return PTR_ERR(cs);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 15bc9f78ba4d..27cc72a85d07 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1366,6 +1366,16 @@ enum drm_i915_perf_property_id {
>           */
>          DRM_I915_PERF_PROP_OA_EXPONENT,
>
> +        /**
> +         * Specifying this property will alterate the behavior of the i915
> +         * driver, by forcing the driver to restore the NOA configuration of
> +         * the OA unit between context switches. You should only use this flag
> +         * if you think that that at least one application running on the
> +         * system is using a SSEU configurations with disabled
> +         * slices/subslices.
> +         */
> +        DRM_I915_PERF_PROP_NOA_RESTORE,
> +
>          DRM_I915_PERF_PROP_MAX /* non-ABI */
>  };
>
> --
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx