[PATCH v16 15/17] drm/i915/perf: allow NOA muxes reprogramming before workloads

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Dynamic slices/subslices shutdown will effectivelly loose the NOA
configuration uploaded in the slices/subslices.

Here we introduce a new parameter to configure the i915 perf driver
when userspace wants to monitor parts of the GPU within
slices/subslices and it knows that some of the workloads running on
the system will disable slices/subslices.

This new configuration option will force the i915 driver to reemit NOA
configurations between context switches.

v2: Make sure we handle configs with more register writes than the max
    MI_LOAD_REGISTER_IMM can do (Lionel)

v3: Introduce new parameter to the perf driver to make reprogramming
    optional (Lionel)
    Fix off by one calculation of number of required
    MI_LOAD_REGISTER_IMM (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h         |  15 +++++
 drivers/gpu/drm/i915/i915_perf.c        | 100 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   3 +
 include/uapi/drm/i915_drm.h             |  10 ++++
 4 files changed, 128 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d53419e9bfa2..7ef1908e3a98 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1964,6 +1964,12 @@ struct i915_perf_stream {
 	struct i915_gem_context *ctx;
 
 	/**
+	 * @noa_restore: Whether the user who opened the stream requested NOA
+	 * config to be restored between context switches.
+	 */
+	bool noa_restore;
+
+	/**
 	 * @enabled: Whether the stream is currently enabled, considering
 	 * whether the stream was opened in a disabled state and based
 	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
@@ -2401,6 +2407,7 @@ struct drm_i915_private {
 			const struct i915_oa_reg *mux_regs[6];
 			int mux_regs_lens[6];
 			int n_mux_configs;
+			int total_n_mux_regs;
 
 			const struct i915_oa_reg *b_counter_regs;
 			int b_counter_regs_len;
@@ -2493,6 +2500,13 @@ struct drm_i915_private {
 			struct i915_oa_ops ops;
 			const struct i915_oa_format *oa_formats;
 			int n_builtin_sets;
+
+			/**
+			 * Whether the user has requested the NOA
+			 * configuration to be reprogrammed between context
+			 * switches.
+			 */
+			atomic_t noa_restore;
 		} oa;
 	} perf;
 
@@ -3543,6 +3557,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    uint32_t *reg_state);
+int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5222dac1ee5b..3f49ce69641b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -350,6 +350,8 @@ struct perf_open_properties {
 	u64 single_context:1;
 	u64 ctx_handle;
 
+	bool noa_restore;
+
 	/* OA sampling state */
 	int metrics_set;
 	int oa_format;
@@ -1451,11 +1453,24 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void count_total_mux_regs(struct drm_i915_private *dev_priv)
+{
+	int i;
+
+	dev_priv->perf.oa.total_n_mux_regs = 0;
+	for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
+		dev_priv->perf.oa.total_n_mux_regs +=
+			dev_priv->perf.oa.mux_regs_lens[i];
+	}
+}
+
 static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
 {
 	int ret = i915_oa_select_metric_set_hsw(dev_priv);
 	int i;
 
+	count_total_mux_regs(dev_priv);
+
 	if (ret)
 		return ret;
 
@@ -1777,6 +1792,8 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv)
 	int ret = dev_priv->perf.oa.ops.select_metric_set(dev_priv);
 	int i;
 
+	count_total_mux_regs(dev_priv);
+
 	if (ret)
 		return ret;
 
@@ -2065,6 +2082,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 			return ret;
 	}
 
+	if (props->noa_restore) {
+		stream->noa_restore = true;
+		atomic_inc(&dev_priv->perf.oa.noa_restore);
+	}
+
 	ret = alloc_oa_buffer(dev_priv);
 	if (ret)
 		goto err_oa_buf_alloc;
@@ -2121,6 +2143,74 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 	gen8_update_reg_state_unlocked(ctx, reg_state);
 }
 
+int i915_oa_emit_noa_config_locked(struct drm_i915_gem_request *req)
+{
+	struct drm_i915_private *dev_priv = req->i915;
+	int max_load = 125;
+	int n_lri, n_registers, n_loaded_register;
+	int i, j;
+	u32 *cs;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	/* Perf not supported. */
+	if (!dev_priv->perf.initialized)
+		return 0;
+
+	/*
+	 * We do not expect dynamic slice/subslice configuration to change
+	 * across contexts prior to Gen8.
+	 */
+	if (INTEL_GEN(dev_priv) < 8)
+		return 0;
+
+	/* Has the user requested that NOA configuration be restored? */
+	if (!atomic_read(&dev_priv->perf.oa.noa_restore))
+		return 0;
+
+	n_registers = dev_priv->perf.oa.total_n_mux_regs;
+	n_lri = (n_registers / max_load) + (n_registers % max_load) != 0;
+
+	cs = intel_ring_begin(req,
+			      3 * 2 + /* MI_LOAD_REGISTER_IMM for chicken registers */
+			      n_lri + /* MI_LOAD_REGISTER_IMM for mux registers */
+			      n_registers * 2 + /* offset & value for mux registers*/
+			      1 /* NOOP */);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
+	*cs++ = 0xA0;
+
+	n_loaded_register = 0;
+	for (i = 0; i < dev_priv->perf.oa.n_mux_configs; i++) {
+		const struct i915_oa_reg *mux_regs =
+			dev_priv->perf.oa.mux_regs[i];
+		const int mux_regs_len = dev_priv->perf.oa.mux_regs_lens[i];
+
+		for (j = 0; j < mux_regs_len; j++) {
+			if ((n_loaded_register % max_load) == 0) {
+				n_lri = min(n_registers - n_loaded_register, max_load);
+				*cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+			}
+
+			*cs++ = i915_mmio_reg_offset(mux_regs[j].addr);
+			*cs++ = mux_regs[j].value;
+			n_loaded_register++;
+		}
+	}
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(GDT_CHICKEN_BITS);
+	*cs++ = 0x80;
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
 /**
  * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
  * @stream: An i915 perf stream
@@ -2433,9 +2523,14 @@ static long i915_perf_ioctl(struct file *file,
  */
 static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
 {
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+
 	if (stream->enabled)
 		i915_perf_disable_locked(stream);
 
+	if (stream->noa_restore)
+		atomic_dec(&dev_priv->perf.oa.noa_restore);
+
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 
@@ -2769,6 +2864,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
+		case DRM_I915_PERF_PROP_NOA_RESTORE:
+			props->noa_restore = true;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -3129,6 +3227,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 		mutex_init(&dev_priv->perf.lock);
 		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
+		atomic_set(&dev_priv->perf.oa.noa_restore, 0);
+
 		oa_sample_rate_hard_limit =
 			dev_priv->perf.oa.timestamp_frequency / 2;
 		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index acd1da9b62a3..67aaaebb194b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1874,6 +1874,9 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req,
 			!(dispatch_flags & I915_DISPATCH_SECURE);
 	u32 *cs;
 
+	/* Emit NOA config */
+	i915_oa_emit_noa_config_locked(req);
+
 	cs = intel_ring_begin(req, 4);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 15bc9f78ba4d..27cc72a85d07 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1366,6 +1366,16 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
+	/**
+	 * Specifying this property will alterate the behavior of the i915
+	 * driver, by forcing the driver to restore the NOA configuration of
+	 * the OA unit between context switches. You should only use this flag
+	 * if you think that that at least one application running on the
+	 * system is using a SSEU configurations with disabled
+	 * slices/subslices.
+	 */
+	DRM_I915_PERF_PROP_NOA_RESTORE,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux