Re: [PATCH 5/9] drm/i915/perf: Group engines into respective OA groups

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 16, 2023 at 12:51:34PM +0200, Jani Nikula wrote:
On Tue, 14 Feb 2023, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx> wrote:
Now that we may have multiple OA units in a single GT as well as on
separate GTs, create an engine group that maps to a single OA unit.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   4 +
 drivers/gpu/drm/i915/gt/intel_sseu.c         |   3 +-
 drivers/gpu/drm/i915/i915_perf.c             | 126 +++++++++++++++++--
 drivers/gpu/drm/i915/i915_perf_types.h       |  51 +++++++-
 4 files changed, 171 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 4fd54fb8810f..8a8b0dce241b 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -53,6 +53,8 @@ struct intel_gt;
 struct intel_ring;
 struct intel_uncore;
 struct intel_breadcrumbs;
+struct intel_engine_cs;
+struct i915_perf_group;

 typedef u32 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
@@ -603,6 +605,8 @@ struct intel_engine_cs {
 	} props, defaults;

 	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
+
+	struct i915_perf_group *oa_group;
 };

 static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 6c6198a257ac..1141f875f5bd 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -6,6 +6,7 @@
 #include <linux/string_helpers.h>

 #include "i915_drv.h"
+#include "i915_perf_types.h"
 #include "intel_engine_regs.h"
 #include "intel_gt_regs.h"
 #include "intel_sseu.h"
@@ -677,7 +678,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 * If i915/perf is active, we want a stable powergating configuration
 	 * on the system. Use the configuration pinned by i915/perf.
 	 */
-	if (gt->perf.exclusive_stream)
+	if (gt->perf.group && gt->perf.group[PERF_GROUP_OAG].exclusive_stream)
 		req_sseu = &gt->perf.sseu;

 	slices = hweight8(req_sseu->slice_mask);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e134523576f8..fda779b2c16f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1587,8 +1587,9 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
 	struct i915_perf *perf = stream->perf;
 	struct intel_gt *gt = stream->engine->gt;
+	struct i915_perf_group *g = stream->engine->oa_group;

-	if (WARN_ON(stream != gt->perf.exclusive_stream))
+	if (WARN_ON(stream != g->exclusive_stream))
 		return;

 	/*
@@ -1597,7 +1598,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 	 *
 	 * See i915_oa_init_reg_state() and lrc_configure_all_contexts()
 	 */
-	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+	WRITE_ONCE(g->exclusive_stream, NULL);
 	perf->ops.disable_metric_set(stream);

 	free_oa_buffer(stream);
@@ -3195,6 +3196,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 {
 	struct drm_i915_private *i915 = stream->perf->i915;
 	struct i915_perf *perf = stream->perf;
+	struct i915_perf_group *g;
 	struct intel_gt *gt;
 	int ret;

@@ -3205,6 +3207,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	}
 	gt = props->engine->gt;

+	g = props->engine->oa_group;
+	if (!g) {
+		DRM_DEBUG("Perf group invalid\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * If the sysfs metrics/ directory wasn't registered for some
 	 * reason then don't let userspace try their luck with config
@@ -3234,7 +3242,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	 * counter reports and marshal to the appropriate client
 	 * we currently only allow exclusive access
 	 */
-	if (gt->perf.exclusive_stream) {
+	if (g->exclusive_stream) {
 		drm_dbg(&stream->perf->i915->drm,
 			"OA unit already in use\n");
 		return -EBUSY;
@@ -3329,7 +3337,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	stream->ops = &i915_oa_stream_ops;

 	stream->engine->gt->perf.sseu = props->sseu;
-	WRITE_ONCE(gt->perf.exclusive_stream, stream);
+	WRITE_ONCE(g->exclusive_stream, stream);

 	ret = i915_perf_stream_enable_sync(stream);
 	if (ret) {
@@ -3352,7 +3360,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	return 0;

 err_enable:
-	WRITE_ONCE(gt->perf.exclusive_stream, NULL);
+	WRITE_ONCE(g->exclusive_stream, NULL);
 	perf->ops.disable_metric_set(stream);

 	free_oa_buffer(stream);
@@ -3381,12 +3389,13 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
 			    const struct intel_engine_cs *engine)
 {
 	struct i915_perf_stream *stream;
+	struct i915_perf_group *g = engine->oa_group;

-	if (!engine_supports_oa(engine))
+	if (!g)
 		return;

 	/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
-	stream = READ_ONCE(engine->gt->perf.exclusive_stream);
+	stream = READ_ONCE(g->exclusive_stream);
 	if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
 		gen8_update_reg_state_unlocked(ce, stream);
 }
@@ -4755,6 +4764,95 @@ static struct ctl_table oa_table[] = {
 	{}
 };

+static u32 __num_perf_groups_per_gt(struct intel_gt *gt)
+{
+	enum intel_platform platform = INTEL_INFO(gt->i915)->platform;
+
+	switch (platform) {
+	default:
+		return 1;
+	}
+}
+
+static u32 __oa_engine_group(struct intel_engine_cs *engine)
+{
+	if (!engine_supports_oa(engine))
+		return PERF_GROUP_INVALID;
+
+	switch (engine->class) {
+	case RENDER_CLASS:
+		return PERF_GROUP_OAG;
+
+	default:
+		return PERF_GROUP_INVALID;
+	}
+}
+
+static void oa_init_groups(struct intel_gt *gt)
+{
+	int i, num_groups = gt->perf.num_perf_groups;
+	struct i915_perf *perf = &gt->i915->perf;
+
+	for (i = 0; i < num_groups; i++) {
+		struct i915_perf_group *g = &gt->perf.group[i];
+
+		/* Fused off engines can result in a group with num_engines == 0 */
+		if (g->num_engines == 0)
+			continue;
+
+		/* Set oa_unit_ids now to ensure ids remain contiguous. */
+		g->oa_unit_id = perf->oa_unit_ids++;
+
+		g->gt = gt;
+	}
+}
+
+static int oa_init_gt(struct intel_gt *gt)
+{
+	u32 num_groups = __num_perf_groups_per_gt(gt);
+	struct intel_engine_cs *engine;
+	struct i915_perf_group *g;
+	intel_engine_mask_t tmp;
+
+	g = kcalloc(num_groups, sizeof(*g), GFP_KERNEL);
+	if (drm_WARN_ON(&gt->i915->drm, !g))
+		return -ENOMEM;

No warnings or messages on -ENOMEM is standard policy.

Will remove warn ON.


+
+	for_each_engine_masked(engine, gt, ALL_ENGINES, tmp) {
+		u32 index;
+
+		index = __oa_engine_group(engine);
+		if (index < num_groups) {
+			g[index].engine_mask |= BIT(engine->id);
+			g[index].num_engines++;
+			engine->oa_group = &g[index];
+		} else {
+			engine->oa_group = NULL;
+		}
+	}
+
+	gt->perf.num_perf_groups = num_groups;
+	gt->perf.group = g;
+
+	oa_init_groups(gt);
+
+	return 0;
+}
+
+static int oa_init_engine_groups(struct i915_perf *perf)
+{
+	struct intel_gt *gt;
+	int i, ret;
+
+	for_each_gt(gt, perf->i915, i) {
+		ret = oa_init_gt(gt);
+		if (ret)
+			return ret;

If this fails in the middle, you'll leave things in half-initialized
state when returning, and expect the caller to clean it up. But that's a
surprising interface design. If i915_perf_init() returns an error, it's
*not* customary to have to call the corresponding cleanup function. On
the contrary, you only call it on success. Any init failures need to be
cleaned up internally before returning.

ENOMEM is the only error I am expecting here, so no other cleanup is needed if this fails.

Thanks,
Umesh




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux