[PATCH 08/12] drm/i915: Add support for emitting execbuffer tags through OA counter reports

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Sourab Gupta <sourab.gupta@xxxxxxxxx>

This patch enables userspace to specify per-workload tags via the
execbuffer ioctl. These tags can be added to OA reports to help associate
the reports with their corresponding workloads.

From a userspace perspective, there may be multiple stages within a single
context. A mechanism is needed to associate OA reports individually with
their corresponding workloads (execbuffers), which may not be possible with
ctx_id or pid information alone. This patch provides such a mechanism.

In this patch, the upper 32 bits of the rsvd1 field, which were previously
unused, are now used to pass in the tag.

v2: Updated i915_execbuffer2_get_tag to get the tag properly. (Sagar)
Changed the tag size to 64 bits to ensure all sample fields are aligned to
8 bytes. (Lionel)

Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx>
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h            | 18 +++++++++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 11 +++++---
 drivers/gpu/drm/i915/i915_perf.c           | 41 ++++++++++++++++++++++++++----
 include/uapi/drm/i915_drm.h                | 12 +++++++++
 4 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b56ea20..c4f7462 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1996,7 +1996,8 @@ struct i915_perf_stream_ops {
 	 */
 	void (*emit_sample_capture)(struct i915_perf_stream *stream,
 				    struct drm_i915_gem_request *request,
-				    bool preallocate);
+				    bool preallocate,
+				    u32 tag);
 };
 
 enum i915_perf_stream_state {
@@ -2080,6 +2081,7 @@ struct i915_perf_stream {
 
 	u32 last_ctx_id;
 	u32 last_pid;
+	u32 last_tag;
 };
 
 /**
@@ -2196,6 +2198,17 @@ struct i915_perf_cs_sample {
 	 * submitted, pertaining to this perf sample
 	 */
 	u32 pid;
+
+	/**
+	 * @tag: Tag associated with workload, for which the perf sample is
+	 * being collected.
+	 *
+	 * Userspace can specify tags (provided via execbuffer ioctl), which
+	 * can be associated with the perf samples, and be used to functionally
+	 * distinguish different workload stages, and associate samples with
+	 * these different stages.
+	 */
+	u32 tag;
 };
 
 struct intel_cdclk_state {
@@ -3723,7 +3736,8 @@ void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 			    struct i915_gem_context *ctx,
 			    uint32_t *reg_state);
 void i915_perf_emit_sample_capture(struct drm_i915_gem_request *req,
-				   bool preallocate);
+				   bool preallocate,
+				   u32 tag);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bfe546b..92585df 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -242,6 +242,7 @@ struct i915_execbuffer {
 	 */
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
+	uint32_t tag;
 };
 
 /*
@@ -1194,7 +1195,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	i915_perf_emit_sample_capture(rq, true);
+	i915_perf_emit_sample_capture(rq, true, eb->tag);
 
 	err = eb->engine->emit_bb_start(rq,
 					batch->node.start, PAGE_SIZE,
@@ -1202,7 +1203,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 	if (err)
 		goto err_request;
 
-	i915_perf_emit_sample_capture(rq, false);
+	i915_perf_emit_sample_capture(rq, false, eb->tag);
 
 	GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
 	i915_vma_move_to_active(batch, rq, 0);
@@ -2033,7 +2034,7 @@ static int eb_submit(struct i915_execbuffer *eb)
 			return err;
 	}
 
-	i915_perf_emit_sample_capture(eb->request, true);
+	i915_perf_emit_sample_capture(eb->request, true, eb->tag);
 
 	err = eb->engine->emit_bb_start(eb->request,
 					eb->batch->node.start +
@@ -2043,7 +2044,7 @@ static int eb_submit(struct i915_execbuffer *eb)
 	if (err)
 		return err;
 
-	i915_perf_emit_sample_capture(eb->request, false);
+	i915_perf_emit_sample_capture(eb->request, false, eb->tag);
 
 	return 0;
 }
@@ -2168,6 +2169,8 @@ static int eb_submit(struct i915_execbuffer *eb)
 	if (!eb.engine)
 		return -EINVAL;
 
+	eb.tag	= i915_execbuffer2_get_tag(*args);
+
 	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
 		if (!HAS_RESOURCE_STREAMER(eb.i915)) {
 			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5ac1a41..c7f8e7f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -294,6 +294,7 @@ struct i915_perf_sample_data {
 	u64 source;
 	u64 ctx_id;
 	u64 pid;
+	u64 tag;
 	const u8 *report;
 };
 
@@ -350,6 +351,7 @@ struct i915_perf_sample_data {
 #define SAMPLE_OA_SOURCE      (1<<1)
 #define SAMPLE_CTX_ID	      (1<<2)
 #define SAMPLE_PID	      (1<<3)
+#define SAMPLE_TAG	      (1<<4)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
@@ -402,12 +404,14 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
  * the command stream of a GPU engine.
  * @request: request in whose context the metrics are being collected.
  * @preallocate: allocate space in ring for related sample.
+ * @tag: userspace provided tag to be associated with the perf sample
  *
  * The function provides a hook through which the commands to capture perf
  * metrics, are inserted into the command stream of a GPU engine.
  */
 void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
-				   bool preallocate)
+				   bool preallocate,
+				   u32 tag)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct drm_i915_private *dev_priv = engine->i915;
@@ -422,7 +426,8 @@ void i915_perf_emit_sample_capture(struct drm_i915_gem_request *request,
 	if (stream && (stream->state == I915_PERF_STREAM_ENABLED) &&
 				stream->cs_mode)
 		stream->ops->emit_sample_capture(stream, request,
-						 preallocate);
+						 preallocate, tag);
+
 	srcu_read_unlock(&engine->perf_srcu, idx);
 }
 
@@ -591,11 +596,13 @@ static int i915_emit_oa_report_capture(
  * @stream: An i915-perf stream opened for GPU metrics
  * @request: request in whose context the metrics are being collected.
  * @preallocate: allocate space in ring for related sample.
+ * @tag: userspace provided tag to be associated with the perf sample
  */
 static void i915_perf_stream_emit_sample_capture(
 					struct i915_perf_stream *stream,
 					struct drm_i915_gem_request *request,
-					bool preallocate)
+					bool preallocate,
+					u32 tag)
 {
 	struct reservation_object *resv = stream->cs_buffer.vma->resv;
 	struct i915_perf_cs_sample *sample;
@@ -611,6 +618,7 @@ static void i915_perf_stream_emit_sample_capture(
 	sample->request = i915_gem_request_get(request);
 	sample->ctx_id = request->ctx->hw_id;
 	sample->pid = current->pid;
+	sample->tag = tag;
 
 	insert_perf_sample(stream, sample);
 
@@ -933,6 +941,12 @@ static int append_perf_sample(struct i915_perf_stream *stream,
 		buf += 8;
 	}
 
+	if (sample_flags & SAMPLE_TAG) {
+		if (copy_to_user(buf, &data->tag, 8))
+			return -EFAULT;
+		buf += 8;
+	}
+
 	if (sample_flags & SAMPLE_OA_REPORT) {
 		if (copy_to_user(buf, data->report, report_size))
 			return -EFAULT;
@@ -973,6 +987,9 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 	if (sample_flags & SAMPLE_PID)
 		data.pid = stream->last_pid;
 
+	if (sample_flags & SAMPLE_TAG)
+		data.tag = stream->last_tag;
+
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
 
@@ -1575,6 +1592,11 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
 		stream->last_pid = node->pid;
 	}
 
+	if (sample_flags & SAMPLE_TAG) {
+		data.tag = node->tag;
+		stream->last_tag = node->tag;
+	}
+
 	return append_perf_sample(stream, buf, count, offset, &data);
 }
 
@@ -2736,7 +2758,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	bool require_oa_unit = props->sample_flags & (SAMPLE_OA_REPORT |
 						      SAMPLE_OA_SOURCE);
-	bool require_cs_mode = props->sample_flags & SAMPLE_PID;
+	bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
+						      SAMPLE_TAG);
 	bool cs_sample_data = props->sample_flags & SAMPLE_OA_REPORT;
 	struct i915_perf_stream *curr_stream;
 	struct intel_engine_cs *engine = NULL;
@@ -2895,7 +2918,7 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 	}
 
 	if (require_cs_mode && !props->cs_mode) {
-		DRM_ERROR("PID sampling requires a ring to be specified");
+		DRM_ERROR("PID/TAG sampling requires a ring to be specified");
 		ret = -EINVAL;
 		goto err_enable;
 	}
@@ -2924,6 +2947,11 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 			stream->sample_size += 8;
 		}
 
+		if (props->sample_flags & SAMPLE_TAG) {
+			stream->sample_flags |= SAMPLE_TAG;
+			stream->sample_size += 8;
+		}
+
 		engine = dev_priv->engine[props->engine];
 
 		idx = srcu_read_lock(&engine->perf_srcu);
@@ -3641,6 +3669,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_SAMPLE_PID:
 			props->sample_flags |= SAMPLE_PID;
 			break;
+		case DRM_I915_PERF_PROP_SAMPLE_TAG:
+			props->sample_flags |= SAMPLE_TAG;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 34d8e41..0e522d4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -935,6 +935,11 @@ struct drm_i915_gem_execbuffer2 {
 #define i915_execbuffer2_get_context_id(eb2) \
 	((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
 
+/* upper 32 bits of rsvd1 field contain tag */
+#define I915_EXEC_TAG_MASK		(0xffffffff00000000UL)
+#define i915_execbuffer2_get_tag(eb2) \
+	(((eb2).rsvd1 & I915_EXEC_TAG_MASK) >> 32)
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	__u32 handle;
@@ -1414,6 +1419,12 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_PID,
 
+	/**
+	 * The value of this property set to 1 requests inclusion of tag in the
+	 * perf sample data.
+	 */
+	DRM_I915_PERF_PROP_SAMPLE_TAG,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1482,6 +1493,7 @@ enum drm_i915_perf_record_type {
 	 *     { u64 source; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE
 	 *     { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
 	 *     { u64 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
+	 *     { u64 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
 	 *     { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
 	 * };
 	 */
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux