[RFC 6/8] drm/i915: Routines for inserting OA capture commands in the ringbuffer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Sourab Gupta <sourab.gupta@xxxxxxxxx>

This patch introduces the routines which insert commands for capturing OA
snapshots, into the ringbuffer for RCS engine.
The command MI_REPORT_PERF_COUNT can be used to capture	snapshots of OA
counters. The routines introduced in this patch can be called to insert these
commands at appropriate points during workload submission

Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_dma.c     |  1 +
 drivers/gpu/drm/i915/i915_drv.h     |  3 ++
 drivers/gpu/drm/i915/i915_oa_perf.c | 86 +++++++++++++++++++++++++++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 0553f20..f12feaa 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -821,6 +821,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	/* Must at least be registered before trying to pin any context
 	 * otherwise i915_oa_context_pin_notify() will lock an un-initialized
 	 * spinlock, upsetting lockdep checks */
+	INIT_LIST_HEAD(&dev_priv->profile_cmd);
 	i915_oa_pmu_register(dev);
 
 	intel_pm_setup(dev);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5453842..798da49 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1982,6 +1982,8 @@ struct drm_i915_private {
 		struct work_struct work_event_stop;
 		struct completion complete;
 	} oa_pmu;
+
+	struct list_head profile_cmd;
 #endif
 
 	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
@@ -3162,6 +3164,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
 				struct intel_context *context);
 void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv,
 				  struct intel_context *context);
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id);
 #else
 static inline void
 i915_oa_context_pin_notify(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index 5d63dab..b02850c 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -25,6 +25,76 @@ static int hsw_perf_format_sizes[] = {
 	64   /* C4_B8_HSW */
 };
 
+struct drm_i915_insert_cmd {
+	struct list_head list;
+	void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id);
+};
+
+void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_insert_cmd *entry;
+
+	list_for_each_entry(entry, &dev_priv->profile_cmd, list)
+		entry->insert_cmd(ringbuf, ctx_id);
+}
+
+void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_oa_async_node_info *node_info = NULL;
+	struct drm_i915_oa_async_queue_header *queue_hdr =
+			(struct drm_i915_oa_async_queue_header *)
+			dev_priv->oa_pmu.oa_async_buffer.addr;
+	void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
+	int data_size =	(queue_hdr->size_in_bytes - queue_hdr->data_offset);
+	u32 data_offset, addr = 0;
+	int ret;
+
+	struct drm_i915_oa_async_node *nodes = data_ptr;
+	int num_nodes = 0;
+	int index = 0;
+
+	/* OA counters are only supported on the render ring */
+	if (ring->id != RCS)
+		return;
+
+	num_nodes = data_size / sizeof(*nodes);
+	index = queue_hdr->node_count % num_nodes;
+
+	data_offset = offsetof(struct drm_i915_oa_async_node, report_perf);
+
+	addr = i915_gem_obj_ggtt_offset(dev_priv->oa_pmu.oa_async_buffer.obj) +
+		queue_hdr->data_offset +
+		index * sizeof(struct drm_i915_oa_async_node) +
+		data_offset;
+
+	/* addr should be 64 byte aligned */
+	BUG_ON(addr & 0x3f);
+
+	ret = intel_ring_begin(ring, 4);
+	if (ret)
+		return;
+
+	intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0));
+	intel_ring_emit(ring, addr | MI_REPORT_PERF_COUNT_GGTT);
+	intel_ring_emit(ring, ring->outstanding_lazy_request->seqno);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	node_info = &nodes[index].node_info;
+	i915_gem_request_assign(&node_info->req,
+				ring->outstanding_lazy_request);
+
+	node_info->pid = current->pid;
+	node_info->ctx_id = ctx_id;
+	queue_hdr->node_count++;
+	if (queue_hdr->node_count > num_nodes)
+		queue_hdr->wrap_count++;
+}
+
 static void init_oa_async_buf_queue(struct drm_i915_private *dev_priv)
 {
 	struct drm_i915_oa_async_queue_header *hdr =
@@ -865,6 +935,7 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work)
 		container_of(__work, typeof(*dev_priv),
 			oa_pmu.work_event_stop);
 	struct perf_event *event = dev_priv->oa_pmu.exclusive_event;
+	struct drm_i915_insert_cmd *entry, *next;
 	struct drm_i915_oa_async_queue_header *hdr =
 		(struct drm_i915_oa_async_queue_header *)
 		dev_priv->oa_pmu.oa_async_buffer.addr;
@@ -882,6 +953,13 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work)
 	if (ret)
 		return;
 
+	list_for_each_entry_safe(entry, next, &dev_priv->profile_cmd, list) {
+		if (entry->insert_cmd == i915_oa_insert_cmd) {
+			list_del(&entry->list);
+			kfree(entry);
+		}
+	}
+
 	dev_priv->oa_pmu.event_active = false;
 
 	i915_oa_async_wait_gpu(dev_priv);
@@ -920,8 +998,14 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
 	struct drm_i915_private *dev_priv =
 		container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu);
 	unsigned long lock_flags;
+	struct drm_i915_insert_cmd *entry;
 	u32 oastatus1, tail;
 
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return;
+	entry->insert_cmd = i915_oa_insert_cmd;
+
 	if (dev_priv->oa_pmu.metrics_set == I915_OA_METRICS_SET_3D) {
 		config_oa_regs(dev_priv, i915_oa_3d_mux_config_hsw,
 				i915_oa_3d_mux_config_hsw_len);
@@ -976,6 +1060,8 @@ static void i915_oa_event_start(struct perf_event *event, int flags)
 	dev_priv->oa_pmu.event_active = true;
 	update_oacontrol(dev_priv);
 
+	list_add_tail(&entry->list, &dev_priv->profile_cmd);
+
 	/* Reset the head ptr to ensure we don't forward reports relating
 	 * to a previous perf event */
 	oastatus1 = I915_READ(GEN7_OASTATUS1);
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux