From: Sourab Gupta <sourab.gupta@xxxxxxxxx> This patch introduces routines that insert commands for capturing OA snapshots into the ringbuffer of the RCS engine. The MI_REPORT_PERF_COUNT command can be used to capture snapshots of the OA counters. The routines introduced in this patch can be called to insert these commands at appropriate points during workload submission. Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_dma.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_oa_perf.c | 86 +++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 0553f20..f12feaa 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -821,6 +821,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) /* Must at least be registered before trying to pin any context * otherwise i915_oa_context_pin_notify() will lock an un-initialized * spinlock, upsetting lockdep checks */ + INIT_LIST_HEAD(&dev_priv->profile_cmd); i915_oa_pmu_register(dev); intel_pm_setup(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5453842..798da49 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1982,6 +1982,8 @@ struct drm_i915_private { struct work_struct work_event_stop; struct completion complete; } oa_pmu; + + struct list_head profile_cmd; #endif /* Abstract the submission mechanism (legacy ringbuffer or execlists) away */ @@ -3162,6 +3164,7 @@ void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv, struct intel_context *context); void i915_oa_context_unpin_notify(struct drm_i915_private *dev_priv, struct intel_context *context); +void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id); #else static inline void i915_oa_context_pin_notify(struct drm_i915_private *dev_priv, diff --git 
a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c index 5d63dab..b02850c 100644 --- a/drivers/gpu/drm/i915/i915_oa_perf.c +++ b/drivers/gpu/drm/i915/i915_oa_perf.c @@ -25,6 +25,76 @@ static int hsw_perf_format_sizes[] = { 64 /* C4_B8_HSW */ }; +struct drm_i915_insert_cmd { + struct list_head list; + void (*insert_cmd)(struct intel_ringbuffer *ringbuf, u32 ctx_id); +}; + +void i915_insert_profiling_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_insert_cmd *entry; + + list_for_each_entry(entry, &dev_priv->profile_cmd, list) + entry->insert_cmd(ringbuf, ctx_id); +} + +void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_oa_async_node_info *node_info = NULL; + struct drm_i915_oa_async_queue_header *queue_hdr = + (struct drm_i915_oa_async_queue_header *) + dev_priv->oa_pmu.oa_async_buffer.addr; + void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset; + int data_size = (queue_hdr->size_in_bytes - queue_hdr->data_offset); + u32 data_offset, addr = 0; + int ret; + + struct drm_i915_oa_async_node *nodes = data_ptr; + int num_nodes = 0; + int index = 0; + + /* OA counters are only supported on the render ring */ + if (ring->id != RCS) + return; + + num_nodes = data_size / sizeof(*nodes); + index = queue_hdr->node_count % num_nodes; + + data_offset = offsetof(struct drm_i915_oa_async_node, report_perf); + + addr = i915_gem_obj_ggtt_offset(dev_priv->oa_pmu.oa_async_buffer.obj) + + queue_hdr->data_offset + + index * sizeof(struct drm_i915_oa_async_node) + + data_offset; + + /* addr should be 64 byte aligned */ + BUG_ON(addr & 0x3f); + + ret = intel_ring_begin(ring, 4); + if (ret) + return; + + intel_ring_emit(ring, MI_REPORT_PERF_COUNT | (1<<0)); + intel_ring_emit(ring, 
addr | MI_REPORT_PERF_COUNT_GGTT); + intel_ring_emit(ring, ring->outstanding_lazy_request->seqno); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + + node_info = &nodes[index].node_info; + i915_gem_request_assign(&node_info->req, + ring->outstanding_lazy_request); + + node_info->pid = current->pid; + node_info->ctx_id = ctx_id; + queue_hdr->node_count++; + if (queue_hdr->node_count > num_nodes) + queue_hdr->wrap_count++; +} + static void init_oa_async_buf_queue(struct drm_i915_private *dev_priv) { struct drm_i915_oa_async_queue_header *hdr = @@ -865,6 +935,7 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work) container_of(__work, typeof(*dev_priv), oa_pmu.work_event_stop); struct perf_event *event = dev_priv->oa_pmu.exclusive_event; + struct drm_i915_insert_cmd *entry, *next; struct drm_i915_oa_async_queue_header *hdr = (struct drm_i915_oa_async_queue_header *) dev_priv->oa_pmu.oa_async_buffer.addr; @@ -882,6 +953,13 @@ void i915_oa_async_stop_work_fn(struct work_struct *__work) if (ret) return; + list_for_each_entry_safe(entry, next, &dev_priv->profile_cmd, list) { + if (entry->insert_cmd == i915_oa_insert_cmd) { + list_del(&entry->list); + kfree(entry); + } + } + dev_priv->oa_pmu.event_active = false; i915_oa_async_wait_gpu(dev_priv); @@ -920,8 +998,14 @@ static void i915_oa_event_start(struct perf_event *event, int flags) struct drm_i915_private *dev_priv = container_of(event->pmu, typeof(*dev_priv), oa_pmu.pmu); unsigned long lock_flags; + struct drm_i915_insert_cmd *entry; u32 oastatus1, tail; + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return; + entry->insert_cmd = i915_oa_insert_cmd; + if (dev_priv->oa_pmu.metrics_set == I915_OA_METRICS_SET_3D) { config_oa_regs(dev_priv, i915_oa_3d_mux_config_hsw, i915_oa_3d_mux_config_hsw_len); @@ -976,6 +1060,8 @@ static void i915_oa_event_start(struct perf_event *event, int flags) dev_priv->oa_pmu.event_active = true; update_oacontrol(dev_priv); + list_add_tail(&entry->list, 
&dev_priv->profile_cmd); + /* Reset the head ptr to ensure we don't forward reports relating * to a previous perf event */ oastatus1 = I915_READ(GEN7_OASTATUS1); -- 1.8.5.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx