From: Sourab Gupta <sourab.gupta@xxxxxxxxx> This patch adds the routines that insert timestamp-capture commands into the ringbuf; they are used to place these commands around the batchbuffer. While inserting the commands, we keep a reference to the associated request. This reference is released when the samples are forwarded to userspace (or when the event is destroyed). Also, an active reference to the destination buffer is taken here, so that the buffer is freed only after the GPU is done with it, even if the local reference to the buffer is released. Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_oa_perf.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 2 + 3 files changed, 76 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 41a01bd..59d23d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1662,6 +1662,7 @@ enum i915_oa_event_state { enum i915_profile_mode { I915_PROFILE_OA = 0, + I915_PROFILE_TS, I915_PROFILE_MAX, }; diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c index 107570e..1780de42 100644 --- a/drivers/gpu/drm/i915/i915_oa_perf.c +++ b/drivers/gpu/drm/i915/i915_oa_perf.c @@ -102,6 +102,77 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, u32 ctx_id, int tag) i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring); } +void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, + int tag) +{ + struct intel_engine_cs *ring = ringbuf->ring; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj; + struct i915_gen_pmu_node *entry; + unsigned long lock_flags; + u32 addr = 0; + int ret; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (entry == NULL) { + DRM_ERROR("alloc failed\n"); 
return; + } + entry->ctx_id = ctx_id; + i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request); + + spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags); + if (list_empty(&dev_priv->gen_pmu.node_list)) + entry->offset = 0; + else { + struct i915_gen_pmu_node *last_entry; + int max_offset = dev_priv->gen_pmu.buffer.node_count * + dev_priv->gen_pmu.buffer.node_size; + + last_entry = list_last_entry(&dev_priv->gen_pmu.node_list, + struct i915_gen_pmu_node, head); + entry->offset = last_entry->offset + + dev_priv->gen_pmu.buffer.node_size; + + if (entry->offset > max_offset) + entry->offset = 0; + } + list_add_tail(&entry->head, &dev_priv->gen_pmu.node_list); + spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); + + addr = i915_gem_obj_ggtt_offset(obj) + entry->offset; + + if (ring->id == RCS) { + ret = intel_ring_begin(ring, 6); + if (ret) + return; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); + intel_ring_emit(ring, + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_TIMESTAMP_WRITE); + intel_ring_emit(ring, addr | PIPE_CONTROL_GLOBAL_GTT); + intel_ring_emit(ring, 0); /* imm low, must be zero */ + intel_ring_emit(ring, 0); /* imm high, must be zero */ + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } else { + ret = intel_ring_begin(ring, 4); + if (ret) + return; + + intel_ring_emit(ring, + MI_FLUSH_DW | MI_FLUSH_DW_OP_STAMP); + intel_ring_emit(ring, addr | MI_FLUSH_DW_USE_GTT); + intel_ring_emit(ring, 0); /* imm low, must be zero */ + intel_ring_emit(ring, 0); /* imm high, must be zero */ + intel_ring_advance(ring); + } + + obj->base.write_domain = I915_GEM_DOMAIN_RENDER; + i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring); +} + static void forward_one_oa_snapshot_to_event(struct drm_i915_private *dev_priv, u8 *snapshot, struct perf_event *event) @@ -1521,6 +1592,7 @@ static void i915_gen_event_start(struct perf_event *event, int flags) spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags); 
dev_priv->gen_pmu.event_active = true; + dev_priv->insert_profile_cmd[I915_PROFILE_TS] = i915_gen_insert_cmd_ts; spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); @@ -1542,6 +1614,7 @@ static void i915_gen_event_stop(struct perf_event *event, int flags) spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags); dev_priv->gen_pmu.event_active = false; + dev_priv->insert_profile_cmd[I915_PROFILE_TS] = NULL; list_for_each_entry(entry, &dev_priv->gen_pmu.node_list, head) entry->discard = true; spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c9955968..f816b08 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -347,6 +347,7 @@ #define MI_FLUSH_DW_STORE_INDEX (1<<21) #define MI_INVALIDATE_TLB (1<<18) #define MI_FLUSH_DW_OP_STOREDW (1<<14) +#define MI_FLUSH_DW_OP_STAMP (3<<14) #define MI_FLUSH_DW_OP_MASK (3<<14) #define MI_FLUSH_DW_NOTIFY (1<<8) #define MI_INVALIDATE_BSD (1<<7) @@ -423,6 +424,7 @@ #define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) #define PIPE_CONTROL_QW_WRITE (1<<14) #define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) +#define PIPE_CONTROL_TIMESTAMP_WRITE (3<<14) #define PIPE_CONTROL_DEPTH_STALL (1<<13) #define PIPE_CONTROL_WRITE_FLUSH (1<<12) #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ -- 1.8.5.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx