From: Sourab Gupta <sourab.gupta@xxxxxxxxx> The periodic reports and the RCS based reports are collected in two separate buffers. While forwarding to userspace, these have to be sent to a single perf event ring buffer. From a userspace perspective, it is good to have the reports in the single buffer ordered by their timestamps. This patch addresses this problem by forwarding the periodic OA reports with a lower timestamp whenever we are forwarding a command streamer based report. Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_oa_perf.c | 38 ++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c index a4fdca3..491496b 100644 --- a/drivers/gpu/drm/i915/i915_oa_perf.c +++ b/drivers/gpu/drm/i915/i915_oa_perf.c @@ -48,8 +48,7 @@ static void forward_one_oa_snapshot_to_event(struct drm_i915_private *dev_priv, } static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv, - u32 head, - u32 tail) + u32 head, u32 tail, u64 gpu_ts) { struct perf_event *exclusive_event = dev_priv->oa_pmu.exclusive_event; int snapshot_size = dev_priv->oa_pmu.oa_buffer.format_size; @@ -58,14 +57,6 @@ static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv, u8 *snapshot; u32 taken; - /* - * Schedule a worker to forward the RCS based OA reports collected. - * A worker is needed since it requires device mutex to be taken - * which can't be done here because of atomic context - */ - if (dev_priv->oa_pmu.multiple_ctx_mode) - schedule_work(&dev_priv->oa_pmu.work_timer); - head -= dev_priv->oa_pmu.oa_buffer.gtt_offset; tail -= dev_priv->oa_pmu.oa_buffer.gtt_offset; @@ -75,12 +66,19 @@ static u32 forward_oa_snapshots(struct drm_i915_private *dev_priv, */ while ((taken = OA_TAKEN(tail, head))) { + u64 snapshot_ts; + /* The tail increases in 64 byte increments, not in * format_size steps. 
*/ if (taken < snapshot_size) break; snapshot = oa_buf_base + (head & mask); + + snapshot_ts = *(u64 *)(snapshot + 4); + if (snapshot_ts > gpu_ts) + break; + head += snapshot_size; /* We currently only allow exclusive access to the counters @@ -122,7 +120,7 @@ static void log_oa_status(struct drm_i915_private *dev_priv, } static void flush_oa_snapshots(struct drm_i915_private *dev_priv, - bool skip_if_flushing) + bool skip_if_flushing, u64 gpu_ts) { unsigned long flags; u32 oastatus2; @@ -165,7 +163,7 @@ static void flush_oa_snapshots(struct drm_i915_private *dev_priv, GEN7_OASTATUS1_REPORT_LOST)); } - head = forward_oa_snapshots(dev_priv, head, tail); + head = forward_oa_snapshots(dev_priv, head, tail, gpu_ts); I915_WRITE(GEN7_OASTATUS2, (head & GEN7_OASTATUS2_HEAD_MASK) | GEN7_OASTATUS2_GGTT); @@ -215,6 +213,7 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv, u8 *snapshot; struct drm_i915_oa_node_ctx_id *ctx_info; struct perf_raw_record raw; + u64 snapshot_ts; format_size = dev_priv->oa_pmu.oa_rcs_buffer.format_size; snapshot_size = format_size + sizeof(*ctx_info); @@ -223,6 +222,10 @@ static void forward_one_oa_rcs_sample(struct drm_i915_private *dev_priv, ctx_info = (struct drm_i915_oa_node_ctx_id *)(snapshot + format_size); ctx_info->ctx_id = node->ctx_id; + /* Flush the periodic snapshots till the ts of this OA report */ + snapshot_ts = *(u64 *)(snapshot + 4); + flush_oa_snapshots(dev_priv, true, snapshot_ts); + perf_sample_data_init(&data, 0, event->hw.last_period); /* Note: the combined u32 raw->size member + raw data itself must be 8 @@ -502,7 +505,10 @@ static enum hrtimer_restart hrtimer_sample(struct hrtimer *hrtimer) struct drm_i915_private *i915 = container_of(hrtimer, typeof(*i915), oa_pmu.timer); - flush_oa_snapshots(i915, true); + if (i915->oa_pmu.multiple_ctx_mode) + schedule_work(&i915->oa_pmu.work_timer); + else + flush_oa_snapshots(i915, true, U64_MAX); hrtimer_forward_now(hrtimer, ns_to_ktime(PERIOD)); return 
HRTIMER_RESTART; @@ -931,7 +937,9 @@ static void i915_oa_event_stop(struct perf_event *event, int flags) if (event->attr.sample_period) { hrtimer_cancel(&dev_priv->oa_pmu.timer); - flush_oa_snapshots(dev_priv, false); + if (dev_priv->oa_pmu.multiple_ctx_mode) + schedule_work(&dev_priv->oa_pmu.work_timer); + flush_oa_snapshots(dev_priv, false, U64_MAX); } event->hw.state = PERF_HES_STOPPED; @@ -971,7 +979,7 @@ static int i915_oa_event_flush(struct perf_event *event) if (ret) return ret; } - flush_oa_snapshots(i915, true); + flush_oa_snapshots(i915, true, U64_MAX); } return 0; -- 1.8.5.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx