This patch enables sampling of the context ID corresponding to requests submitted on the engines through the perf stream. OA reports already embed this information on Gen8+. For earlier Gens we can leverage the value sampled from request->ctx->hw_id to associate with OA reports. v2: Updated stream->last_ctx_id to INVALID_CTX_ID during stream_init. Testcase: igt/intel_perf_dapc/perf-ctxid Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 5 ++++ drivers/gpu/drm/i915/i915_perf.c | 56 ++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/i915_drm.h | 7 +++++ 3 files changed, 68 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3c1bc01..4b425f2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2245,6 +2245,11 @@ struct i915_perf_cs_sample { /* Is this sample prior to request start or post request end */ enum request_sample_id id; + + /** + * @ctx_id: Context ID associated with this perf sample + */ + u32 ctx_id; }; struct intel_cdclk_state { diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index bce7388..8c6913a 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -293,6 +293,7 @@ /* Data common to periodic and RCS based OA samples */ struct i915_perf_sample_data { u64 source; + u64 ctx_id; const u8 *report; }; @@ -347,6 +348,7 @@ struct i915_perf_sample_data { #define SAMPLE_OA_REPORT (1<<0) #define SAMPLE_OA_SOURCE (1<<1) +#define SAMPLE_CTX_ID (1<<2) /** * struct perf_open_properties - for validated properties given to open a stream @@ -620,6 +622,7 @@ static void i915_perf_stream_patch_request(struct i915_perf_stream *stream, list_move_tail(&sample->link, &stream->cs_samples); sample->request = i915_gem_request_get(request); sample->id = sample_id; + sample->ctx_id = request->ctx->hw_id; if (stream->sample_flags & (SAMPLE_OA_REPORT | SAMPLE_OA_SOURCE)) 
i915_perf_stream_patch_sample_oa(stream, request, @@ -877,6 +880,12 @@ static int append_perf_sample(struct i915_perf_stream *stream, buf += 8; } + if (sample_flags & SAMPLE_CTX_ID) { + if (copy_to_user(buf, &data->ctx_id, 8)) + return -EFAULT; + buf += 8; + } + if (sample_flags & SAMPLE_OA_REPORT) { if (copy_to_user(buf, data->report, report_size)) return -EFAULT; @@ -903,12 +912,27 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream, char __user *buf, size_t count, size_t *offset, const u8 *report) { + struct drm_i915_private *dev_priv = stream->dev_priv; u32 sample_flags = stream->sample_flags; struct i915_perf_sample_data data = { 0 }; + u32 *report32 = (u32 *)report; if (sample_flags & SAMPLE_OA_SOURCE) data.source = I915_PERF_SAMPLE_OA_SOURCE_OABUFFER; + if (sample_flags & SAMPLE_CTX_ID) { + if (INTEL_INFO(dev_priv)->gen < 8) + data.ctx_id = 0; + else { + /* + * XXX: Just keep the lower 21 bits for now since I'm + * not entirely sure if the HW touches any of the higher + * bits in this field + */ + data.ctx_id = report32[2] & 0x1fffff; + } + } + if (sample_flags & SAMPLE_OA_REPORT) data.report = report; @@ -1487,6 +1511,9 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream, if (sample_flags & SAMPLE_OA_SOURCE) data.source = I915_PERF_SAMPLE_OA_SOURCE_CS; + if (sample_flags & SAMPLE_CTX_ID) + data.ctx_id = node->ctx_id; + return append_perf_sample(stream, buf, count, offset, &data); } @@ -2665,6 +2692,19 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, struct intel_engine_cs *engine = NULL; int ret; + if ((props->sample_flags & SAMPLE_CTX_ID) && !props->cs_mode) { + if (IS_HASWELL(dev_priv)) { + DRM_ERROR("On HSW, context ID sampling only supported " + "via command stream\n"); + return -EINVAL; + } else if (!i915.enable_execlists) { + DRM_ERROR("On Gen8+ without execlists, context ID " + "sampling only supported via " + "command stream\n"); + return -EINVAL; + } + } + /* We set up some ratelimit state to 
potentially throttle any _NOTES * about spurious, invalid OA reports which we don't forward to * userspace. @@ -2794,6 +2834,12 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, } + if (props->sample_flags & SAMPLE_CTX_ID) { + stream->sample_flags |= SAMPLE_CTX_ID; + stream->sample_size += 8; + stream->last_ctx_id = INVALID_CTX_ID; + } + if (props->cs_mode) { if (!cs_sample_data) { DRM_DEBUG_DRIVER( @@ -2803,6 +2849,13 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream, goto err_enable; } + if (!(props->sample_flags & SAMPLE_CTX_ID)) { + DRM_ERROR("Stream engine given without requesting any " + "CS specific property\n"); + ret = -EINVAL; + goto err_enable; + } + idx = srcu_read_lock(&dev_priv->perf.oa.srcu); curr_stream = srcu_dereference( dev_priv->perf.oa.exclusive_stream, @@ -3520,6 +3573,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->engine = engine; } break; + case DRM_I915_PERF_PROP_SAMPLE_CTX_ID: + props->sample_flags |= SAMPLE_CTX_ID; + break; case DRM_I915_PERF_PROP_MAX: MISSING_CASE(id); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 2e0b239..68baaf9 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1433,6 +1433,12 @@ enum drm_i915_perf_property_id { */ DRM_I915_PERF_PROP_ENGINE, + /** + * The value of this property set to 1 requests inclusion of context ID + * in the perf sample data. + */ + DRM_I915_PERF_PROP_SAMPLE_CTX_ID, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; @@ -1499,6 +1505,7 @@ enum drm_i915_perf_record_type { * struct drm_i915_perf_record_header header; * * { u64 source; } && DRM_I915_PERF_PROP_SAMPLE_OA_SOURCE + * { u64 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID * { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA * }; */ -- 1.9.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx