We want to enable performance monitoring on multiple contexts to cover the Iris use case of using 2 GEM contexts (3D & compute). So start by splitting the OA configuration BO, which currently contains both global & per-context register writes, into a global BO and a per-context BO. NOA muxes & OA configurations are global, while FLEXEU register configurations are per-context. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_perf.c | 194 ++++++++++++++++++++++--------- 1 file changed, 137 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 3222f6cd8255..f524f50abdef 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -376,7 +376,8 @@ struct i915_oa_config_bo { struct llist_node node; struct i915_oa_config *oa_config; - struct i915_vma *vma; + struct i915_vma *ctx_vma; + struct i915_vma *global_vma; }; static struct ctl_table_header *sysctl_header; @@ -412,7 +413,8 @@ i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) { i915_oa_config_put(oa_bo->oa_config); - i915_vma_put(oa_bo->vma); + i915_vma_put(oa_bo->ctx_vma); + i915_vma_put(oa_bo->global_vma); kfree(oa_bo); } @@ -1868,9 +1870,9 @@ static struct i915_oa_config_bo * alloc_oa_config_buffer(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { - struct drm_i915_gem_object *obj; struct i915_oa_config_bo *oa_bo; - size_t config_length = 0; + struct drm_i915_gem_object *global_obj, *ctx_obj; + size_t global_config_length = 0, ctx_config_length; u32 *cs; int err; @@ -1878,27 +1880,26 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, if (!oa_bo) return ERR_PTR(-ENOMEM); - config_length += num_lri_dwords(oa_config->mux_regs_len); - config_length += num_lri_dwords(oa_config->b_counter_regs_len); - config_length += num_lri_dwords(oa_config->flex_regs_len); - config_length += 3; /* MI_BATCH_BUFFER_START */ - config_length = 
ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE); - - obj = i915_gem_object_create_shmem(stream->perf->i915, config_length); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); + /* Global configuration requires a wait for it to apply. */ + global_config_length += num_lri_dwords(oa_config->mux_regs_len); + global_config_length += num_lri_dwords(oa_config->b_counter_regs_len); + global_config_length += 3; /* MI_BATCH_BUFFER_START */ + global_config_length = ALIGN(sizeof(u32) * global_config_length, + I915_GTT_PAGE_SIZE); + + global_obj = i915_gem_object_create_shmem(stream->perf->i915, + global_config_length); + if (IS_ERR(global_obj)) { + err = PTR_ERR(global_obj); goto err_free; } - cs = i915_gem_object_pin_map(obj, I915_MAP_WB); + cs = i915_gem_object_pin_map(global_obj, I915_MAP_WB); if (IS_ERR(cs)) { err = PTR_ERR(cs); - goto err_oa_bo; + goto err_global_bo; } - cs = write_cs_mi_lri(cs, - oa_config->mux_regs, - oa_config->mux_regs_len); cs = write_cs_mi_lri(cs, oa_config->b_counter_regs, oa_config->b_counter_regs_len); @@ -1913,15 +1914,51 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, *cs++ = i915_ggtt_offset(stream->noa_wait); *cs++ = 0; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); + i915_gem_object_flush_map(global_obj); + i915_gem_object_unpin_map(global_obj); + + oa_bo->global_vma = i915_vma_instance(global_obj, + &stream->engine->gt->ggtt->vm, + NULL); + if (IS_ERR(oa_bo->global_vma)) { + err = PTR_ERR(oa_bo->global_vma); + goto err_global_bo; + } + + /* There is no known delay needed for the per context registers. 
*/ + ctx_config_length = 1 /* MI_BATCH_BUFFER_END */ + + num_lri_dwords(oa_config->flex_regs_len); + ctx_config_length = ALIGN(sizeof(u32) * ctx_config_length, + I915_GTT_PAGE_SIZE); + + ctx_obj = i915_gem_object_create_shmem(stream->perf->i915, + ctx_config_length); + if (IS_ERR(ctx_obj)) { + err = PTR_ERR(ctx_obj); + goto err_global_vma; + } + + cs = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_global_vma; + } + + cs = write_cs_mi_lri(cs, + oa_config->mux_regs, + oa_config->mux_regs_len); + + *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(ctx_obj); + i915_gem_object_unpin_map(ctx_obj); - oa_bo->vma = i915_vma_instance(obj, - &stream->engine->gt->ggtt->vm, - NULL); - if (IS_ERR(oa_bo->vma)) { - err = PTR_ERR(oa_bo->vma); - goto err_oa_bo; + oa_bo->ctx_vma = i915_vma_instance(ctx_obj, + &stream->engine->gt->ggtt->vm, + NULL); + if (IS_ERR(oa_bo->ctx_vma)) { + err = PTR_ERR(oa_bo->ctx_vma); + goto err_ctx_bo; } oa_bo->oa_config = i915_oa_config_get(oa_config); @@ -1929,15 +1966,19 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream, return oa_bo; -err_oa_bo: - i915_gem_object_put(obj); +err_ctx_bo: + i915_gem_object_put(ctx_obj); +err_global_vma: + i915_vma_put(oa_bo->global_vma); +err_global_bo: + i915_gem_object_put(global_obj); err_free: kfree(oa_bo); return ERR_PTR(err); } -static struct i915_vma * -get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) +static struct i915_oa_config_bo * +get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) { struct i915_oa_config_bo *oa_bo; @@ -1950,29 +1991,29 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config) memcmp(oa_bo->oa_config->uuid, oa_config->uuid, sizeof(oa_config->uuid)) == 0) - goto out; + return oa_bo; } - oa_bo = alloc_oa_config_buffer(stream, oa_config); - if (IS_ERR(oa_bo)) - return ERR_CAST(oa_bo); - -out: - return i915_vma_get(oa_bo->vma); + return 
alloc_oa_config_buffer(stream, oa_config); } static struct i915_request * emit_oa_config(struct i915_perf_stream *stream, struct i915_oa_config *oa_config, - struct intel_context *ce) + struct intel_context *ce, + bool global) { + struct i915_oa_config_bo *oa_bo; struct i915_request *rq; struct i915_vma *vma; int err; - vma = get_oa_vma(stream, oa_config); - if (IS_ERR(vma)) - return ERR_CAST(vma); + oa_bo = get_oa_bo(stream, oa_config); + if (IS_ERR(oa_bo)) + return ERR_CAST(oa_bo); + + vma = global ? i915_vma_get(oa_bo->global_vma) : + i915_vma_get(oa_bo->ctx_vma); err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (err) @@ -2019,6 +2060,7 @@ static struct i915_request * hsw_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; + struct i915_request *rq; /* * PRM: @@ -2035,7 +2077,15 @@ hsw_enable_metric_set(struct i915_perf_stream *stream) intel_uncore_rmw(uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE); - return emit_oa_config(stream, stream->oa_config, oa_context(stream)); + rq = emit_oa_config(stream, stream->oa_config, + stream->engine->kernel_context, + false /* global */); + if (IS_ERR(rq)) + return rq; + + return emit_oa_config(stream, stream->oa_config, + stream->engine->kernel_context, + true /* global */); } static void hsw_disable_metric_set(struct i915_perf_stream *stream) @@ -2423,6 +2473,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; struct i915_oa_config *oa_config = stream->oa_config; + struct i915_request *rq; int ret; /* @@ -2463,7 +2514,15 @@ gen8_enable_metric_set(struct i915_perf_stream *stream) if (ret) return ERR_PTR(ret); - return emit_oa_config(stream, oa_config, oa_context(stream)); + rq = emit_oa_config(stream, oa_config, + stream->engine->kernel_context, + false /* global */); + if (IS_ERR(rq)) + return rq; + + return emit_oa_config(stream, stream->oa_config, + stream->engine->kernel_context, + true /* global */); 
} static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream) @@ -2480,6 +2539,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; + struct i915_request *rq; int ret; intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, @@ -2508,17 +2568,23 @@ gen12_enable_metric_set(struct i915_perf_stream *stream) return ERR_PTR(ret); /* - * For Gen12, performance counters are context - * saved/restored. Only enable it for the context that - * requested this. + * For Gen12, performance counters are also context saved/restored on + * another set of performance registers. Configure the unit dealing + * with those. */ - if (stream->ctx) { - ret = gen12_configure_oar_context(stream, true); - if (ret) - return ERR_PTR(ret); - } + ret = gen12_configure_oar_context(stream, true); + if (ret) + return ERR_PTR(ret); + + rq = emit_oa_config(stream, oa_config, + stream->engine->kernel_context, + false /* global */); + if (IS_ERR(rq)) + return rq; - return emit_oa_config(stream, oa_config, oa_context(stream)); + return emit_oa_config(stream, stream->oa_config, + stream->engine->kernel_context, + true /* global */); } static void gen8_disable_metric_set(struct i915_perf_stream *stream) @@ -3228,15 +3294,29 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream, * When set globally, we use a low priority kernel context, * so it will effectively take effect when idle. 
*/ - rq = emit_oa_config(stream, config, oa_context(stream)); - if (!IS_ERR(rq)) { - config = xchg(&stream->oa_config, config); - i915_request_put(rq); - } else { + rq = emit_oa_config(stream, config, + oa_context(stream), + false /* global */); + if (IS_ERR(rq)) { ret = PTR_ERR(rq); + goto err; } + + i915_request_put(rq); + + rq = emit_oa_config(stream, config, + oa_context(stream), + true /* global */); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto err; + } + + config = xchg(&stream->oa_config, config); + i915_request_put(rq); } +err: i915_oa_config_put(config); return ret; -- 2.26.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx