On Tue, 11 Oct 2022, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx> wrote: > On Tue, Oct 11, 2022 at 08:47:00PM +0300, Jani Nikula wrote: >>On Mon, 10 Oct 2022, Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx> wrote: >>> Some SKUs of same gen12 platform may have different oactxctrl >>> offsets. For gen12, determine oactxctrl offsets at runtime. >>> >>> v2: (Lionel) >>> - Move MI definitions to intel_gpu_commands.h >>> - Ensure __find_reg_in_lri does read past context image size >>> >>> v3: (Ashutosh) >>> - Drop unnecessary use of double underscores >>> - fix find_reg_in_lri >>> - Return error if oa context offset is U32_MAX >>> - Error out if oa_ctx_ctrl_offset does not find offset >>> >>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx> >>> --- >>> drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 4 + >>> drivers/gpu/drm/i915/i915_perf.c | 154 +++++++++++++++---- >>> drivers/gpu/drm/i915/i915_perf_oa_regs.h | 2 +- >>> 3 files changed, 129 insertions(+), 31 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h >>> index d4e9702d3c8e..f50ea92910d9 100644 >>> --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h >>> +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h >>> @@ -187,6 +187,10 @@ >>> #define MI_BATCH_RESOURCE_STREAMER REG_BIT(10) >>> #define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/ >>> >>> +#define MI_OPCODE(x) (((x) >> 23) & 0x3f) >>> +#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0))) >>> +#define MI_LRI_LEN(x) (((x) & 0xff) + 1) >>> + >>> /* >>> * 3D instructions used by the kernel >>> */ >>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >>> index cd57b5836386..b292aa39633e 100644 >>> --- a/drivers/gpu/drm/i915/i915_perf.c >>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>> @@ -1358,6 +1358,68 @@ static int gen12_get_render_context_id(struct i915_perf_stream *stream) >>> return 0; >>> } >>> >>> +#define valid_oactxctrl_offset(x) ((x) && (x) != U32_MAX) >>> +static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) >>> +{ >>> + u32 idx = *offset; >>> + u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); >> >>I don't understand any of this stuff, but why are the offset, index and >>length u32 instead of just, say, int? > > I can use int, but the ce->engine->context_size is u32 and we are > parsing the context image here, so I have just used the same type for > offset, index, length. > > Any guideline/recommendation to choose int over u32? If it's an index in general, looping, regular computation, just plain old int will often do. Use u32 and friends when you actually need a specific size to map to hardware or registers etc. I guess it's not so clear cut here, and mixing types not necessarily a bright idea here either. BR, Jani. > >> >>> + bool found = false; >>> + >>> + idx++; >>> + for (; idx < len; idx += 2) { >> >>I find the initialization of idx and len confusing, and thereby the >>entire loop too. > > Considering that the context image is a collection of MI_LRI commands > with each command having this format: > > dword 0: MI_LRI > dword 1: reg offset > dword 2: reg value > dword 3: reg offset > dword 4: reg value > ... > > oa_context_image_offset() and oa_find_reg_in_lri() are parsing this > context image to look for a specific reg_offset. Once the offset is > known, the OA code programs the reg_value for the context. > > Thanks, > Umesh > >> >>BR, >>Jani. >> >> >>> + if (state[idx] == reg) { >>> + found = true; >>> + break; >>> + } >>> + } >>> + >>> + *offset = idx; >>> + return found; >>> +} >>> + >>> +static u32 oa_context_image_offset(struct intel_context *ce, u32 reg) >>> +{ >>> + u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4; >>> + u32 *state = ce->lrc_reg_state; >>> + >>> + for (offset = 0; offset < len; ) { >>> + if (IS_MI_LRI_CMD(state[offset])) { >>> + if (oa_find_reg_in_lri(state, reg, &offset, len)) >>> + break; >>> + } else { >>> + offset++; >>> + } >>> + } >>> + >>> + return offset < len ? offset : U32_MAX; >>> +} >>> + >>> +static int set_oa_ctx_ctrl_offset(struct intel_context *ce) >>> +{ >>> + i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base); >>> + struct i915_perf *perf = &ce->engine->i915->perf; >>> + u32 offset = perf->ctx_oactxctrl_offset; >>> + >>> + /* Do this only once. Failure is stored as offset of U32_MAX */ >>> + if (offset) >>> + goto exit; >>> + >>> + offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg)); >>> + perf->ctx_oactxctrl_offset = offset; >>> + >>> + drm_dbg(&ce->engine->i915->drm, >>> + "%s oa ctx control at 0x%08x dword offset\n", >>> + ce->engine->name, offset); >>> + >>> +exit: >>> + return valid_oactxctrl_offset(offset) ? 0 : -ENODEV; >>> +} >>> + >>> +static bool engine_supports_mi_query(struct intel_engine_cs *engine) >>> +{ >>> + return engine->class == RENDER_CLASS; >>> +} >>> + >>> /** >>> * oa_get_render_ctx_id - determine and hold ctx hw id >>> * @stream: An i915-perf stream opened for OA metrics >>> @@ -1377,6 +1439,21 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) >>> if (IS_ERR(ce)) >>> return PTR_ERR(ce); >>> >>> + if (engine_supports_mi_query(stream->engine)) { >>> + /* >>> + * We are enabling perf query here. If we don't find the context >>> + * offset here, just return an error. >>> + */ >>> + ret = set_oa_ctx_ctrl_offset(ce); >>> + if (ret) { >>> + intel_context_unpin(ce); >>> + drm_err(&stream->perf->i915->drm, >>> + "Enabling perf query failed for %s\n", >>> + stream->engine->name); >>> + return ret; >>> + } >>> + } >>> + >>> switch (GRAPHICS_VER(ce->engine->i915)) { >>> case 7: { >>> /* >>> @@ -2408,10 +2485,11 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, >>> int err; >>> struct intel_context *ce = stream->pinned_ctx; >>> u32 format = stream->oa_buffer.format; >>> + u32 offset = stream->perf->ctx_oactxctrl_offset; >>> struct flex regs_context[] = { >>> { >>> GEN8_OACTXCONTROL, >>> - stream->perf->ctx_oactxctrl_offset + 1, >>> + offset + 1, >>> active ? GEN8_OA_COUNTER_RESUME : 0, >>> }, >>> }; >>> @@ -2436,15 +2514,18 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream, >>> }, >>> }; >>> >>> - /* Modify the context image of pinned context with regs_context*/ >>> - err = intel_context_lock_pinned(ce); >>> - if (err) >>> - return err; >>> + /* Modify the context image of pinned context with regs_context */ >>> + if (valid_oactxctrl_offset(offset)) { >>> + err = intel_context_lock_pinned(ce); >>> + if (err) >>> + return err; >>> >>> - err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context)); >>> - intel_context_unlock_pinned(ce); >>> - if (err) >>> - return err; >>> + err = gen8_modify_context(ce, regs_context, >>> + ARRAY_SIZE(regs_context)); >>> + intel_context_unlock_pinned(ce); >>> + if (err) >>> + return err; >>> + } >>> >>> /* Apply regs_lri using LRI with pinned context */ >>> return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri), active); >>> @@ -2566,6 +2647,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream, >>> const struct i915_oa_config *oa_config, >>> struct i915_active *active) >>> { >>> + u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset; >>> /* The MMIO offsets for Flex EU registers aren't contiguous */ >>> const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset; >>> #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) >>> @@ -2576,7 +2658,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream, >>> }, >>> { >>> GEN8_OACTXCONTROL, >>> - stream->perf->ctx_oactxctrl_offset + 1, >>> + ctx_oactxctrl + 1, >>> }, >>> { EU_PERF_CNTL0, ctx_flexeuN(0) }, >>> { EU_PERF_CNTL1, ctx_flexeuN(1) }, >>> @@ -4545,6 +4627,37 @@ static void oa_init_supported_formats(struct i915_perf *perf) >>> } >>> } >>> >>> +static void i915_perf_init_info(struct drm_i915_private *i915) >>> +{ >>> + struct i915_perf *perf = &i915->perf; >>> + >>> + switch (GRAPHICS_VER(i915)) { >>> + case 8: >>> + perf->ctx_oactxctrl_offset = 0x120; >>> + perf->ctx_flexeu0_offset = 0x2ce; >>> + perf->gen8_valid_ctx_bit = BIT(25); >>> + break; >>> + case 9: >>> + perf->ctx_oactxctrl_offset = 0x128; >>> + perf->ctx_flexeu0_offset = 0x3de; >>> + perf->gen8_valid_ctx_bit = BIT(16); >>> + break; >>> + case 11: >>> + perf->ctx_oactxctrl_offset = 0x124; >>> + perf->ctx_flexeu0_offset = 0x78e; >>> + perf->gen8_valid_ctx_bit = BIT(16); >>> + break; >>> + case 12: >>> + /* >>> + * Calculate offset at runtime in oa_pin_context for gen12 and >>> + * cache the value in perf->ctx_oactxctrl_offset. >>> + */ >>> + break; >>> + default: >>> + MISSING_CASE(GRAPHICS_VER(i915)); >>> + } >>> +} >>> + >>> /** >>> * i915_perf_init - initialize i915-perf state on module bind >>> * @i915: i915 device instance >>> @@ -4583,6 +4696,7 @@ void i915_perf_init(struct drm_i915_private *i915) >>> * execlist mode by default. >>> */ >>> perf->ops.read = gen8_oa_read; >>> + i915_perf_init_info(i915); >>> >>> if (IS_GRAPHICS_VER(i915, 8, 9)) { >>> perf->ops.is_valid_b_counter_reg = >>> @@ -4602,18 +4716,6 @@ void i915_perf_init(struct drm_i915_private *i915) >>> perf->ops.enable_metric_set = gen8_enable_metric_set; >>> perf->ops.disable_metric_set = gen8_disable_metric_set; >>> perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; >>> - >>> - if (GRAPHICS_VER(i915) == 8) { >>> - perf->ctx_oactxctrl_offset = 0x120; >>> - perf->ctx_flexeu0_offset = 0x2ce; >>> - >>> - perf->gen8_valid_ctx_bit = BIT(25); >>> - } else { >>> - perf->ctx_oactxctrl_offset = 0x128; >>> - perf->ctx_flexeu0_offset = 0x3de; >>> - >>> - perf->gen8_valid_ctx_bit = BIT(16); >>> - } >>> } else if (GRAPHICS_VER(i915) == 11) { >>> perf->ops.is_valid_b_counter_reg = >>> gen7_is_valid_b_counter_addr; >>> @@ -4627,11 +4729,6 @@ void i915_perf_init(struct drm_i915_private *i915) >>> perf->ops.enable_metric_set = gen8_enable_metric_set; >>> perf->ops.disable_metric_set = gen11_disable_metric_set; >>> perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; >>> - >>> - perf->ctx_oactxctrl_offset = 0x124; >>> - perf->ctx_flexeu0_offset = 0x78e; >>> - >>> - perf->gen8_valid_ctx_bit = BIT(16); >>> } else if (GRAPHICS_VER(i915) == 12) { >>> perf->ops.is_valid_b_counter_reg = >>> gen12_is_valid_b_counter_addr; >>> @@ -4645,9 +4742,6 @@ void i915_perf_init(struct drm_i915_private *i915) >>> perf->ops.enable_metric_set = gen12_enable_metric_set; >>> perf->ops.disable_metric_set = gen12_disable_metric_set; >>> perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; >>> - >>> - perf->ctx_flexeu0_offset = 0; >>> - perf->ctx_oactxctrl_offset = 0x144; >>> } >>> } >>> >>> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h >>> index f31c9f13a9fc..0ef3562ff4aa 100644 >>> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h >>> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h >>> @@ -97,7 +97,7 @@ >>> #define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 >>> #define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) >>> >>> -#define GEN12_OACTXCONTROL _MMIO(0x2360) >>> +#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360) >>> #define GEN12_OAR_OASTATUS _MMIO(0x2968) >>> >>> /* Gen12 OAG unit */ >> >>-- >>Jani Nikula, Intel Open Source Graphics Center -- Jani Nikula, Intel Open Source Graphics Center