On Thu, 16 Feb 2023 16:58:50 -0800, Umesh Nerlige Ramappa wrote: > Hi Umesh, > MTL introduces additional OA units dedicated to media use cases. Add > support for programming these OA units by passing the media engine class > and instance parameters. > > UMD specific changes for GPUvis support: > https://patchwork.freedesktop.org/patch/522827/?series=114023 > https://patchwork.freedesktop.org/patch/522822/?series=114023 > https://patchwork.freedesktop.org/patch/522826/?series=114023 > https://patchwork.freedesktop.org/patch/522828/?series=114023 > https://patchwork.freedesktop.org/patch/522816/?series=114023 > https://patchwork.freedesktop.org/patch/522825/?series=114023 General comment about the patch in case I miss something out, as I've mentioned previously in general let's try to replace INTEL_METEORLAKE and IS_METEORLAKE checks in the patch with: if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) So that we don't have to enumerate each platform individually later. > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_pci.c | 1 + > drivers/gpu/drm/i915/i915_perf.c | 247 ++++++++++++++++++++--- > drivers/gpu/drm/i915/i915_perf_oa_regs.h | 78 +++++++ > drivers/gpu/drm/i915/i915_perf_types.h | 40 ++++ > drivers/gpu/drm/i915/intel_device_info.h | 1 + > include/uapi/drm/i915_drm.h | 4 + > 7 files changed, 347 insertions(+), 26 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 0393273faa09..f3cacbf41c86 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -856,6 +856,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > (INTEL_INFO(dev_priv)->has_oa_bpc_reporting) > #define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \ > (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits) > +#define HAS_OAM(dev_priv) \ > + (INTEL_INFO(dev_priv)->has_oam) > > /* > * Set this flag, when platform requires 64K GTT page sizes or larger for > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index a8d942b16223..621730b6551c 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -1028,6 +1028,7 @@ static const struct intel_device_info adl_p_info = { > .has_mslice_steering = 1, \ > .has_oa_bpc_reporting = 1, \ > .has_oa_slice_contrib_limits = 1, \ > + .has_oam = 1, \ > .has_rc6 = 1, \ > .has_reset_engine = 1, \ > .has_rps = 1, \ > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index f028df812067..a57690f4c531 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -192,6 +192,7 @@ > */ > > #include <linux/anon_inodes.h> > +#include <linux/nospec.h> > #include <linux/sizes.h> > #include <linux/uuid.h> > > @@ -326,6 +327,13 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = { > [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, > [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 }, > + [I915_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, TYPE_OAM, HDR_64_BIT }, > + [I915_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, TYPE_OAM, HDR_64_BIT }, > +}; > + > +/* PERF_GROUP_OAG is unused for oa_base, drop it for mtl */ What does this comment mean? > +static const u32 mtl_oa_base[] = { > + [PERF_GROUP_OAM_SAMEDIA_0] = 0x393000, > }; > > #define SAMPLE_OA_REPORT (1<<0) > @@ -418,11 +426,17 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) > kfree(oa_bo); > } > > +static inline const > +struct i915_perf_regs *__oa_regs(struct i915_perf_stream *stream) > +{ > + return &stream->oa_buffer.group->regs; Should just use stream->engine->oa_group->regs, see near the bottom. > +} > + > static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) > { > struct intel_uncore *uncore = stream->uncore; > > - return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & > + return intel_uncore_read(uncore, __oa_regs(stream)->oa_tail_ptr) & > GEN12_OAG_OATAILPTR_MASK; > } > > @@ -886,7 +900,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, > i915_reg_t oaheadptr; > > oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ? >= 12 ? > - GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; > + __oa_regs(stream)->oa_head_ptr : > + GEN8_OAHEADPTR; > > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > > @@ -939,7 +954,8 @@ static int gen8_oa_read(struct i915_perf_stream *stream, > return -EIO; > > oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ? >= 12 ? > - GEN12_OAG_OASTATUS : GEN8_OASTATUS; > + __oa_regs(stream)->oa_status : > + GEN8_OASTATUS; > > oastatus = intel_uncore_read(uncore, oastatus_reg); > > @@ -1643,16 +1659,46 @@ free_noa_wait(struct i915_perf_stream *stream) > i915_vma_unpin_and_release(&stream->noa_wait, 0); > } > > +/* > + * intel_engine_lookup_user ensures that most of engine specific checks are > + * taken care of, however, we can run into a case where the OA unit catering to > + * the engine passed by the user is disabled for some reason. In such cases, > + * ensure oa unit corresponding to an engine is functional. If there are no > + * engines in the group, the unit is disabled. > + */ > +static bool oa_unit_functional(const struct intel_engine_cs *engine) > +{ > + return engine->oa_group && engine->oa_group->num_engines; > +} > + > static bool engine_supports_oa(const struct intel_engine_cs *engine) > { > enum intel_platform platform = INTEL_INFO(engine->i915)->platform; > > switch (platform) { > + case INTEL_METEORLAKE: > + return engine->class == RENDER_CLASS || > + ((engine->class == VIDEO_DECODE_CLASS || > + engine->class == VIDEO_ENHANCEMENT_CLASS) && > + engine->gt->type == GT_MEDIA); > default: > return engine->class == RENDER_CLASS; > } As mentioned in a previous patch, this could just be: return engine->oa_group; Because all these checks have already been done when the perf groups were initialized so let's use that, as is done for oa_unit_functional. Though, caution, to return engine->oa_group we'd have to remove engine_supports_oa from __oa_engine_group, since engine->oa_group is not yet assigned there. But I think the engine_supports_oa check in __oa_engine_group is a duplication and should be removed. > } > > +static bool engine_class_supports_oa_format(struct intel_engine_cs *engine, int type) > +{ > + switch (engine->class) { > + case RENDER_CLASS: > + return type == TYPE_OAG; > + case VIDEO_DECODE_CLASS: > + case VIDEO_ENHANCEMENT_CLASS: > + return type == TYPE_OAM; > + default: > + return false; > + } > +} > + Again, how about: return engine->oa_group && engine->oa_group->type == type; Otherwise as mentioned below oa_group->type is unused and also incorrectly assigned at present. The format type and group types are the same (TYPE_OAG/TYPE_OAM). Can name the function engine_supports_oa_format. > static void i915_oa_stream_destroy(struct i915_perf_stream *stream) > { > struct i915_perf *perf = stream->perf; > @@ -1680,7 +1726,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) > drm_WARN_ON(>->i915->drm, > intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc)); > > - intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); > + intel_uncore_forcewake_put(stream->uncore, g->fw_domains); > intel_engine_pm_put(stream->engine); > > if (stream->ctx) > @@ -1804,8 +1850,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) > > spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); > > - intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); > - intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, > + intel_uncore_write(uncore, __oa_regs(stream)->oa_status, 0); > + intel_uncore_write(uncore, __oa_regs(stream)->oa_head_ptr, > gtt_offset & GEN12_OAG_OAHEADPTR_MASK); > stream->oa_buffer.head = gtt_offset; > > @@ -1817,9 +1863,9 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) > * to enable proper functionality of the overflow > * bit." > */ > - intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | > + intel_uncore_write(uncore, __oa_regs(stream)->oa_buffer, gtt_offset | > OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); > - intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, > + intel_uncore_write(uncore, __oa_regs(stream)->oa_tail_ptr, > gtt_offset & GEN12_OAG_OATAILPTR_MASK); > > /* Mark that we need updated tail pointers to read from... */ > @@ -2579,7 +2625,8 @@ gen8_modify_self(struct intel_context *ce, > return err; > } > > -static int gen8_configure_context(struct i915_gem_context *ctx, > +static int gen8_configure_context(struct i915_perf_stream *stream, > + struct i915_gem_context *ctx, > struct flex *flex, unsigned int count) > { > struct i915_gem_engines_iter it; > @@ -2589,7 +2636,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, > for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { > GEM_BUG_ON(ce == ce->engine->kernel_context); > > - if (!engine_supports_oa(ce->engine)) > + if (!engine_supports_oa(ce->engine) || > + ce->engine->class != stream->engine->class) > continue; > > /* Otherwise OA settings will be set upon first use */ > @@ -2720,7 +2768,7 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, > > spin_unlock(&i915->gem.contexts.lock); > > - err = gen8_configure_context(ctx, regs, num_regs); > + err = gen8_configure_context(stream, ctx, regs, num_regs); > if (err) { > i915_gem_context_put(ctx); > return err; > @@ -2740,7 +2788,8 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, > for_each_uabi_engine(engine, i915) { > struct intel_context *ce = engine->kernel_context; > > - if (!engine_supports_oa(ce->engine)) > + if (!engine_supports_oa(ce->engine) || > + ce->engine->class != stream->engine->class) > continue; > > regs[0].value = intel_sseu_make_rpcs(engine->gt, &ce->sseu); > @@ -2765,6 +2814,9 @@ gen12_configure_all_contexts(struct i915_perf_stream *stream, > }, > }; > > + if (stream->engine->class != RENDER_CLASS) > + return 0; OK, this is for render, nothing equivalent needed for media? > + > return oa_configure_all_contexts(stream, > regs, ARRAY_SIZE(regs), > active); > @@ -2894,7 +2946,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, > _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING)); > } > > - intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, > + intel_uncore_write(uncore, __oa_regs(stream)->oa_debug, > /* Disable clk ratio reports, like previous Gens. */ > _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | > GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | > @@ -2904,7 +2956,7 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, > */ > oag_report_ctx_switches(stream)); > > - intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? > + intel_uncore_write(uncore, __oa_regs(stream)->oa_ctx_ctrl, periodic ? > (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | > GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | > (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) > @@ -3058,8 +3110,8 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) > > static void gen12_oa_enable(struct i915_perf_stream *stream) > { > - struct intel_uncore *uncore = stream->uncore; > - u32 report_format = stream->oa_buffer.format->format; > + const struct i915_perf_regs *regs; > + u32 val; > > /* > * If we don't want OA reports from the OA buffer, then we don't even > @@ -3070,9 +3122,11 @@ static void gen12_oa_enable(struct i915_perf_stream *stream) > > gen12_init_oa_buffer(stream); > > - intel_uncore_write(uncore, GEN12_OAG_OACONTROL, > - (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | > - GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); > + regs = __oa_regs(stream); > + val = (stream->oa_buffer.format->format << regs->oa_ctrl_counter_format_shift) | > + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE; > + > + intel_uncore_write(stream->uncore, regs->oa_ctrl, val); > } > > /** > @@ -3124,9 +3178,9 @@ static void gen12_oa_disable(struct i915_perf_stream *stream) > { > struct intel_uncore *uncore = stream->uncore; > > - intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); > + intel_uncore_write(uncore, __oa_regs(stream)->oa_ctrl, 0); > if (intel_wait_for_register(uncore, > - GEN12_OAG_OACONTROL, > + __oa_regs(stream)->oa_ctrl, > GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, > 50)) > drm_err(&stream->perf->i915->drm, > @@ -3329,6 +3383,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > > stream->sample_size = sizeof(struct drm_i915_perf_record_header); > > + stream->oa_buffer.group = g; Should just use stream->engine->oa_group, see near the bottom. > stream->oa_buffer.format = &perf->oa_formats[props->oa_format]; > if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0)) > return -EINVAL; > @@ -3379,7 +3434,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > * references will effectively disable RC6. > */ > intel_engine_pm_get(stream->engine); > - intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL); > + intel_uncore_forcewake_get(stream->uncore, g->fw_domains); > > /* > * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes > @@ -3440,7 +3495,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, > intel_guc_slpc_unset_gucrc_mode(>->uc.guc.slpc); > > err_gucrc: > - intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL); > + intel_uncore_forcewake_put(stream->uncore, g->fw_domains); > intel_engine_pm_put(stream->engine); > > free_oa_configs(stream); > @@ -4033,6 +4088,7 @@ static int read_properties_unlocked(struct i915_perf *perf, > struct perf_open_properties *props) > { > struct drm_i915_gem_context_param_sseu user_sseu; > + const struct i915_oa_format *f; > u64 __user *uprop = uprops; > bool config_sseu = false; > u8 class, instance; > @@ -4203,6 +4259,17 @@ static int read_properties_unlocked(struct i915_perf *perf, > if (!engine_supports_oa(props->engine)) > return -EINVAL; > > + if (!oa_unit_functional(props->engine)) > + return -ENODEV; > + > + i = array_index_nospec(props->oa_format, I915_OA_FORMAT_MAX); Why do we need this (something to do with speculation)? Can just do '&perf->oa_formats[props->oa_format]' below? The format passed in has already been checked in the switch statement above. > + f = &perf->oa_formats[i]; > + if (!engine_class_supports_oa_format(props->engine, f->type)) { > + DRM_DEBUG("Invalid OA format %d for class %d\n", > + f->type, props->engine->class); > + return -EINVAL; > + } > + > if (config_sseu) { > ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); > if (ret) { > @@ -4383,6 +4450,14 @@ static const struct i915_range gen12_oa_b_counters[] = { > {} > }; > > +static const struct i915_range mtl_oam_b_counters[] = { > + { .start = 0x393000, .end = 0x39301c }, /* GEN12_OAM_STARTTRIG1[1-8] */ > + { .start = 0x393020, .end = 0x39303c }, /* GEN12_OAM_REPORTTRIG1[1-8] */ > + { .start = 0x393040, .end = 0x39307c }, /* GEN12_OAM_CEC[0-7][0-1] */ > + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ > + {} > +}; > + > static const struct i915_range xehp_oa_b_counters[] = { > { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ > { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ > @@ -4429,13 +4504,16 @@ static const struct i915_range gen12_oa_mux_regs[] = { > > /* > * Ref: 14010536224: > - * 0x20cc is repurposed on MTL, so use a separate array for MTL. > + * 0x20cc is repurposed on MTL, so use a separate array for MTL. Also add the > + * MPES/MPEC registers. MPES/MPEC registers are added above now, not here so maybe get rid of the comment change above? > */ > static const struct i915_range mtl_oa_mux_regs[] = { > { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ > { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ > { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ > { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ > + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ > + {} > }; > > static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) > @@ -4473,10 +4551,26 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) > return reg_in_range_table(addr, gen12_oa_b_counters); > } > > +static bool xehp_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr) > +{ > + enum intel_platform platform = INTEL_INFO(perf->i915)->platform; > + > + if (!HAS_OAM(perf->i915)) > + return false; > + > + switch (platform) { > + case INTEL_METEORLAKE: > + return reg_in_range_table(addr, mtl_oam_b_counters); > + default: > + return false; > + } Replace with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))', registers are identical in later platforms too. Should the function prefix be xehp or mtl? Don't see xehp in bspec, probably xehp is discontinued. > +} > + > static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) > { > return reg_in_range_table(addr, xehp_oa_b_counters) || > - reg_in_range_table(addr, gen12_oa_b_counters); > + reg_in_range_table(addr, gen12_oa_b_counters) || > + xehp_is_valid_oam_b_counter_addr(perf, addr); > } > > static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) > @@ -4846,11 +4940,39 @@ static u32 __num_perf_groups_per_gt(struct intel_gt *gt) > enum intel_platform platform = INTEL_INFO(gt->i915)->platform; > > switch (platform) { > + case INTEL_METEORLAKE: > + return 1; I don't think we need this, as proposed previously maybe the function should just unconditionally return 1. > default: > return 1; > } > } > > +static u32 __oam_engine_group(struct intel_engine_cs *engine) > +{ > + enum intel_platform platform = INTEL_INFO(engine->i915)->platform; > + struct intel_gt *gt = engine->gt; > + u32 group = PERF_GROUP_INVALID; > + > + switch (platform) { > + case INTEL_METEORLAKE: Replace here with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))'. > + /* > + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices > + * within the gt use the same OAM. All MTL SKUs list 1 SA MEDIA. > + */ > + drm_WARN_ON(&engine->i915->drm, > + engine->gt->type != GT_MEDIA); > + > + group = PERF_GROUP_OAM_SAMEDIA_0; > + break; > + default: > + break; > + } > + > + drm_WARN_ON(>->i915->drm, group >= __num_perf_groups_per_gt(gt)); > + > + return group; > +} > + > static u32 __oa_engine_group(struct intel_engine_cs *engine) > { > if (!engine_supports_oa(engine)) As mentioned above for engine_supports_oa, this looks like a duplication of the checks below and should probably be removed. > @@ -4860,11 +4982,58 @@ static u32 __oa_engine_group(struct intel_engine_cs *engine) > case RENDER_CLASS: > return PERF_GROUP_OAG; > > + case VIDEO_DECODE_CLASS: > + case VIDEO_ENHANCEMENT_CLASS: > + return __oam_engine_group(engine); > + > default: > return PERF_GROUP_INVALID; > } > } > > +static struct i915_perf_regs __oam_regs(u32 base) > +{ > + return (struct i915_perf_regs) { > + base, > + GEN12_OAM_HEAD_POINTER(base), > + GEN12_OAM_TAIL_POINTER(base), > + GEN12_OAM_BUFFER(base), > + GEN12_OAM_CONTEXT_CONTROL(base), > + GEN12_OAM_CONTROL(base), > + GEN12_OAM_DEBUG(base), > + GEN12_OAM_STATUS(base), > + GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT, > + }; > +} > + > +static struct i915_perf_regs __oag_regs(void) > +{ > + return (struct i915_perf_regs) { > + 0, > + GEN12_OAG_OAHEADPTR, > + GEN12_OAG_OATAILPTR, > + GEN12_OAG_OABUFFER, > + GEN12_OAG_OAGLBCTXCTRL, > + GEN12_OAG_OACONTROL, > + GEN12_OAG_OA_DEBUG, > + GEN12_OAG_OASTATUS, > + GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT, > + }; > +} > + > +static void oa_init_regs(struct intel_gt *gt, u32 id) > +{ > + struct i915_perf_group *group = >->perf.group[id]; > + struct i915_perf_regs *regs = &group->regs; > + > + if (id == PERF_GROUP_OAG && gt->type != GT_MEDIA) > + *regs = __oag_regs(); > + else if (IS_METEORLAKE(gt->i915)) Replace with 'if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))', OAM registers are identical in later platforms too. Maybe get rid of drm_WARN below? > + *regs = __oam_regs(mtl_oa_base[id]); > + else > + drm_WARN(>->i915->drm, 1, "Unsupported platform for OA\n"); > +} > + > static void oa_init_groups(struct intel_gt *gt) > { > int i, num_groups = gt->perf.num_perf_groups; > @@ -4881,6 +5050,24 @@ static void oa_init_groups(struct intel_gt *gt) > g->oa_unit_id = perf->oa_unit_ids++; > > g->gt = gt; > + oa_init_regs(gt, i); > + g->fw_domains = FORCEWAKE_ALL; > + if (i == PERF_GROUP_OAG) { > + g->type = TYPE_OAG; > + > + /* > + * Enabling all fw domains for OAG caps the max GT > + * frequency to media FF max. This could be less than > + * what the user sets through the sysfs and perf > + * measurements could be skewed. Since some platforms > + * have separate OAM units to measure media perf, do not > + * enable media fw domains for OAG. > + */ > + if (HAS_OAM(gt->i915)) > + g->fw_domains = FORCEWAKE_GT | FORCEWAKE_RENDER; Is this needed even when media and render are separate tiles, which is the only case we have in this code right now? For separate tiles setting FORCEWAKE_ALL should not cap the freq, correct? If not needed we can get rid of g->fw_domains. > + } else { > + g->type = TYPE_OAM; This is wrong, because num_perf_groups is 1. So the type should be assigned not based on i (which is always 0) but maybe similar to what is done in oa_init_regs above. We are escaping because g->type is unused as mentioned below. > + } > } > } > > @@ -4970,9 +5157,15 @@ static void oa_init_supported_formats(struct i915_perf *perf) > break; > > case INTEL_DG2: > + oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); > + oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); > + break; > + > case INTEL_METEORLAKE: > oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8); > oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8); > + oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8); > + oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8); > break; > > default: > @@ -5217,8 +5410,10 @@ int i915_perf_ioctl_version(void) > * > * 6: Add DRM_I915_PERF_PROP_OA_ENGINE_CLASS and > * DRM_I915_PERF_PROP_OA_ENGINE_INSTANCE > + * > + * 7: Add support for video decode and enhancement classes. > */ > - return 6; > + return 7; Let's figure out if we want to club all this into 6. > } > > #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) > diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h > index 381d94101610..ba103875e19f 100644 > --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h > +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h > @@ -138,4 +138,82 @@ > #define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30) > #define GEN12_SQCNT1_OABPC REG_BIT(29) > > +/* Gen12 OAM unit */ > +#define GEN12_OAM_HEAD_POINTER_OFFSET (0x1a0) > +#define GEN12_OAM_HEAD_POINTER_MASK 0xffffffc0 > + > +#define GEN12_OAM_TAIL_POINTER_OFFSET (0x1a4) > +#define GEN12_OAM_TAIL_POINTER_MASK 0xffffffc0 > + > +#define GEN12_OAM_BUFFER_OFFSET (0x1a8) > +#define GEN12_OAM_BUFFER_SIZE_MASK (0x7) > +#define GEN12_OAM_BUFFER_SIZE_SHIFT (3) > +#define GEN12_OAM_BUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ > + > +#define GEN12_OAM_CONTEXT_CONTROL_OFFSET (0x1bc) > +#define GEN12_OAM_CONTEXT_CONTROL_TIMER_PERIOD_SHIFT 2 > +#define GEN12_OAM_CONTEXT_CONTROL_TIMER_ENABLE REG_BIT(1) > +#define GEN12_OAM_CONTEXT_CONTROL_COUNTER_RESUME REG_BIT(0) > + > +#define GEN12_OAM_CONTROL_OFFSET (0x194) > +#define GEN12_OAM_CONTROL_COUNTER_FORMAT_SHIFT 1 > +#define GEN12_OAM_CONTROL_COUNTER_ENABLE REG_BIT(0) > + > +#define GEN12_OAM_DEBUG_OFFSET (0x198) > +#define GEN12_OAM_DEBUG_BUFFER_SIZE_SELECT REG_BIT(12) > +#define GEN12_OAM_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) > +#define GEN12_OAM_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) > +#define GEN12_OAM_DEBUG_DISABLE_GO_1_0_REPORTS REG_BIT(2) > +#define GEN12_OAM_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) > + > +#define GEN12_OAM_STATUS_OFFSET (0x19c) > +#define GEN12_OAM_STATUS_COUNTER_OVERFLOW REG_BIT(2) > +#define GEN12_OAM_STATUS_BUFFER_OVERFLOW REG_BIT(1) > +#define GEN12_OAM_STATUS_REPORT_LOST REG_BIT(0) > + > +#define GEN12_OAM_MMIO_TRG_OFFSET (0x1d0) > + > +#define GEN12_OAM_MMIO_TRG(base) \ > + _MMIO((base) + GEN12_OAM_MMIO_TRG_OFFSET) > + > +#define GEN12_OAM_HEAD_POINTER(base) \ > + _MMIO((base) + GEN12_OAM_HEAD_POINTER_OFFSET) > +#define GEN12_OAM_TAIL_POINTER(base) \ > + _MMIO((base) + GEN12_OAM_TAIL_POINTER_OFFSET) > +#define GEN12_OAM_BUFFER(base) \ > + _MMIO((base) + GEN12_OAM_BUFFER_OFFSET) > +#define GEN12_OAM_CONTEXT_CONTROL(base) \ > + _MMIO((base) + GEN12_OAM_CONTEXT_CONTROL_OFFSET) > +#define GEN12_OAM_CONTROL(base) \ > + _MMIO((base) + GEN12_OAM_CONTROL_OFFSET) > +#define GEN12_OAM_DEBUG(base) \ > + _MMIO((base) + GEN12_OAM_DEBUG_OFFSET) > +#define GEN12_OAM_STATUS(base) \ > + _MMIO((base) + GEN12_OAM_STATUS_OFFSET) > + > +#define GEN12_OAM_CEC0_0_OFFSET (0x40) > +#define GEN12_OAM_CEC7_1_OFFSET (0x7c) > +#define GEN12_OAM_CEC0_0(base) \ > + _MMIO((base) + GEN12_OAM_CEC0_0_OFFSET) > +#define GEN12_OAM_CEC7_1(base) \ > + _MMIO((base) + GEN12_OAM_CEC7_1_OFFSET) > + > +#define GEN12_OAM_STARTTRIG1_OFFSET (0x00) > +#define GEN12_OAM_STARTTRIG8_OFFSET (0x1c) > +#define GEN12_OAM_STARTTRIG1(base) \ > + _MMIO((base) + GEN12_OAM_STARTTRIG1_OFFSET) > +#define GEN12_OAM_STARTTRIG8(base) \ > + _MMIO((base) + GEN12_OAM_STARTTRIG8_OFFSET) > + > +#define GEN12_OAM_REPORTTRIG1_OFFSET (0x20) > +#define GEN12_OAM_REPORTTRIG8_OFFSET (0x3c) > +#define GEN12_OAM_REPORTTRIG1(base) \ > + _MMIO((base) + GEN12_OAM_REPORTTRIG1_OFFSET) > +#define GEN12_OAM_REPORTTRIG8(base) \ > + _MMIO((base) + GEN12_OAM_REPORTTRIG8_OFFSET) > + > +#define GEN12_OAM_PERF_COUNTER_B0_OFFSET (0x84) > +#define GEN12_OAM_PERF_COUNTER_B(base, idx) \ > + _MMIO((base) + GEN12_OAM_PERF_COUNTER_B0_OFFSET + 4 * (idx)) > + > #endif /* __INTEL_PERF_OA_REGS__ */ > diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h > index 8ccb0b89d019..5b2c3bab60f8 100644 > --- a/drivers/gpu/drm/i915/i915_perf_types.h > +++ b/drivers/gpu/drm/i915/i915_perf_types.h > @@ -20,6 +20,7 @@ > #include "gt/intel_engine_types.h" > #include "gt/intel_sseu.h" > #include "i915_reg_defs.h" > +#include "intel_uncore.h" > #include "intel_wakeref.h" > > struct drm_i915_private; > @@ -33,6 +34,7 @@ struct intel_engine_cs; > > enum { > PERF_GROUP_OAG = 0, > + PERF_GROUP_OAM_SAMEDIA_0 = 0, > > PERF_GROUP_MAX, > PERF_GROUP_INVALID = U32_MAX, > @@ -43,9 +45,27 @@ enum report_header { > HDR_64_BIT, > }; > > +struct i915_perf_regs { > + u32 base; > + i915_reg_t oa_head_ptr; > + i915_reg_t oa_tail_ptr; > + i915_reg_t oa_buffer; > + i915_reg_t oa_ctx_ctrl; > + i915_reg_t oa_ctrl; > + i915_reg_t oa_debug; > + i915_reg_t oa_status; > + u32 oa_ctrl_counter_format_shift; > +}; > + > +enum { enum oa_type? > + TYPE_OAG, > + TYPE_OAM, > +}; > + > struct i915_oa_format { > u32 format; > int size; > + int type; > enum report_header header; > }; > > @@ -317,6 +337,11 @@ struct i915_perf_stream { > * @tail: The last verified tail that can be read by userspace. > */ > u32 tail; > + > + /** > + * @group: The group object for this OA buffer. > + */ > + struct i915_perf_group *group; Isn't this just stream->engine->oa_group, so let's use that instead rather than duplicating? > } oa_buffer; > > /** > @@ -431,6 +456,21 @@ struct i915_perf_group { > * @engine_mask: A mask of engines using a single OA buffer. > */ > intel_engine_mask_t engine_mask; > + > + /* > + * @regs: OA buffer register group for programming the OA unit. > + */ > + struct i915_perf_regs regs; > + > + /* > + * @type: Type of OA buffer, OAM, OAG etc. s/OA buffer/OA unit/ > + */ > + int type; Also (incorrectly) assigned but currently unused unless we make the change to engine_class_supports_oa_format mentioned above. But I think we should retain and use this. > + > + /* > + * @fw_domains: forcewake domains required for this group. > + */ > + enum forcewake_domains fw_domains; Let's see if we need this. > }; > > struct i915_perf_gt { > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h > index 80bda653d61b..45e218327f44 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -166,6 +166,7 @@ enum intel_ppgtt_type { > func(has_mslice_steering); \ > func(has_oa_bpc_reporting); \ > func(has_oa_slice_contrib_limits); \ > + func(has_oam); \ > func(has_one_eu_per_fuse_bit); \ > func(has_pxp); \ > func(has_rc6); \ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index b6922b52d85c..70bfa6530dbc 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -2676,6 +2676,10 @@ enum drm_i915_oa_format { > I915_OAR_FORMAT_A32u40_A4u32_B8_C8, > I915_OA_FORMAT_A24u40_A14u32_B8_C8, > > + /* MTL OAM */ > + I915_OAM_FORMAT_MPEC8u64_B8_C8, > + I915_OAM_FORMAT_MPEC8u32_B8_C8, > + > I915_OA_FORMAT_MAX /* non-ABI */ > }; Thanks. -- Ashutosh