On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > > From: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > > Gen12 has dual-subslices (DSS), which compared to gen11 subslices have > some duplicated resources/paths. Although DSS behave similarly to 2 > subslices, instead of splitting this and presenting userspace with bits > not directly representative of hardware resources, present userspace > with a subslice_mask made up of DSS bits instead. > > v2: GEM_BUG_ON on mask size (Lionel) > > Bspec: 29547 > Bspec: 12247 > Cc: Kelvin Gardiner <kelvin.gardiner@xxxxxxxxx> > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > Cc: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > CC: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> > Cc: Michel Thierry <michel.thierry@xxxxxxxxx> #v1 > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > Cc: José Roberto de Souza <jose.souza@xxxxxxxxx> > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > Signed-off-by: James Ausmus <james.ausmus@xxxxxxxxx> > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> > Signed-off-by: Sudeep Dutt <sudeep.dutt@xxxxxxxxx> > Signed-off-by: Stuart Summers <stuart.summers@xxxxxxxxx> > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > Acked-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > --- After this I get the correct values for TGL: - Available Subslice Total: 2 - Available Slice0 subslices: 2 - Available EU Total: 16 - Available EU Per Subslice: 8 + Available Subslice Total: 6 + Available Slice0 subslices: 6 + Available EU Total: 96 + Available EU Per Subslice: 16 Reviewed-by: Lucas De Marchi <lucas.demarchi@xxxxxxxxx> Lucas De Marchi > drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- > drivers/gpu/drm/i915/i915_debugfs.c | 3 +- > drivers/gpu/drm/i915/i915_reg.h | 2 + > drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------ > include/uapi/drm/i915_drm.h | 6 +- > 5 files changed, 72 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h > index 4070f6ff1db6..d1d225204f09 100644 > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > @@ -18,12 +18,13 @@ struct drm_i915_private; > #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ > #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) > #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) > -#define GEN_MAX_EUS (10) /* HSW upper bound */ > +#define GEN_MAX_EUS (16) /* TGL upper bound */ > #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) > > struct sseu_dev_info { > u8 slice_mask; > u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; > + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; > u16 eu_total; > u8 eu_per_subslice; > u8 min_eu_in_pool; > @@ -40,12 +41,6 @@ struct sseu_dev_info { > > u8 ss_stride; > u8 eu_stride; > - > - /* We don't have more than 8 eus per subslice at the moment and as we > - * store eus enabled using bits, no need to multiply by eus per > - * subslice. > - */ > - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; > }; > > /* > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 43db50095257..b5b449a88cf1 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, > for (ss = 0; ss < info->sseu.max_subslices; ss++) { > unsigned int eu_cnt; > > - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) > + if (info->sseu.has_subslice_pg && > + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) > /* skip disabled subslice */ > continue; > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index bf37ecebc82f..47847135a11f 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) > > #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) > > +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) > + > #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) > #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) > #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) > diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c > index 50b05a5de53b..b91a960b037f 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.c > +++ b/drivers/gpu/drm/i915/intel_device_info.c > @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) > return total; > } > > +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, > + u8 s_en, u32 ss_en, u16 eu_en) > +{ > + int s, ss; > + > + /* ss_en represents entire subslice mask across all slices */ > + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > > + sizeof(ss_en) * BITS_PER_BYTE); > + > + for (s = 0; s < sseu->max_slices; s++) { > + if ((s_en & BIT(s)) == 0) > + continue; > + > + sseu->slice_mask |= BIT(s); > + > + intel_sseu_set_subslices(sseu, s, ss_en); > + > + for (ss = 0; ss < sseu->max_subslices; ss++) > + if (intel_sseu_has_subslice(sseu, s, ss)) > + sseu_set_eus(sseu, s, ss, eu_en); > + } > + sseu->eu_per_subslice = hweight16(eu_en); > + sseu->eu_total = compute_eu_total(sseu); > +} > + > +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) > +{ > + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; > + u8 s_en; > + u32 dss_en; > + u16 eu_en = 0; > + u8 eu_en_fuse; > + int eu; > + > + /* > + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. > + * Instead of splitting these, provide userspace with an array > + * of DSS to more closely represent the hardware resource. > + */ > + intel_sseu_set_info(sseu, 1, 6, 16); > + > + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; > + > + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); > + > + /* one bit per pair of EUs */ > + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); > + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) > + if (eu_en_fuse & BIT(eu)) > + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); > + > + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); > + > + /* TGL only supports slice-level power gating */ > + sseu->has_slice_pg = 1; > +} > + > static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) > { > struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; > u8 s_en; > - u32 ss_en, ss_en_mask; > + u32 ss_en; > u8 eu_en; > - int s; > > if (IS_ELKHARTLAKE(dev_priv)) > intel_sseu_set_info(sseu, 1, 4, 8); > @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) > > s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; > ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); > - ss_en_mask = BIT(sseu->max_subslices) - 1; > eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); > > - for (s = 0; s < sseu->max_slices; s++) { > - if (s_en & BIT(s)) { > - int ss_idx = sseu->max_subslices * s; > - int ss; > - > - sseu->slice_mask |= BIT(s); > - > - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & > - ss_en_mask); > - > - for (ss = 0; ss < sseu->max_subslices; ss++) > - if (intel_sseu_has_subslice(sseu, s, ss)) > - sseu_set_eus(sseu, s, ss, eu_en); > - } > - } > - sseu->eu_per_subslice = hweight8(eu_en); > - sseu->eu_total = compute_eu_total(sseu); > + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); > > /* ICL has no power gating restrictions. */ > sseu->has_slice_pg = 1; > @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) > gen9_sseu_info_init(dev_priv); > else if (IS_GEN(dev_priv, 10)) > gen10_sseu_info_init(dev_priv); > - else if (INTEL_GEN(dev_priv) >= 11) > + else if (IS_GEN(dev_priv, 11)) > gen11_sseu_info_init(dev_priv); > + else if (INTEL_GEN(dev_priv) >= 12) > + gen12_sseu_info_init(dev_priv); > > if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { > DRM_INFO("Disabling ppGTT for VT-d support\n"); > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 469dc512cca3..30c542144016 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -2033,8 +2033,10 @@ struct drm_i915_query { > * (data[X / 8] >> (X % 8)) & 1 > * > * - the subslice mask for each slice with one bit per subslice telling > - * whether a subslice is available. The availability of subslice Y in slice > - * X can be queried with the following formula : > + * whether a subslice is available. Gen12 has dual-subslices, which are > + * similar to two gen11 subslices. For gen12, this array represents dual- > + * subslices. The availability of subslice Y in slice X can be queried > + * with the following formula : > * > * (data[subslice_offset + > * X * subslice_stride + > -- > 2.23.0 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Lucas De Marchi _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx