From: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have some duplicated resources/paths. Although DSS behave similarly to 2 subslices, instead of splitting this and presenting userspace with bits not directly representative of hardware resources, present userspace with a subslice_mask made up of DSS bits instead. v2: GEM_BUG_ON on mask size (Lionel) Bspec: 29547 Bspec: 12247 Cc: Kelvin Gardiner <kelvin.gardiner@xxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> Cc: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> CC: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> Cc: Michel Thierry <michel.thierry@xxxxxxxxx> #v1 Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Cc: José Roberto de Souza <jose.souza@xxxxxxxxx> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> Signed-off-by: James Ausmus <james.ausmus@xxxxxxxxx> Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> Signed-off-by: Sudeep Dutt <sudeep.dutt@xxxxxxxxx> Signed-off-by: Stuart Summers <stuart.summers@xxxxxxxxx> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> Acked-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- drivers/gpu/drm/i915/i915_debugfs.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------ include/uapi/drm/i915_drm.h | 6 +- 5 files changed, 72 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 4070f6ff1db6..d1d225204f09 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -18,12 +18,13 @@ struct drm_i915_private; #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE) #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) -#define GEN_MAX_EUS (10) /* HSW upper bound */ +#define GEN_MAX_EUS (16) /* TGL upper bound */ #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -40,12 +41,6 @@ struct sseu_dev_info { u8 ss_stride; u8 eu_stride; - - /* We don't have more than 8 eus per subslice at the moment and as we - * store eus enabled using bits, no need to multiply by eus per - * subslice. - */ - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e5835337f022..f2b92be44adf 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3820,7 +3820,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, for (ss = 0; ss < info->sseu.max_subslices; ss++) { unsigned int eu_cnt; - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) + if (info->sseu.has_subslice_pg && + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss)))) /* skip disabled subslice */ continue; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bf37ecebc82f..47847135a11f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d9b5baaef5d0..8516731985eb 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, + u8 s_en, u32 ss_en, u16 eu_en) +{ + int s, ss; + + /* ss_en represents entire subslice mask across all slices */ + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > + sizeof(ss_en) * BITS_PER_BYTE); + + for (s = 0; s < sseu->max_slices; s++) { + if ((s_en & BIT(s)) == 0) + continue; + + sseu->slice_mask |= BIT(s); + + intel_sseu_set_subslices(sseu, s, ss_en); + + for (ss = 0; ss < sseu->max_subslices; ss++) + if (intel_sseu_has_subslice(sseu, s, ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + sseu->eu_per_subslice = hweight16(eu_en); + sseu->eu_total = compute_eu_total(sseu); +} + +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; + u8 s_en; + u32 dss_en; + u16 eu_en = 0; + u8 eu_en_fuse; + int eu; + + /* + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS. + * Instead of splitting these, provide userspace with an array + * of DSS to more closely represent the hardware resource. + */ + intel_sseu_set_info(sseu, 1, 6, 16); + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); + + /* one bit per pair of EUs */ + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) + if (eu_en_fuse & BIT(eu)) + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); + + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + + /* TGL only supports slice-level power gating */ + sseu->has_slice_pg = 1; +} + static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu; u8 s_en; - u32 ss_en, ss_en_mask; + u32 ss_en; u8 eu_en; - int s; if (IS_ELKHARTLAKE(dev_priv)) intel_sseu_set_info(sseu, 1, 4, 8); @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); - ss_en_mask = BIT(sseu->max_subslices) - 1; eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - for (s = 0; s < sseu->max_slices; s++) { - if (s_en & BIT(s)) { - int ss_idx = sseu->max_subslices * s; - int ss; - - sseu->slice_mask |= BIT(s); - - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) & - ss_en_mask); - - for (ss = 0; ss < sseu->max_subslices; ss++) - if (intel_sseu_has_subslice(sseu, s, ss)) - sseu_set_eus(sseu, s, ss, eu_en); - } - } - sseu->eu_per_subslice = hweight8(eu_en); - sseu->eu_total = compute_eu_total(sseu); + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) gen9_sseu_info_init(dev_priv); else if (IS_GEN(dev_priv, 10)) gen10_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 11) + else if (IS_GEN(dev_priv, 11)) gen11_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 12) + gen12_sseu_info_init(dev_priv); if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { DRM_INFO("Disabling ppGTT for VT-d support\n"); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 469dc512cca3..30c542144016 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2033,8 +2033,10 @@ struct drm_i915_query { * (data[X / 8] >> (X % 8)) & 1 * * - the subslice mask for each slice with one bit per subslice telling - * whether a subslice is available. The availability of subslice Y in slice - * X can be queried with the following formula : + * whether a subslice is available. Gen12 has dual-subslices, which are + * similar to two gen11 subslices. For gen12, this array represents dual- + * subslices. The availability of subslice Y in slice X can be queried + * with the following formula : * * (data[subslice_offset + * X * subslice_stride + -- 2.17.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx