On Sun, 2019-09-22 at 19:48 +0300, Lionel Landwerlin wrote: > On 21/09/2019 03:39, Lucas De Marchi wrote: > > On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson < > > chris@xxxxxxxxxxxxxxxxxx> wrote: > > > From: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > > > > > > Gen12 has dual-subslices (DSS), which compared to gen11 subslices > > > have > > > some duplicated resources/paths. Although DSS behave similarly to > > > 2 > > > subslices, instead of splitting this and presenting userspace > > > with bits > > > not directly representative of hardware resources, present > > > userspace > > > with a subslice_mask made up of DSS bits instead. > > > > > > v2: GEM_BUG_ON on mask size (Lionel) > > > > > > Bspec: 29547 > > > Bspec: 12247 > > > Cc: Kelvin Gardiner <kelvin.gardiner@xxxxxxxxx> > > > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > > Cc: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > > > CC: Radhakrishna Sripada <radhakrishna.sripada@xxxxxxxxx> > > > Cc: Michel Thierry <michel.thierry@xxxxxxxxx> #v1 > > > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@xxxxxxxxx> > > > Cc: José Roberto de Souza <jose.souza@xxxxxxxxx> > > > Signed-off-by: Daniele Ceraolo Spurio < > > > daniele.ceraolospurio@xxxxxxxxx> > > > Signed-off-by: James Ausmus <james.ausmus@xxxxxxxxx> > > > Signed-off-by: Oscar Mateo <oscar.mateo@xxxxxxxxx> > > > Signed-off-by: Sudeep Dutt <sudeep.dutt@xxxxxxxxx> > > > Signed-off-by: Stuart Summers <stuart.summers@xxxxxxxxx> > > > Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > > > Acked-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> > > > --- > > > > After this I get the correct values for TGL: > > - Available Subslice Total: 2 > > - Available Slice0 subslices: 2 > > - Available EU Total: 16 > > - Available EU Per Subslice: 8 > > + Available Subslice Total: 6 > > + Available Slice0 subslices: 6 > > + Available EU Total: 96 > > + Available EU Per Subslice: 16 > > > > Reviewed-by: Lucas De Marchi 
<lucas.demarchi@xxxxxxxxx> > > > > Lucas De Marchi > > > Btw, shouldn't we print "Dualsubslice" rather than "Subslice" for > TGL? The idea here is that, from the userspace perspective, each dual-subslice is still a single unit that can be utilized in hardware. Thanks, Stuart > > > -Lionel > > > > > drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +-- > > > drivers/gpu/drm/i915/i915_debugfs.c | 3 +- > > > drivers/gpu/drm/i915/i915_reg.h | 2 + > > > drivers/gpu/drm/i915/intel_device_info.c | 83 > > > ++++++++++++++++++------ > > > include/uapi/drm/i915_drm.h | 6 +- > > > 5 files changed, 72 insertions(+), 31 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h > > > b/drivers/gpu/drm/i915/gt/intel_sseu.h > > > index 4070f6ff1db6..d1d225204f09 100644 > > > --- a/drivers/gpu/drm/i915/gt/intel_sseu.h > > > +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h > > > @@ -18,12 +18,13 @@ struct drm_i915_private; > > > #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ > > > #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, > > > BITS_PER_BYTE) > > > #define GEN_MAX_SUBSLICE_STRIDE > > > GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES) > > > -#define GEN_MAX_EUS (10) /* HSW upper bound */ > > > +#define GEN_MAX_EUS (16) /* TGL upper bound */ > > > #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS) > > > > > > struct sseu_dev_info { > > > u8 slice_mask; > > > u8 subslice_mask[GEN_MAX_SLICES * > > > GEN_MAX_SUBSLICE_STRIDE]; > > > + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * > > > GEN_MAX_EU_STRIDE]; > > > u16 eu_total; > > > u8 eu_per_subslice; > > > u8 min_eu_in_pool; > > > @@ -40,12 +41,6 @@ struct sseu_dev_info { > > > > > > u8 ss_stride; > > > u8 eu_stride; > > > - > > > - /* We don't have more than 8 eus per subslice at the > > > moment and as we > > > - * store eus enabled using bits, no need to multiply by > > > eus per > > > - * subslice. 
> > > - */ > > > - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; > > > }; > > > > > > /* > > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c > > > b/drivers/gpu/drm/i915/i915_debugfs.c > > > index 43db50095257..b5b449a88cf1 100644 > > > --- a/drivers/gpu/drm/i915/i915_debugfs.c > > > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > > > @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct > > > drm_i915_private *dev_priv, > > > for (ss = 0; ss < info->sseu.max_subslices; > > > ss++) { > > > unsigned int eu_cnt; > > > > > > - if (!(s_reg[s] & > > > (GEN9_PGCTL_SS_ACK(ss)))) > > > + if (info->sseu.has_subslice_pg && > > > + !(s_reg[s] & > > > (GEN9_PGCTL_SS_ACK(ss)))) > > > /* skip disabled subslice */ > > > continue; > > > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h > > > b/drivers/gpu/drm/i915/i915_reg.h > > > index bf37ecebc82f..47847135a11f 100644 > > > --- a/drivers/gpu/drm/i915/i915_reg.h > > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > > @@ -2956,6 +2956,8 @@ static inline bool > > > i915_mmio_reg_valid(i915_reg_t reg) > > > > > > #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) > > > > > > +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) > > > + > > > #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) > > > #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) > > > #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) > > > diff --git a/drivers/gpu/drm/i915/intel_device_info.c > > > b/drivers/gpu/drm/i915/intel_device_info.c > > > index 50b05a5de53b..b91a960b037f 100644 > > > --- a/drivers/gpu/drm/i915/intel_device_info.c > > > +++ b/drivers/gpu/drm/i915/intel_device_info.c > > > @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct > > > sseu_dev_info *sseu) > > > return total; > > > } > > > > > > +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, > > > + u8 s_en, u32 ss_en, u16 > > > eu_en) > > > +{ > > > + int s, ss; > > > + > > > + /* ss_en represents entire subslice mask across all > > > slices */ > > > + GEM_BUG_ON(sseu->max_slices * 
sseu->max_subslices > > > > + sizeof(ss_en) * BITS_PER_BYTE); > > > + > > > + for (s = 0; s < sseu->max_slices; s++) { > > > + if ((s_en & BIT(s)) == 0) > > > + continue; > > > + > > > + sseu->slice_mask |= BIT(s); > > > + > > > + intel_sseu_set_subslices(sseu, s, ss_en); > > > + > > > + for (ss = 0; ss < sseu->max_subslices; ss++) > > > + if (intel_sseu_has_subslice(sseu, s, ss)) > > > + sseu_set_eus(sseu, s, ss, eu_en); > > > + } > > > + sseu->eu_per_subslice = hweight16(eu_en); > > > + sseu->eu_total = compute_eu_total(sseu); > > > +} > > > + > > > +static void gen12_sseu_info_init(struct drm_i915_private > > > *dev_priv) > > > +{ > > > + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)- > > > >sseu; > > > + u8 s_en; > > > + u32 dss_en; > > > + u16 eu_en = 0; > > > + u8 eu_en_fuse; > > > + int eu; > > > + > > > + /* > > > + * Gen12 has Dual-Subslices, which behave similarly to 2 > > > gen11 SS. > > > + * Instead of splitting these, provide userspace with an > > > array > > > + * of DSS to more closely represent the hardware > > > resource. 
> > > + */ > > > + intel_sseu_set_info(sseu, 1, 6, 16); > > > + > > > + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & > > > GEN11_GT_S_ENA_MASK; > > > + > > > + dss_en = I915_READ(GEN12_GT_DSS_ENABLE); > > > + > > > + /* one bit per pair of EUs */ > > > + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & > > > GEN11_EU_DIS_MASK); > > > + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++) > > > + if (eu_en_fuse & BIT(eu)) > > > + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); > > > + > > > + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); > > > + > > > + /* TGL only supports slice-level power gating */ > > > + sseu->has_slice_pg = 1; > > > +} > > > + > > > static void gen11_sseu_info_init(struct drm_i915_private > > > *dev_priv) > > > { > > > struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)- > > > >sseu; > > > u8 s_en; > > > - u32 ss_en, ss_en_mask; > > > + u32 ss_en; > > > u8 eu_en; > > > - int s; > > > > > > if (IS_ELKHARTLAKE(dev_priv)) > > > intel_sseu_set_info(sseu, 1, 4, 8); > > > @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct > > > drm_i915_private *dev_priv) > > > > > > s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & > > > GEN11_GT_S_ENA_MASK; > > > ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); > > > - ss_en_mask = BIT(sseu->max_subslices) - 1; > > > eu_en = ~(I915_READ(GEN11_EU_DISABLE) & > > > GEN11_EU_DIS_MASK); > > > > > > - for (s = 0; s < sseu->max_slices; s++) { > > > - if (s_en & BIT(s)) { > > > - int ss_idx = sseu->max_subslices * s; > > > - int ss; > > > - > > > - sseu->slice_mask |= BIT(s); > > > - > > > - intel_sseu_set_subslices(sseu, s, (ss_en > > > >> ss_idx) & > > > - ss_en_m > > > ask); > > > - > > > - for (ss = 0; ss < sseu->max_subslices; > > > ss++) > > > - if (intel_sseu_has_subslice(sseu, > > > s, ss)) > > > - sseu_set_eus(sseu, s, ss, > > > eu_en); > > > - } > > > - } > > > - sseu->eu_per_subslice = hweight8(eu_en); > > > - sseu->eu_total = compute_eu_total(sseu); > > > + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); > > > > 
> > /* ICL has no power gating restrictions. */ > > > sseu->has_slice_pg = 1; > > > @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct > > > drm_i915_private *dev_priv) > > > gen9_sseu_info_init(dev_priv); > > > else if (IS_GEN(dev_priv, 10)) > > > gen10_sseu_info_init(dev_priv); > > > - else if (INTEL_GEN(dev_priv) >= 11) > > > + else if (IS_GEN(dev_priv, 11)) > > > gen11_sseu_info_init(dev_priv); > > > + else if (INTEL_GEN(dev_priv) >= 12) > > > + gen12_sseu_info_init(dev_priv); > > > > > > if (IS_GEN(dev_priv, 6) && intel_vtd_active()) { > > > DRM_INFO("Disabling ppGTT for VT-d support\n"); > > > diff --git a/include/uapi/drm/i915_drm.h > > > b/include/uapi/drm/i915_drm.h > > > index 469dc512cca3..30c542144016 100644 > > > --- a/include/uapi/drm/i915_drm.h > > > +++ b/include/uapi/drm/i915_drm.h > > > @@ -2033,8 +2033,10 @@ struct drm_i915_query { > > > * (data[X / 8] >> (X % 8)) & 1 > > > * > > > * - the subslice mask for each slice with one bit per subslice > > > telling > > > - * whether a subslice is available. The availability of > > > subslice Y in slice > > > - * X can be queried with the following formula : > > > + * whether a subslice is available. Gen12 has dual-subslices, > > > which are > > > + * similar to two gen11 subslices. For gen12, this array > > > represents dual- > > > + * subslices. The availability of subslice Y in slice X can be > > > queried > > > + * with the following formula : > > > * > > > * (data[subslice_offset + > > > * X * subslice_stride + > > > -- > > > 2.23.0 > > > > > > _______________________________________________ > > > Intel-gfx mailing list > > > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx > > > > > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Attachment:
smime.p7s
Description: S/MIME cryptographic signature
_______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx