Up to now, the subslice mask was assumed to be uniform across slices. But starting with Cannonlake, slices can be asymmetric (for example, slice0 has a different number of subslices than slice1+). This change stores the subslice masks for all slices rather than having a single mask that applies to all slices. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 24 +++-- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 23 ++++- drivers/gpu/drm/i915/intel_device_info.c | 169 ++++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 6 files changed, 161 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 28294470ae31..ef091a2a6b12 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4340,7 +4340,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask = BIT(0); - sseu->subslice_mask |= BIT(ss); + sseu->subslices_mask[0] |= BIT(ss); eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) + ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 
0 : 2) + @@ -4387,7 +4387,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, continue; sseu->slice_mask |= BIT(s); - sseu->subslice_mask = info->sseu.subslice_mask; + sseu->subslices_mask[s] = info->sseu.subslices_mask[s]; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; @@ -4442,8 +4442,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); if (IS_GEN9_BC(dev_priv)) - sseu->subslice_mask = - INTEL_INFO(dev_priv)->sseu.subslice_mask; + sseu->subslices_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslices_mask[s]; for (ss = 0; ss < ss_max; ss++) { unsigned int eu_cnt; @@ -4453,7 +4453,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, /* skip disabled subslice */ continue; - sseu->subslice_mask |= BIT(ss); + sseu->subslices_mask[s] |= BIT(ss); } eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] & @@ -4475,9 +4475,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK; if (sseu->slice_mask) { - sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; sseu->eu_per_subslice = INTEL_INFO(dev_priv)->sseu.eu_per_subslice; + for (s = 0; s < fls(sseu->slice_mask); s++) { + sseu->subslices_mask[s] = + INTEL_INFO(dev_priv)->sseu.subslices_mask[s]; + } sseu->eu_total = sseu->eu_per_subslice * sseu_subslice_total(sseu); @@ -4496,6 +4499,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, { struct drm_i915_private *dev_priv = node_to_i915(m->private); const char *type = is_available_info ? 
"Available" : "Enabled"; + int s; seq_printf(m, " %s Slice Mask: %04x\n", type, sseu->slice_mask); @@ -4503,10 +4507,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info, hweight8(sseu->slice_mask)); seq_printf(m, " %s Subslice Total: %u\n", type, sseu_subslice_total(sseu)); - seq_printf(m, " %s Subslice Mask: %04x\n", type, - sseu->subslice_mask); - seq_printf(m, " %s Subslice Per Slice: %u\n", type, - hweight8(sseu->subslice_mask)); + for (s = 0; s < fls(sseu->slice_mask); s++) { + seq_printf(m, " %s Slice%i Subslice Mask: %04x\n", type, + s, sseu->subslices_mask[s]); + } seq_printf(m, " %s EU Total: %u\n", type, sseu->eu_total); seq_printf(m, " %s EU Per Subslice: %u\n", type, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7faf20aff25a..b9bfc38e6188 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_SUBSLICE_MASK: - value = INTEL_INFO(dev_priv)->sseu.subslice_mask; + value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0]; if (!value) return -ENODEV; break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 594fd14e66c5..0a8e8a3772e5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -801,9 +801,12 @@ struct intel_csr { func(supports_tv); \ func(has_ipc); +#define GEN_MAX_SLICES (6) /* CNL upper bound */ +#define GEN_MAX_SUBSLICES (7) + struct sseu_dev_info { u8 slice_mask; - u8 subslice_mask; + u8 subslices_mask[GEN_MAX_SLICES]; u8 eu_total; u8 eu_per_subslice; u8 min_eu_in_pool; @@ -812,11 +815,27 @@ struct sseu_dev_info { u8 has_slice_pg:1; u8 has_subslice_pg:1; u8 has_eu_pg:1; + + /* Topology fields */ + u8 max_slices; + u8 max_subslices; + u8 max_eus_per_subslice; + + /* We don't have more than 8 eus per subslice at the moment and as we + * store eus enabled using bits, no need 
to multiply by eus per + * subslice. + */ + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES]; }; static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) { - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); + unsigned s, total = 0; + + for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++) + total += hweight8(sseu->subslices_mask[s]); + + return total; } /* Keep in gen based order, and chronological order within a gen */ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 405d70124a46..11ceb43ddcee 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -82,22 +82,74 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv) #undef PRINT_FLAG } +static u8 compute_eu_total(const struct sseu_dev_info *sseu) +{ + u8 i, total = 0; + + for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++) + total += hweight8(sseu->eu_mask[i]); + + return total; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const u32 fuse2 = I915_READ(GEN8_FUSE2); + int s, ss, eu_mask = 0xff; + u32 subslice_mask, eu_en; sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >> GEN10_F2_S_ENA_SHIFT; - sseu->subslice_mask = (1 << 4) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> - GEN10_F2_SS_DIS_SHIFT); + sseu->max_slices = 6; + sseu->max_subslices = 4; + sseu->max_eus_per_subslice = 8; + + subslice_mask = (1 << 4) - 1; + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >> + GEN10_F2_SS_DIS_SHIFT); + + /* Slice0 can have up to 3 subslices, but there are only 2 in + * slice1/2. 
+ */ + sseu->subslices_mask[0] = subslice_mask; + for (s = 1; s < sseu->max_slices; s++) + sseu->subslices_mask[s] = subslice_mask & 0x3; + + /* Slice0 */ + eu_en = ~I915_READ(GEN8_EU_DISABLE0); + for (ss = 0; ss < sseu->max_subslices; ss++) + sseu->eu_mask[ss] = (eu_en >> (8 * ss)) & eu_mask; + /* Slice1 */ + sseu->eu_mask[sseu->max_subslices] = (eu_en >> 24) & eu_mask; + eu_en = ~I915_READ(GEN8_EU_DISABLE1); + sseu->eu_mask[sseu->max_subslices + 1] = eu_en & eu_mask; + /* Slice2 */ + sseu->eu_mask[2 * sseu->max_subslices] = (eu_en >> 8) & eu_mask; + sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask; + /* Slice3 */ + sseu->eu_mask[3 * sseu->max_subslices] = (eu_en >> 24) & eu_mask; + eu_en = ~I915_READ(GEN8_EU_DISABLE2); + sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask; + /* Slice4 */ + sseu->eu_mask[4 * sseu->max_subslices] = (eu_en >> 8) & eu_mask; + sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask; + /* Slice5 */ + sseu->eu_mask[5 * sseu->max_subslices] = (eu_en >> 24) & eu_mask; + eu_en = ~I915_READ(GEN10_EU_DISABLE3); + sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask; + + /* Do a second pass where we marked the subslices disabled if all + * their eus are off. 
+ */ + for (s = 0; s < sseu->max_slices; s++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0) + sseu->subslices_mask[s] &= ~BIT(ss); + } + } - sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1)); - sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2)); - sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) & - GEN10_EU_DIS_SS_MASK)); + sseu->eu_total = compute_eu_total(sseu); /* * CNL is expected to always have a uniform distribution @@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - u32 fuse, eu_dis; + u32 fuse; fuse = I915_READ(CHV_FUSE_GT); sseu->slice_mask = BIT(0); + sseu->max_slices = 1; + sseu->max_subslices = 2; + sseu->max_eus_per_subslice = 8; if (!(fuse & CHV_FGT_DISABLE_SS0)) { - sseu->subslice_mask |= BIT(0); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK | - CHV_FGT_EU_DIS_SS0_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + sseu->subslices_mask[0] |= BIT(0); + sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT; + sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4; + sseu->subslices_mask[0] = 1; } if (!(fuse & CHV_FGT_DISABLE_SS1)) { - sseu->subslice_mask |= BIT(1); - eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK | - CHV_FGT_EU_DIS_SS1_R1_MASK); - sseu->eu_total += 8 - hweight32(eu_dis); + sseu->subslices_mask[0] |= BIT(1); + sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT; + sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4; } + sseu->eu_total = compute_eu_total(sseu); + /* * CHV expected to always have a uniform distribution of EU * across subslices. 
@@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); struct sseu_dev_info *sseu = &info->sseu; - int s_max = 3, ss_max = 4, eu_max = 8; int s, ss; - u32 fuse2, eu_disable; + u32 fuse2, eu_disable, subslice_mask; u8 eu_mask = 0xff; fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + /* BXT has a single slice and at most 3 subslices. */ + sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3; + sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = (1 << ss_max) - 1; - sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> - GEN9_F2_SS_DIS_SHIFT); + subslice_mask = (1 << sseu->max_subslices) - 1; + subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >> + GEN9_F2_SS_DIS_SHIFT); /* * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; + sseu->subslices_mask[s] = subslice_mask; + eu_disable = I915_READ(GEN9_EU_DISABLE(s)); - for (ss = 0; ss < ss_max; ss++) { + for (ss = 0; ss < sseu->max_subslices; ss++) { int eu_per_ss; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslices_mask[s] & BIT(ss))) /* skip disabled subslice */ continue; - eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) & - eu_mask); + sseu->eu_mask[ss + s * sseu->max_subslices] = + ~((eu_disable >> (ss*8)) & eu_mask); + + eu_per_ss = sseu->max_eus_per_subslice - + hweight8((eu_disable >> (ss*8)) & eu_mask); /* * Record which subslice(s) has(have) 7 EUs. 
we @@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) */ if (eu_per_ss == 7) sseu->subslice_7eu[s] |= BIT(ss); - - sseu->eu_total += eu_per_ss; } } + sseu->eu_total = compute_eu_total(sseu); + /* * SKL is expected to always have a uniform distribution * of EU across subslices with the exception that any one @@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) sseu->has_eu_pg = sseu->eu_per_subslice > 2; if (IS_GEN9_LP(dev_priv)) { -#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask & BIT(ss))) - info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3; +#define IS_SS_DISABLED(ss) (!(sseu->subslices_mask[0] & BIT(ss))) + info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3; sseu->min_eu_in_pool = 0; if (info->has_pooled_eu) { @@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv) static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; - const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[3]; /* s_max */ + u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */ fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; + sseu->max_slices = 3; + sseu->max_subslices = 3; + sseu->max_eus_per_subslice = 8; + /* * The subslice disable field is global, i.e. it applies * to each of the enabled slices. 
*/ - sseu->subslice_mask = GENMASK(ss_max - 1, 0); - sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> - GEN8_F2_SS_DIS_SHIFT); + subslice_mask = GENMASK(sseu->max_subslices - 1, 0); + subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> + GEN8_F2_SS_DIS_SHIFT); eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK; eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) | @@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * Iterate through enabled slices and subslices to * count the total enabled EU. */ - for (s = 0; s < s_max; s++) { + for (s = 0; s < sseu->max_slices; s++) { if (!(sseu->slice_mask & BIT(s))) /* skip disabled slice */ continue; - for (ss = 0; ss < ss_max; ss++) { + sseu->subslices_mask[s] = subslice_mask; + + for (ss = 0; ss < sseu->max_subslices; ss++) { u32 n_disabled; - if (!(sseu->subslice_mask & BIT(ss))) + if (!(sseu->subslices_mask[ss] & BIT(ss))) /* skip disabled subslice */ continue; - n_disabled = hweight8(eu_disable[s] >> (ss * eu_max)); + sseu->eu_mask[ss + s * sseu->max_subslices] = + ~(eu_disable[s] >> + (ss * sseu->max_eus_per_subslice)); + n_disabled = hweight8(eu_disable[s] >> + (ss * sseu->max_eus_per_subslice)); /* * Record which subslices have 7 EUs. 
*/ - if (eu_max - n_disabled == 7) + if (sseu->max_eus_per_subslice - n_disabled == 7) sseu->subslice_7eu[s] |= 1 << ss; - - sseu->eu_total += eu_max - n_disabled; } } + sseu->eu_total = compute_eu_total(sseu); + /* * BDW is expected to always have a uniform distribution of EU across * subslices with the exception that any one EU in any one subslice may @@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) { struct intel_device_info *info = mkwrite_device_info(dev_priv); enum pipe pipe; + int s; if (INTEL_GEN(dev_priv) >= 10) { for_each_pipe(dev_priv, pipe) @@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", sseu_subslice_total(&info->sseu)); - DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask); - DRM_DEBUG_DRIVER("subslice per slice: %u\n", - hweight8(info->sseu.subslice_mask)); + for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) { + DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslices_mask[s]); + DRM_DEBUG_DRIVER("subslice per slice: %u\n", + hweight8(info->sseu.subslices_mask[s])); + } DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total); DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice); DRM_DEBUG_DRIVER("has slice power gating: %s\n", diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2a8160f603ab..bbc724b1ab56 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2073,7 +2073,7 @@ make_rpcs(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) { rpcs |= GEN8_RPCS_SS_CNT_ENABLE; - rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) << + rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) << GEN8_RPCS_SS_CNT_SHIFT; rpcs |= GEN8_RPCS_ENABLE; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h index c68ab3ead83c..9c4434438965 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) #define instdone_subslice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) == 7 ? \ - 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask) + 1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0]) #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \ for ((slice__) = 0, (subslice__) = 0; \ -- 2.15.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel