On Tue, Jun 15, 2021 at 05:42:13PM -0700, Matt Roper wrote: > Because Render Power Gating restricts us to just a single subslice as a > valid steering target for reads of multicast registers in a SUBSLICE > range, the default steering we setup at init may not lead to a suitable > target for L3BANK multicast register. In cases where it does not, use > explicit runtime steering whenever an L3BANK multicast register is read. > > While we're at it, let's simplify the function a little bit and drop its > support for gen10/CNL since no such platforms ever materialized for real > use. Multicast register steering is already an area that causes enough > confusion; no need to complicate it with what's effectively dead code. > > v2: > - Use gt->uncore instead of gt->i915->uncore. (Tvrtko) > - Use {} as table terminator. (Rodrigo) > > Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> > Cc: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > Signed-off-by: Matt Roper <matthew.d.roper@xxxxxxxxx> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_gt.c | 18 +++++ > drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 + > drivers/gpu/drm/i915/gt/intel_workarounds.c | 84 ++++++--------------- > 3 files changed, 46 insertions(+), 60 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c > index 66299105da66..25a3ecf9892a 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -83,6 +83,11 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) > gt->ggtt = ggtt; > } > > +static const struct intel_mmio_range icl_l3bank_steering_table[] = { > + { 0x00B100, 0x00B3FF }, > + {}, > +}; > + > int intel_gt_init_mmio(struct intel_gt *gt) > { > intel_gt_init_clock_frequency(gt); > @@ -90,6 +95,13 @@ int intel_gt_init_mmio(struct intel_gt *gt) > intel_uc_init_mmio(>->uc); > intel_sseu_info_init(gt); > > + if (GRAPHICS_VER(gt->i915) >= 11) { > + gt->steering_table[L3BANK] = icl_l3bank_steering_table; > + gt->info.l3bank_mask = > + intel_uncore_read(gt->uncore, GEN10_MIRROR_FUSE3) & > + GEN10_L3BANK_MASK; > + } > + > return intel_engines_init_mmio(gt); > } > > @@ -744,6 +756,12 @@ static void intel_gt_get_valid_steering(struct intel_gt *gt, > u8 *sliceid, u8 *subsliceid) > { > switch (type) { > + case L3BANK: > + GEM_DEBUG_WARN_ON(!gt->info.l3bank_mask); /* should be impossible! */ > + > + *sliceid = __ffs(gt->info.l3bank_mask); > + *subsliceid = 0; /* unused */ > + break; > default: > MISSING_CASE(type); > *sliceid = 0; > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h > index f2c274eee1e6..80dc131e862f 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > @@ -48,6 +48,8 @@ struct intel_mmio_range { > * need to explicitly re-steer reads of registers of the other type. > */ > enum intel_steering_type { > + L3BANK, > + > NUM_STEERING_TYPES > }; > > @@ -174,6 +176,8 @@ struct intel_gt { > /* Media engine access to SFC per instance */ > u8 vdbox_sfc_access; > > + u32 l3bank_mask; > + > /* Slice/subslice/EU info */ > struct sseu_dev_info sseu; > } info; > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c > index 93c74d4cae02..d9a5a445ceec 100644 > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c > @@ -945,71 +945,37 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) > } > > static void > -wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) > +icl_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) > { > const struct sseu_dev_info *sseu = &i915->gt.info.sseu; > unsigned int slice, subslice; > - u32 l3_en, mcr, mcr_mask; > + u32 mcr, mcr_mask; > > - GEM_BUG_ON(GRAPHICS_VER(i915) < 10); > + GEM_BUG_ON(GRAPHICS_VER(i915) < 11); > + GEM_BUG_ON(hweight8(sseu->slice_mask) > 1); > + slice = 0; > > /* > - * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl > - * L3Banks could be fused off in single slice scenario. If that is > - * the case, we might need to program MCR select to a valid L3Bank > - * by default, to make sure we correctly read certain registers > - * later on (in the range 0xB100 - 0xB3FF). > - * > - * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl > - * Before any MMIO read into slice/subslice specific registers, MCR > - * packet control register needs to be programmed to point to any > - * enabled s/ss pair. Otherwise, incorrect values will be returned. > - * This means each subsequent MMIO read will be forwarded to an > - * specific s/ss combination, but this is OK since these registers > - * are consistent across s/ss in almost all cases. In the rare > - * occasions, such as INSTDONE, where this value is dependent > - * on s/ss combo, the read should be done with read_subslice_reg. > - * > - * Since GEN8_MCR_SELECTOR contains dual-purpose bits which select both > - * to which subslice, or to which L3 bank, the respective mmio reads > - * will go, we have to find a common index which works for both > - * accesses. > - * > - * Case where we cannot find a common index fortunately should not > - * happen in production hardware, so we only emit a warning instead of > - * implementing something more complex that requires checking the range > - * of every MMIO read. > + * Although a platform may have subslices, we need to always steer > + * reads to the lowest instance that isn't fused off. When Render > + * Power Gating is enabled, grabbing forcewake will only power up a > + * single subslice (the "minconfig") if there isn't a real workload > + * that needs to be run; this means that if we steer register reads to > + * one of the higher subslices, we run the risk of reading back 0's or > + * random garbage. > */ > + subslice = __ffs(intel_sseu_get_subslices(sseu, slice)); > > - if (GRAPHICS_VER(i915) >= 10 && is_power_of_2(sseu->slice_mask)) { > - u32 l3_fuse = > - intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3) & > - GEN10_L3BANK_MASK; > - > - drm_dbg(&i915->drm, "L3 fuse = %x\n", l3_fuse); > - l3_en = ~(l3_fuse << GEN10_L3BANK_PAIR_COUNT | l3_fuse); > - } else { > - l3_en = ~0; > - } > + /* > + * If the subslice we picked above also steers us to a valid L3 bank, > + * then we can just rely on the default steering and won't need to > + * worry about explicitly re-steering L3BANK reads later. > + */ > + if (i915->gt.info.l3bank_mask & BIT(subslice)) > + i915->gt.steering_table[L3BANK] = NULL; > > - slice = fls(sseu->slice_mask) - 1; > - subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice)); > - if (!subslice) { > - drm_warn(&i915->drm, > - "No common index found between subslice mask %x and L3 bank mask %x!\n", > - intel_sseu_get_subslices(sseu, slice), l3_en); > - subslice = fls(l3_en); > - drm_WARN_ON(&i915->drm, !subslice); > - } > - subslice--; > - > - if (GRAPHICS_VER(i915) >= 11) { > - mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); > - mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; > - } else { > - mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); > - mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK; > - } > + mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice); > + mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK; > > drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); > > @@ -1019,8 +985,6 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) > static void > cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) > { > - wa_init_mcr(i915, wal); > - > /* WaInPlaceDecompressionHang:cnl */ > wa_write_or(wal, > GEN9_GAMT_ECO_REG_RW_IA, > @@ -1030,7 +994,7 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) > static void > icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) > { > - wa_init_mcr(i915, wal); > + icl_wa_init_mcr(i915, wal); > > /* WaInPlaceDecompressionHang:icl */ > wa_write_or(wal, > @@ -1112,7 +1076,7 @@ static void > gen12_gt_workarounds_init(struct drm_i915_private *i915, > struct i915_wa_list *wal) > { > - wa_init_mcr(i915, wal); > + icl_wa_init_mcr(i915, wal); > > /* Wa_14011060649:tgl,rkl,dg1,adls,adl-p */ > wa_14011060649(i915, wal); > -- > 2.25.4 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx