On Sat, Feb 14, 2015 at 06:30:29PM +0000, Damien Lespiau wrote: > When one EU is disabled in a particular subslice, we can tune how the > work is spread between subslices to improve EU utilization. > > v2: - Use a bitfield to record which subslice(s) has(have) 7 EUs. That > will also make the machinery work if several subslices have 7 EUs. > (Jeff Mcgee) > - Only apply the different hashing algorithm if the slice is > effectively unbalanced by checking there's a single subslice with > 7 EUs. (Jeff Mcgee) > > v3: Fix typo in comment (Jeff Mcgee) > > Issue: VIZ-3845 > Cc: Jeff Mcgee <jeff.mcgee@xxxxxxxxx> > Reviewed-by: Jeff Mcgee <jeff.mcgee@xxxxxxxxx> > Signed-off-by: Damien Lespiau <damien.lespiau@xxxxxxxxx> Queued for -next, thanks for the patch. -Daniel > --- > drivers/gpu/drm/i915/i915_dma.c | 17 ++++++++++--- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_reg.h | 2 ++ > drivers/gpu/drm/i915/intel_ringbuffer.c | 45 ++++++++++++++++++++++++++++++++- > 4 files changed, 62 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 70acfe0..39164ff 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -649,13 +649,24 @@ static void intel_device_info_runtime_init(struct drm_device *dev) > continue; > > for (ss = 0; ss < ss_max; ss++) { > + u32 n_disabled; > + > if (ss_disable & (0x1 << ss)) > /* skip disabled subslice */ > continue; > > - info->eu_total += eu_max - > - hweight8(eu_disable[s] >> > - (ss * eu_max)); > + n_disabled = hweight8(eu_disable[s] >> > + (ss * eu_max)); > + > + /* > + * Record which subslice(s) has(have) 7 EUs. we > + * can tune the hash used to spread work among > + * subslices if they are unbalanced. 
> + */ > + if (eu_max - n_disabled == 7) > + info->subslice_7eu[s] |= 1 << ss; > + > + info->eu_total += eu_max - n_disabled; > } > } > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d910bac..5b4794b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -699,6 +699,8 @@ struct intel_device_info { > u8 subslice_per_slice; > u8 eu_total; > u8 eu_per_subslice; > + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */ > + u8 subslice_7eu[3]; > u8 has_slice_pg:1; > u8 has_subslice_pg:1; > u8 has_eu_pg:1; > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 40f71bd..0aa6437 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -1341,6 +1341,8 @@ enum skl_disp_power_wells { > #define GEN6_WIZ_HASHING_16x4 GEN6_WIZ_HASHING(1, 0) > #define GEN6_WIZ_HASHING_MASK GEN6_WIZ_HASHING(1, 1) > #define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) > +#define GEN9_IZ_HASHING_MASK(slice) (0x3 << (slice * 2)) > +#define GEN9_IZ_HASHING(slice, val) ((val) << (slice * 2)) > > #define GFX_MODE 0x02520 > #define GFX_MODE_GEN7 0x0229c > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 443e19c..c24d856 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -937,6 +937,49 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) > return 0; > } > > +static int skl_tune_iz_hashing(struct intel_engine_cs *ring) > +{ > + struct drm_device *dev = ring->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + u8 vals[3] = { 0, 0, 0 }; > + unsigned int i; > + > + for (i = 0; i < 3; i++) { > + u8 ss; > + > + /* > + * Only consider slices where one, and only one, subslice has 7 > + * EUs > + */ > + if (hweight8(dev_priv->info.subslice_7eu[i]) != 1) > + continue; > + > + /* > + * subslice_7eu[i] != 0 (because of the 
check above) and > + * ss_max == 4 (maximum number of subslices possible per slice) > + * > + * -> 0 <= ss <= 3; > + */ > + ss = ffs(dev_priv->info.subslice_7eu[i]) - 1; > + vals[i] = 3 - ss; > + } > + > + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) > + return 0; > + > + /* Tune IZ hashing. See intel_device_info_runtime_init() */ > + WA_SET_FIELD_MASKED(GEN7_GT_MODE, > + GEN9_IZ_HASHING_MASK(2) | > + GEN9_IZ_HASHING_MASK(1) | > + GEN9_IZ_HASHING_MASK(0), > + GEN9_IZ_HASHING(2, vals[2]) | > + GEN9_IZ_HASHING(1, vals[1]) | > + GEN9_IZ_HASHING(0, vals[0])); > + > + return 0; > +} > + > + > static int skl_init_workarounds(struct intel_engine_cs *ring) > { > struct drm_device *dev = ring->dev; > @@ -956,7 +999,7 @@ static int skl_init_workarounds(struct intel_engine_cs *ring) > HDC_FENCE_DEST_SLM_DISABLE | > HDC_BARRIER_PERFORMANCE_DISABLE); > > - return 0; > + return skl_tune_iz_hashing(ring); > } > > int init_workarounds_ring(struct intel_engine_cs *ring) > -- > 1.8.3.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx