On Mon, Jul 28, 2014 at 05:31:46PM +0100, arun.siluvery@xxxxxxxxxxxxxxx wrote: > From: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> > > The workarounds at the moment are initialized in init_clock_gating() but > they are lost during reset; In case of execlists some workarounds modify > registers that are part of register state context, since these are not > initialized until init_clock_gating() default context ends up with > incorrect values as render context is restored and saved before updated > by workarounds hence move them to render ring init fn. This should be > ok as these workarounds are not related to display clock gating. > > Signed-off-by: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 46 ++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_pm.c | 59 ---------------------------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 68 +++++++++++++++++++++++++++++++++ > 3 files changed, 114 insertions(+), 59 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 083683c..cf7da30 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -2397,20 +2397,65 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) > seq_printf(m, " dpll_md: 0x%08x\n", pll->hw_state.dpll_md); > seq_printf(m, " fp0: 0x%08x\n", pll->hw_state.fp0); > seq_printf(m, " fp1: 0x%08x\n", pll->hw_state.fp1); > seq_printf(m, " wrpll: 0x%08x\n", pll->hw_state.wrpll); > } > drm_modeset_unlock_all(dev); > > return 0; > } > > +static int i915_workaround_info(struct seq_file *m, void *unused) > +{ > + struct drm_info_node *node = (struct drm_info_node *) m->private; > + struct drm_device *dev = node->minor->dev; > + struct drm_i915_private *dev_priv = dev->dev_private; > + int ret; > + > + ret = mutex_lock_interruptible(&dev->struct_mutex); > + if (ret) > + return ret; > + > + if (IS_BROADWELL(dev)) { > + seq_printf(m, "GEN8_ROW_CHICKEN:\t0x%08x\n", > + I915_READ(GEN8_ROW_CHICKEN)); > + seq_printf(m, "HALF_SLICE_CHICKEN3:\t0x%08x\n", > + I915_READ(HALF_SLICE_CHICKEN3)); > + seq_printf(m, "GAMTARBMODE:\t0x%08x\n", I915_READ(GAMTARBMODE)); > + seq_printf(m, "_3D_CHICKEN3:\t0x%08x\n", > + I915_READ(_3D_CHICKEN3)); > + seq_printf(m, "COMMON_SLICE_CHICKEN2:\t0x%08x\n", > + I915_READ(COMMON_SLICE_CHICKEN2)); > + seq_printf(m, "GEN7_HALF_SLICE_CHICKEN1:\t0x%08x\n", > + I915_READ(GEN7_HALF_SLICE_CHICKEN1)); > + seq_printf(m, "GEN7_ROW_CHICKEN2:\t0x%08x\n", > + I915_READ(GEN7_ROW_CHICKEN2)); > + seq_printf(m, "GAM_ECOCHK:\t0x%08x\n", > + I915_READ(GAM_ECOCHK)); > + seq_printf(m, "HDC_CHICKEN0:\t0x%08x\n", > + I915_READ(HDC_CHICKEN0)); > + seq_printf(m, "GEN7_FF_THREAD_MODE:\t0x%08x\n", > + I915_READ(GEN7_FF_THREAD_MODE)); > + seq_printf(m, "GEN8_UCGCTL6:\t0x%08x\n", > + I915_READ(GEN8_UCGCTL6)); > + seq_printf(m, "GEN6_RC_SLEEP_PSMI_CONTROL:\t0x%08x\n", > + I915_READ(GEN6_RC_SLEEP_PSMI_CONTROL)); > + seq_printf(m, "CACHE_MODE_1:\t0x%08x\n", > + I915_READ(CACHE_MODE_1)); > + } else > + DRM_DEBUG_DRIVER("Not available for Gen%d\n", > + INTEL_INFO(dev)->gen); > + > + mutex_unlock(&dev->struct_mutex); > + return 0; > +} > + This smells like a separate patch. But I'm not sure we want at all since intel_reg_read will provide the same information. > struct pipe_crc_info { > const char *name; > struct drm_device *dev; > enum pipe pipe; > }; > > static int i915_pipe_crc_open(struct inode *inode, struct file *filep) > { > struct pipe_crc_info *info = inode->i_private; > struct drm_i915_private *dev_priv = info->dev->dev_private; > @@ -3904,20 +3949,21 @@ static const struct drm_info_list i915_debugfs_list[] = { > {"i915_ppgtt_info", i915_ppgtt_info, 0}, > {"i915_llc", i915_llc, 0}, > {"i915_edp_psr_status", i915_edp_psr_status, 0}, > {"i915_sink_crc_eDP1", i915_sink_crc, 0}, > {"i915_energy_uJ", i915_energy_uJ, 0}, > {"i915_pc8_status", i915_pc8_status, 0}, > {"i915_power_domain_info", i915_power_domain_info, 0}, > {"i915_display_info", i915_display_info, 0}, > {"i915_semaphore_status", i915_semaphore_status, 0}, > {"i915_shared_dplls_info", i915_shared_dplls_info, 0}, > + {"i915_workaround_info", i915_workaround_info, 0}, > }; > #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list) > > static const struct i915_debugfs_files { > const char *name; > const struct file_operations *fops; > } i915_debugfs_files[] = { > {"i915_wedged", &i915_wedged_fops}, > {"i915_max_freq", &i915_max_freq_fops}, > {"i915_min_freq", &i915_min_freq_fops}, > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 3f88f29..9597f95 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -5393,101 +5393,42 @@ static void gen8_init_clock_gating(struct drm_device *dev) > struct drm_i915_private *dev_priv = dev->dev_private; > enum pipe pipe; > > I915_WRITE(WM3_LP_ILK, 0); > I915_WRITE(WM2_LP_ILK, 0); > I915_WRITE(WM1_LP_ILK, 0); > > /* FIXME(BDW): Check all the w/a, some might only apply to > * pre-production hw. */ > > - /* WaDisablePartialInstShootdown:bdw */ > - I915_WRITE(GEN8_ROW_CHICKEN, > - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); > - > - /* WaDisableThreadStallDopClockGating:bdw */ > - /* FIXME: Unclear whether we really need this on production bdw. */ > - I915_WRITE(GEN8_ROW_CHICKEN, > - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); > - > - /* > - * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for > - * pre-production hardware > - */ > - I915_WRITE(HALF_SLICE_CHICKEN3, > - _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); > - I915_WRITE(HALF_SLICE_CHICKEN3, > - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); > - I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); > - > - I915_WRITE(_3D_CHICKEN3, > - _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); > - > - I915_WRITE(COMMON_SLICE_CHICKEN2, > - _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); > - > - I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, > - _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); > - > - /* WaDisableDopClockGating:bdw May not be needed for production */ > - I915_WRITE(GEN7_ROW_CHICKEN2, > - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); > - > - /* WaSwitchSolVfFArbitrationPriority:bdw */ > - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); > > /* WaPsrDPAMaskVBlankInSRD:bdw */ > I915_WRITE(CHICKEN_PAR1_1, > I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); > > /* WaPsrDPRSUnmaskVBlankInSRD:bdw */ > for_each_pipe(pipe) { > I915_WRITE(CHICKEN_PIPESL_1(pipe), > I915_READ(CHICKEN_PIPESL_1(pipe)) | > BDW_DPRS_MASK_VBLANK_SRD); > } > > - /* Use Force Non-Coherent whenever executing a 3D context. This is a > - * workaround for for a possible hang in the unlikely event a TLB > - * invalidation occurs during a PSD flush. > - */ > - I915_WRITE(HDC_CHICKEN0, > - I915_READ(HDC_CHICKEN0) | > - _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); > - > - /* WaVSRefCountFullforceMissDisable:bdw */ > - /* WaDSRefCountFullforceMissDisable:bdw */ > - I915_WRITE(GEN7_FF_THREAD_MODE, > - I915_READ(GEN7_FF_THREAD_MODE) & > - ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); > - > /* > * BSpec recommends 8x4 when MSAA is used, > * however in practice 16x4 seems fastest. > * > * Note that PS/WM thread counts depend on the WIZ hashing > * disable bit, which we don't touch here, but it's good > * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). > */ > I915_WRITE(GEN7_GT_MODE, > GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4); GT_MODE doesn't get clobbered by a reset? I would have expected it to behave the same way as CACHE_MODE_{0,1). > - > - I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, > - _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); > - > - /* WaDisableSDEUnitClockGating:bdw */ > - I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | > - GEN8_SDEUNIT_CLOCK_GATE_DISABLE); And I would have expected the UCGCTL registers to not be clobbered. IIRC they didn't get clobbered on my IVB, but my memory might be wrong here. > - > - /* Wa4x4STCOptimizationDisable:bdw */ > - I915_WRITE(CACHE_MODE_1, > - _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); > } > > static void haswell_init_clock_gating(struct drm_device *dev) > { > struct drm_i915_private *dev_priv = dev->dev_private; > > ilk_init_lp_watermarks(dev); > > /* L3 caching of data atomics doesn't work -- disable it. */ > I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index b3d8f76..6818e34 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -591,20 +591,85 @@ init_pipe_control(struct intel_engine_cs *ring) > return 0; > > err_unpin: > i915_gem_object_ggtt_unpin(ring->scratch.obj); > err_unref: > drm_gem_object_unreference(&ring->scratch.obj->base); > err: > return ret; > } > > +static void bdw_init_workarounds(struct drm_device *dev) > +{ > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + /* WaDisablePartialInstShootdown:bdw */ > + I915_WRITE(GEN8_ROW_CHICKEN, > + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); > + > + /* WaDisableThreadStallDopClockGating:bdw */ > + /* FIXME: Unclear whether we really need these on production bdw. */ > + I915_WRITE(GEN8_ROW_CHICKEN, > + _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); > + > + /* > + * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for > + * pre-production hardware > + */ > + I915_WRITE(HALF_SLICE_CHICKEN3, > + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); > + I915_WRITE(HALF_SLICE_CHICKEN3, > + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); > + I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); > + > + I915_WRITE(_3D_CHICKEN3, > + _MASKED_BIT_ENABLE(_3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2))); > + > + I915_WRITE(COMMON_SLICE_CHICKEN2, > + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); > + > + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, > + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); > + > + /* WaDisableDopClockGating:bdw May not be needed for production */ > + I915_WRITE(GEN7_ROW_CHICKEN2, > + _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); > + > + /* WaSwitchSolVfFArbitrationPriority:bdw */ > + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); > + > + /* Use Force Non-Coherent whenever executing a 3D context. This is a > + * workaround for for a possible hang in the unlikely event a TLB > + * invalidation occurs during a PSD flush. > + */ > + I915_WRITE(HDC_CHICKEN0, > + I915_READ(HDC_CHICKEN0) | You might kill this read since it's a masked register. Maybe as a separate prep-patch. > + _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)); > + > + /* WaVSRefCountFullforceMissDisable:bdw */ > + /* WaDSRefCountFullforceMissDisable:bdw */ > + I915_WRITE(GEN7_FF_THREAD_MODE, > + I915_READ(GEN7_FF_THREAD_MODE) & > + ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME)); > + > + /* WaDisableSDEUnitClockGating:bdw */ > + I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | > + GEN8_SDEUNIT_CLOCK_GATE_DISABLE); > + > + I915_WRITE(GEN6_RC_SLEEP_PSMI_CONTROL, > + _MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE)); > + > + /* Wa4x4STCOptimizationDisable:bdw */ > + I915_WRITE(CACHE_MODE_1, > + _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)); I think it would be nice to reorder this function a bit to keep similar registers close to each other. The current mismash makes it easy to overlook a workaround and then we may end up with duplicates (wouldn't be the first time that happened). > +} > + > static int init_render_ring(struct intel_engine_cs *ring) > { > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > int ret = init_ring_common(ring); > if (ret) > return ret; > > /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ > if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7) > @@ -646,20 +711,23 @@ static int init_render_ring(struct intel_engine_cs *ring) > I915_WRITE(CACHE_MODE_0, > _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); > } > > if (INTEL_INFO(dev)->gen >= 6) > I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); > > if (HAS_L3_DPF(dev)) > I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); > > + if (IS_BROADWELL(dev)) > + bdw_init_workarounds(dev); > + > return ret; > } > > static void render_ring_cleanup(struct intel_engine_cs *ring) > { > struct drm_device *dev = ring->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > > if (dev_priv->semaphore_obj) { > i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); > -- > 1.9.2 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel OTC _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx