On Wed, Aug 27, 2014 at 05:33:12PM +0300, ville.syrjala@xxxxxxxxxxxxxxx wrote: > From: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > > Follow the BDW example and apply the workarounds touching registers > which are saved in the context image through LRIs in the new > ring->init_context() hook. > > This makes Mesa much happier and eg. glxgears doesn't hang after > the first frame. > > Cc: Arun Siluvery <arun.siluvery@xxxxxxxxxxxxxxx> > Signed-off-by: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/intel_pm.c | 14 ------------- > drivers/gpu/drm/i915/intel_ringbuffer.c | 36 +++++++++++++++++++++++++++++++-- > 2 files changed, 34 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index bbe65d5..437f25a 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -5841,14 +5841,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) > > I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE); > > - /* WaDisablePartialInstShootdown:chv */ > - I915_WRITE(GEN8_ROW_CHICKEN, > - _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); > - > - /* WaDisableThreadStallDopClockGating:chv */ > - I915_WRITE(GEN8_ROW_CHICKEN, > - _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); > - > /* WaVSRefCountFullforceMissDisable:chv */ > /* WaDSRefCountFullforceMissDisable:chv */ > I915_WRITE(GEN7_FF_THREAD_MODE, > @@ -5867,10 +5859,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) > I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | > GEN8_SDEUNIT_CLOCK_GATE_DISABLE); > > - /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ > - I915_WRITE(HALF_SLICE_CHICKEN3, > - _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); > - > /* WaDisableGunitClockGating:chv (pre-production hw) */ > I915_WRITE(VLV_GUNIT_CLOCK_GATE, I915_READ(VLV_GUNIT_CLOCK_GATE) | > GINT_DIS); > @@ -5880,8 +5868,6 @@ static void cherryview_init_clock_gating(struct drm_device *dev) > _MASKED_BIT_ENABLE(GEN8_FF_DOP_CLOCK_GATE_DISABLE)); > > /* WaDisableDopClockGating:chv (pre-production hw) */ > - I915_WRITE(GEN7_ROW_CHICKEN2, > - _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); > I915_WRITE(GEN6_UCGCTL1, I915_READ(GEN6_UCGCTL1) | > GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); > } > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index cef8465..42d9b43 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -658,7 +658,7 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring, > intel_ring_emit(ring, value); > } > > -static int gen8_init_workarounds(struct intel_engine_cs *ring) > +static int bdw_init_workarounds(struct intel_engine_cs *ring) Sorry, these are continuing naming gripes. *_ring_init_context so that there isn't that much disconnect between vfunc and function. > { > int ret; > > @@ -728,6 +728,35 @@ static int gen8_init_workarounds(struct intel_engine_cs *ring) > return 0; > } > > +static int chv_init_workarounds(struct intel_engine_cs *ring) > +{ > + int ret; > + > + ret = intel_ring_begin(ring, 12); > + if (ret) > + return ret; > + > + /* WaDisablePartialInstShootdown:chv */ > + intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN, > + _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE)); intel_ring_emit_lri(); However, if we get fancy, you can do these 4 in a single command intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(3)); intel_ring_emit(ring, GEN8_ROW_CHICKEN); intel_ring_emit(ring, _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) | _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); (perhaps even intel_ring_emit_pair(ring, reg, value)) intel_ring_emit(ring, GEN7_ROW_CHICKEN2); intel_ring_emit(ring, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)); intel_ring_emit(ring, HALF_SLICE_CHICKEN3, intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); hmm, actually if you do int i915_request_emit_lri(rq, int num_registers, ...) { struct intel_ringbuffer *ring; va_list ap; ring = intel_ring_begin(rq, 2*num_registers + 2); if (IS_ERR(ring)) return PTR_ERR(ring); va_start(ap, num_registers); intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_registers)); while (num_registers--) { intel_ring_emit(ring, va_arg(ap, u32)); intel_ring_emit(ring, va_arg(ap, u32)); } intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); return 0; } then static int chv_ring_init_context(struct i915_request *rq) { return i915_request_emit_lri(rq, 3, /* WaDisablePartialInstShootdown:chv */ /* WaDisableThreadStallDopClockGating:chv */ GEN8_ROW_CHICKEN, _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) | _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE) /* WaDisableDopClockGating:chv (pre-production hw) */ GEN7_ROW_CHICKEN2, _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE), /* WaDisableSamplerPowerBypass:chv (pre-production hw) */ HALF_SLICE_CHICKEN3, _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); } Just not fond of intel_ring_emit_lri() as we the current intel_ring_emit* style should not be calling intel_ring_begin() itself. -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx