Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> writes: > On Baytrail, we manually calculate busyness over the evaluation interval > to avoid issues with miscaluations with RC6 enabled. However, it turns > out that the DOWN_EI interrupt generator is completely bust - it > operates in two modes, continuous or never. Neither of which are > conducive to good behaviour. Stop unmask the DOWN_EI interrupt and just > compute everything from the UP_EI which does seem to correspond to the > desired interval. > > v2: Fixup gen6_rps_pm_mask() as well > v3: Inline vlv_c0_above() to combine the now identical elapsed > calculation for up/down and simplify the threshold testing > > Fixes: 43cf3bf084ba ("drm/i915: Improved w/a for rps on Baytrail") > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx> > Cc: <stable@xxxxxxxxxxxxxxx> # v4.1+ > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 +- > drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++------------------------- > drivers/gpu/drm/i915/intel_pm.c | 5 +-- > 3 files changed, 32 insertions(+), 48 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index c52aee5141ca..b399c8fe5ac1 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1377,7 +1377,7 @@ struct intel_gen6_power_mgmt { > unsigned boosts; > > /* manual wa residency calculations */ > - struct intel_rps_ei up_ei, down_ei; > + struct intel_rps_ei ei; > > /* > * Protects RPS/RC6 register access and PCU communication. > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index d91078f8e9d4..b3f9181ef314 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1083,68 +1083,51 @@ static void vlv_c0_read(struct drm_i915_private *dev_priv, > ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT); > } > > -static bool vlv_c0_above(struct drm_i915_private *dev_priv, > - const struct intel_rps_ei *old, > - const struct intel_rps_ei *now, > - int threshold) > -{ > - u64 time, c0; > - unsigned int mul = 100; > - > - if (old->cz_clock == 0) > - return false; > - > - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) > - mul <<= 8; > - > - time = now->cz_clock - old->cz_clock; > - time *= threshold * dev_priv->czclk_freq; > - > - /* Workload can be split between render + media, e.g. SwapBuffers > - * being blitted in X after being rendered in mesa. To account for > - * this we need to combine both engines into our activity counter. > - */ > - c0 = now->render_c0 - old->render_c0; > - c0 += now->media_c0 - old->media_c0; > - c0 *= mul * VLV_CZ_CLOCK_TO_MILLI_SEC; > - > - return c0 >= time; > -} > - > void gen6_rps_reset_ei(struct drm_i915_private *dev_priv) > { > - vlv_c0_read(dev_priv, &dev_priv->rps.down_ei); > - dev_priv->rps.up_ei = dev_priv->rps.down_ei; > + memset(&dev_priv->rps.ei, 0, sizeof(dev_priv->rps.ei)); > } > > static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) > { > + const struct intel_rps_ei *prev = &dev_priv->rps.ei; > struct intel_rps_ei now; > u32 events = 0; > > - if ((pm_iir & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) > + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0) > return 0; > > vlv_c0_read(dev_priv, &now); > if (now.cz_clock == 0) > return 0; > > - if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) { > - if (!vlv_c0_above(dev_priv, > - &dev_priv->rps.down_ei, &now, > - dev_priv->rps.down_threshold)) > - events |= GEN6_PM_RP_DOWN_THRESHOLD; > - dev_priv->rps.down_ei = now; > - } > + if (prev->cz_clock) { > + u64 time, c0; > + unsigned int mul; > > - if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { > - if (vlv_c0_above(dev_priv, > - &dev_priv->rps.up_ei, &now, > - dev_priv->rps.up_threshold)) > - events |= GEN6_PM_RP_UP_THRESHOLD; > - dev_priv->rps.up_ei = now; > + mul = VLV_CZ_CLOCK_TO_MILLI_SEC * 100; /* scale to % */ scale to % as our thresholds are in %. But tiny bikeshed, so Reviewed-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx> > + if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH) > + mul <<= 8; > + > + time = now.cz_clock - prev->cz_clock; > + time *= dev_priv->czclk_freq; > + > + /* Workload can be split between render + media, > + * e.g. SwapBuffers being blitted in X after being rendered in > + * mesa. To account for this we need to combine both engines > + * into our activity counter. > + */ > + c0 = now.render_c0 - prev->render_c0; > + c0 += now.media_c0 - prev->media_c0; > + c0 *= mul; > + > + if (c0 > time * dev_priv->rps.up_threshold) > + events = GEN6_PM_RP_UP_THRESHOLD; > + else if (c0 < time * dev_priv->rps.down_threshold) > + events = GEN6_PM_RP_DOWN_THRESHOLD; > } > > + dev_priv->rps.ei = now; > return events; > } > > @@ -4284,7 +4267,7 @@ void intel_irq_init(struct drm_i915_private *dev_priv) > /* Let's track the enabled rps events */ > if (IS_VALLEYVIEW(dev_priv)) > /* WaGsvRC0ResidencyMethod:vlv */ > - dev_priv->pm_rps_events = GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED; > + dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED; > else > dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS; > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c > index 99e09f63d4b3..4db3a792c6c2 100644 > --- a/drivers/gpu/drm/i915/intel_pm.c > +++ b/drivers/gpu/drm/i915/intel_pm.c > @@ -5166,8 +5166,9 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) > { > u32 mask = 0; > > + /* We use UP_EI_EXPIRED interupts for both up/down in manual mode */ > if (val > dev_priv->rps.min_freq_softlimit) > - mask |= GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; > + mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT; > if (val < dev_priv->rps.max_freq_softlimit) > mask |= GEN6_PM_RP_UP_EI_EXPIRED | GEN6_PM_RP_UP_THRESHOLD; > > @@ -5277,7 +5278,7 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) > if (dev_priv->rps.enabled) { > u8 freq; > > - if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) > + if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED) > gen6_rps_reset_ei(dev_priv); > I915_WRITE(GEN6_PMINTRMSK, > gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq)); > -- > 2.11.0