On Tue, Mar 14, 2017 at 03:17:25PM +0200, Mika Kuoppala wrote:
> The high counter value bit can be used to get 8 bits more
> of range out of the same residency counter registers.

Please do note that it is internally a 40bit register with a 32bit
window (and a similar comment in code).

> Lets toggle this bit on and off on vlv/chv while reading the
> counters to push the wrap from 13 seconds to 54 minutes.
>
> Reported-by: Len Brown <len.brown@xxxxxxxxx>
> Cc: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Ville Syrjälä <ville.syrjala@xxxxxxxxxxxxxxx>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxx>
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 47 +++++++++++++++++++++++++++++++++++++----
>  1 file changed, 43 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index da742a9..7e7a8d9 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -8350,6 +8350,44 @@ void intel_pm_setup(struct drm_i915_private *dev_priv)
>          atomic_set(&dev_priv->pm.wakeref_count, 0);
>  }
>
> +static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
> +                             const i915_reg_t reg)
> +{
> +        u32 lower, upper, tmp, saved_ctl;
> +
> +        /* The register accessed do not need forcewake. We borrow
> +         * uncore lock to prevent concurrent access to range reg.
> +         */
> +        spin_lock_irq(&dev_priv->uncore.lock);
> +        saved_ctl = I915_READ_FW(VLV_COUNTER_CONTROL);
> +
> +        if (!(saved_ctl & VLV_COUNT_RANGE_HIGH))
> +                I915_WRITE_FW(VLV_COUNTER_CONTROL,
> +                              _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
> +
> +        tmp = I915_READ_FW(reg);

Do upper = READ;

> +        do {
> +                upper = tmp;

tmp = upper;

> +
> +                I915_WRITE_FW(VLV_COUNTER_CONTROL,
> +                              _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
> +                lower = I915_READ_FW(reg);
> +
> +                I915_WRITE_FW(VLV_COUNTER_CONTROL,
> +                              _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
> +

Kill this newline, so both look equivalent (i.e. so that the write is
always coupled with the read).

> +                tmp = I915_READ_FW(reg);

upper = READ

Then the lower/upper are clearly read together in the same loop, with
the wraparound condition checked.

> +        } while (upper != tmp);
> +
> +        if (!(saved_ctl & VLV_COUNT_RANGE_HIGH))
> +                I915_WRITE_FW(VLV_COUNTER_CONTROL,
> +                              _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
> +
> +        spin_unlock_irq(&dev_priv->uncore.lock);
> +
> +        return lower | (u64)upper << 8;
> +}
> +
>  u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
>                             const i915_reg_t reg)
>  {
> @@ -8367,15 +8405,16 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
>                  units = 1000;
>                  div = dev_priv->czclk_freq;
>
> -                if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> -                        units <<= 8;
> +                raw_time = vlv_residency_raw(dev_priv, reg);
> +                goto out;
>          } else if (IS_GEN9_LP(dev_priv)) {
>                  units = 1000;
>                  div = 1200; /* 833.33ns */
>          }
>
> -        raw_time = I915_READ(reg) * units;
> -        ret = DIV_ROUND_UP_ULL(raw_time, div);
> +        raw_time = I915_READ(reg);
> +out:

Do we need the goto? just move this I915_READ into the branches?

> +        ret = DIV_ROUND_UP_ULL(raw_time * units, div);

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx