On Mon, Mar 03, 2014 at 11:35:50AM +0530, deepak.s@xxxxxxxxx wrote:
> From: Deepak S <deepak.s@xxxxxxxxx>
>
> With RC6 enabled, BYT has an HW issue in determining the right
> Gfx busyness.
> WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide
> on increasing/decreasing the freq. This logic will monitor C0
> counters of render/media power-wells over EI period and takes
> necessary action based on these values
>
> v2: Refactor duplicate code. (ville)
>
> Signed-off-by: Deepak S <deepak.s@xxxxxxxxx>

Did we reach some conclusion about this approach? It seemed to save power
in some workloads at least, but there's still the question of whether it
ramps up the frequency fast enough to provide a good user experience.
Maybe we should make it optional even on VLV?

>
> ---
>  drivers/gpu/drm/i915/i915_drv.h | 19 ++++++
>  drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_reg.h | 15 +++++
>  drivers/gpu/drm/i915/intel_pm.c | 50 ++++++++++----
>  4 files changed, 213 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 728b9c3..2baeeef 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
>  u32 savePCH_PORT_HOTPLUG;
>  };
>
> +struct intel_rps_ei_calc {
> + u32 cz_ts_ei;
> + u32 render_ei_c0;
> + u32 media_ei_c0;
> +};
> +
>  struct intel_gen6_power_mgmt {
>  /* work and pm_iir are protected by dev_priv->irq_lock */
>  struct work_struct work;
> @@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
>  u8 rp1_delay;
>  u8 rp0_delay;
>  u8 hw_max;
> + u8 hw_min;

Some leftover still?

>
>  bool rp_up_masked;
>  bool rp_down_masked;
>
> + u32 cz_freq;

This too seems unused.

> + u32 ei_interrupt_count;
> +
> + bool use_RC0_residency_for_turbo;
> +
>  int last_adj;
>  enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
>
> @@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
>  /* gen6+ rps state */
>  struct intel_gen6_power_mgmt rps;
>
> + /* rps wa up ei calculation */
> + struct intel_rps_ei_calc rps_up_ei;
> +
> + /* rps wa down ei calculation */
> + struct intel_rps_ei_calc rps_down_ei;
> +
> +
>  /* ilk-only ips/rps state. Everything in here is protected by the global
>  * mchdev_lock in intel_pm.c */
>  struct intel_ilk_power_mgmt ips;
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 56edff3..93b6ebf 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
>  }
>  }
>
> +static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> + struct intel_rps_ei_calc *rps_ei)
> +{
> + u32 cz_ts, cz_freq_khz;
> + u32 render_count, media_count;
> + u32 elapsed_render, elapsed_media, elapsed_time;
> + u32 residency = 0;
> +
> + cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> + cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> +
> + render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> + media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> +
> + if (rps_ei->cz_ts_ei == 0) {
> + rps_ei->cz_ts_ei = cz_ts;
> + rps_ei->render_ei_c0 = render_count;
> + rps_ei->media_ei_c0 = media_count;
> +
> + return dev_priv->rps.cur_delay;
> + }
> +
> + elapsed_time = cz_ts - rps_ei->cz_ts_ei;
> + rps_ei->cz_ts_ei = cz_ts;
> +
> + elapsed_render = render_count - rps_ei->render_ei_c0;
> + rps_ei->render_ei_c0 = render_count;
> +
> + elapsed_media = media_count - rps_ei->media_ei_c0;
> + rps_ei->media_ei_c0 = media_count;
> +
> + /* Convert all the counters into common unit of milli sec */
> + elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> + elapsed_render /= cz_freq_khz;
> + elapsed_media /= cz_freq_khz;
> +
> + /* Calculate overall C0 residency percentage only
> + * if elapsed time is non zero
> + */

Badly formatted comment.

> + if (elapsed_time) {
> + residency =
> + ((max(elapsed_render, elapsed_media) * 100)
> + / elapsed_time);
> + }
> +
> + return residency;
> +}
> +
> +
> +/**
> + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> + * busy-ness calculated from C0 counters of render & media power wells
> + * @dev_priv: DRM device private
> + *
> + */
> +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +{
> + u32 residency_C0_up = 0, residency_C0_down = 0;
> + u8 new_delay;
> +
> + dev_priv->rps.ei_interrupt_count++;
> +
> + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> +
> +
> + if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
> + vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
> + vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
> + return dev_priv->rps.cur_delay;
> + }
> +
> +
> + /* To down throttle, C0 residency should be less than down threshold
> + * for continous EI intervals. So calculate down EI counters
> + * once in VLV_INT_COUNT_FOR_DOWN_EI
> + */

Badly formatted comment.

> + if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> +
> + dev_priv->rps.ei_interrupt_count = 0;
> +
> + residency_C0_down = vlv_c0_residency(dev_priv,

^ Extra space we don't need

> + &dev_priv->rps_down_ei);
> + } else {
> + residency_C0_up = vlv_c0_residency(dev_priv,

^ Another

> + &dev_priv->rps_up_ei);
> + }
> +
> + new_delay = dev_priv->rps.cur_delay;
> +
> + /* C0 residency is greater than UP threshold. Increase Frequency */
> + if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> +
> + if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
> + new_delay = dev_priv->rps.cur_delay + 1;
> +
> + /*
> + * For better performance, jump directly
> + * to RPe if we're below it.
> + */
> + if (new_delay < dev_priv->rps.rpe_delay)
> + new_delay = dev_priv->rps.rpe_delay;
> +
> + } else if (!dev_priv->rps.ei_interrupt_count &&
> + (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> + /* This means, C0 residency is less than down threshold over
> + * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> + */

Comment is badly formatted.

> + if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
> + new_delay = dev_priv->rps.cur_delay - 1;
> + }
> +
> + return new_delay;
> +}
> +
>  static void gen6_pm_rps_work(struct work_struct *work)
>  {
>  drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
> @@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  pm_iir = dev_priv->rps.pm_iir;
>  dev_priv->rps.pm_iir = 0;
>  /* Make sure not to corrupt PMIMR state used by ringbuffer code */
> - snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> + if (dev_priv->rps.use_RC0_residency_for_turbo)
> + snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> + else
> + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);

This pattern keeps repeating many times in the patch. Maybe it would be
better to track the enabled PM interrupts in dev_priv somewhere, and use
that instead of GEN6_PM_RPS_EVENTS vs. GEN6_PM_RP_UP_EI_EXPIRED everywhere.
Maybe call it dev_priv->pm_rps_events to keep in line with the
GEN6_PM_RPS_EVENTS name. I'd make it a separate preparation patch (rough
sketch of what I mean at the end of this mail).

>  spin_unlock_irq(&dev_priv->irq_lock);
>
>  /* Make sure we didn't queue anything we're not going to process. */
> - WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
> + WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
>
> - if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
> + if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
>  return;
>
>  mutex_lock(&dev_priv->rps.hw_lock);
> @@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>  else
>  new_delay = dev_priv->rps.min_delay;
>  adj = 0;
> + } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> + new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
>  } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
>  if (adj < 0)
>  adj *= 2;
> @@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
>  queue_work(dev_priv->wq, &dev_priv->rps.work);
>  }
>
> + if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> + spin_lock(&dev_priv->irq_lock);
> + dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
> + snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
> + spin_unlock(&dev_priv->irq_lock);
> + DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo");
> +
> + queue_work(dev_priv->wq, &dev_priv->rps.work);
> + }
> +
>  if (HAS_VEBOX(dev_priv->dev)) {
>  if (pm_iir & PM_VEBOX_USER_INTERRUPT)
>  notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
> @@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
>  if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
>  gmbus_irq_handler(dev);
>
> - if (pm_iir)
> + if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
>  gen6_rps_irq_handler(dev_priv, pm_iir);
>
>  I915_WRITE(GTIIR, gt_iir);
> @@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
>  pm_irqs |= PM_VEBOX_USER_INTERRUPT;
>
>  dev_priv->pm_irq_mask = 0xffffffff;
> +
> + if (dev_priv->rps.use_RC0_residency_for_turbo) {
> + dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
> + pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
> + } else {
> + dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
> + pm_irqs |= GEN6_PM_RPS_EVENTS;
> + }
> +
>  I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
>  I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
>  I915_WRITE(GEN6_PMIER, pm_irqs);
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f73a49d..e58b37e 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -391,6 +391,7 @@
>  #define PUNIT_REG_GPU_FREQ_STS 0xd8
>  #define GENFREQSTATUS (1<<0)
>  #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc
> +#define PUNIT_REG_CZ_TIMESTAMP 0xce
>
>  #define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */
>  #define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */
> @@ -406,6 +407,11 @@
>  #define FB_FMAX_VMIN_FREQ_LO_SHIFT 27
>  #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
>
> +#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
> +#define VLV_RP_UP_EI_THRESHOLD 90
> +#define VLV_RP_DOWN_EI_THRESHOLD 70
> +#define VLV_INT_COUNT_FOR_DOWN_EI 5
> +
>  /* vlv2 north clock has */
>  #define CCK_FUSE_REG 0x8
>  #define CCK_FUSE_HPLL_FREQ_MASK 0x3
> @@ -4857,6 +4863,7 @@
>  #define VLV_GTLC_PW_STATUS 0x130094
>  #define VLV_GTLC_PW_RENDER_STATUS_MASK 0x80
>  #define VLV_GTLC_PW_MEDIA_STATUS_MASK 0x20
> +#define VLV_GTLC_SURVIVABILITY_REG 0x130098
>  #define FORCEWAKE_MT 0xa188 /* multi-threaded */
>  #define FORCEWAKE_KERNEL 0x1
>  #define FORCEWAKE_USER 0x2
> @@ -4864,6 +4871,11 @@
>  #define ECOBUS 0xa180
>  #define FORCEWAKE_MT_ENABLE (1<<5)
>
> +#define VLV_GFX_CLK_FORCE_ON_BIT (1<<2)
> +#define VLV_GFX_CLK_STATUS_BIT (1<<3)

Leftovers from somewhere.

> +
> +#define VLV_RC_COUNTER_CONTROL 0xFFFF00FF

This should be below the register define, but it would be better to use
the names of the bits properly.

Also do we really want to enable all of the counters when you only use
the rc0 counters? I'm also wondering why we're currently enabling the rc6
counters. I don't see that listed as a requirement for rc6 to work in any
document, and we don't seem to expose those counters through debugfs
either.

> +
>  #define GTFIFODBG 0x120000
>  #define GT_FIFO_SBDROPERR (1<<6)
>  #define GT_FIFO_BLOBDROPERR (1<<5)
> @@ -4979,6 +4991,9 @@
>  #define VLV_GFX_CLK_STATUS_BIT (1<<3)
>  #define VLV_GFX_CLK_FORCE_ON_BIT (1<<2)
>
> +#define VLV_RENDER_C0_COUNT_REG 0x138118
> +#define VLV_MEDIA_C0_COUNT_REG 0x13811C

Maybe put these in the correct numerical place between GEN6_GT_GFX_RC6pp
and GEN6_PCODE_MAILBOX.
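While I'm here, this is roughly what I mean by using the bit names for the
counter control value instead of the magic 0xFFFF00FF. Completely untested,
and the RC0 count-enable bit names/positions below are just my guess and
would need to be checked against the PUNIT docs:

#define   VLV_MEDIA_RC0_COUNT_EN (1<<5)
#define   VLV_RENDER_RC0_COUNT_EN (1<<4)

	/* allow the RC0 (and RC6?) residency counters to work */
	I915_WRITE(VLV_COUNTER_CONTROL,
		   _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
				      VLV_RENDER_RC0_COUNT_EN |
				      VLV_MEDIA_RC0_COUNT_EN |
				      VLV_RENDER_RC6_COUNT_EN |
				      VLV_MEDIA_RC6_COUNT_EN));

Spelling the bits out also makes it obvious exactly which counters we
enable, which ties in with my question above about whether the rc6
counters need to be enabled here at all.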
> +
>  #define GEN6_GT_GFX_RC6_LOCKED 0x138104
>  #define VLV_COUNTER_CONTROL 0x138104
>  #define VLV_COUNT_RANGE_HIGH (1<<15)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 9ab3883..8002ac7 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
>  I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
>  ~VLV_GFX_CLK_FORCE_ON_BIT);
>
> - /* Unmask Up interrupts */
> - dev_priv->rps.rp_up_masked = true;
> - gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
> + /* Unmask Turbo interrupts */
> + if (dev_priv->rps.use_RC0_residency_for_turbo)
> + I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
> + else {
> + dev_priv->rps.rp_up_masked = true;
> + gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
>  dev_priv->rps.min_delay);
> + }
>  }
>
>  void gen6_rps_idle(struct drm_i915_private *dev_priv)
> @@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>  struct drm_i915_private *dev_priv = dev->dev_private;
>
>  I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> - I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
> + if (dev_priv->rps.use_RC0_residency_for_turbo) {
> + I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> + ~GEN6_PM_RP_UP_EI_EXPIRED);
> + } else {
> + I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
> + ~GEN6_PM_RPS_EVENTS);
> + }
>  /* Complete PM interrupt masking here doesn't race with the rps work
>  * item again unmasking PM interrupts because that is using a different
>  * register (PMIMR) to mask PM interrupts. The only risk is in leaving
> @@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
>  dev_priv->rps.pm_iir = 0;
>  spin_unlock_irq(&dev_priv->irq_lock);
>
> - I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> + if (dev_priv->rps.use_RC0_residency_for_turbo)
> + I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> + else
> + I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
>  }
>
>  static void gen6_disable_rps(struct drm_device *dev)
> @@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
>  struct drm_i915_private *dev_priv = dev->dev_private;
>  u32 enabled_intrs;
>
> + /* Clear out any stale interrupts first */
>  spin_lock_irq(&dev_priv->irq_lock);
>  WARN_ON(dev_priv->rps.pm_iir);
> - snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> - I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> + if (dev_priv->rps.use_RC0_residency_for_turbo) {
> + snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
> + I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
> + } else {
> + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
> + I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
> + }
>  spin_unlock_irq(&dev_priv->irq_lock);
>
>  /* only unmask PM interrupts we need. Mask all others. */
> - enabled_intrs = GEN6_PM_RPS_EVENTS;
> + if (dev_priv->rps.use_RC0_residency_for_turbo)
> + enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
> + else
> + enabled_intrs = GEN6_PM_RPS_EVENTS;
>
>  /* IVB and SNB hard hangs on looping batchbuffer
>  * if GEN6_PM_UP_EI_EXPIRED is masked.
>  */
> - if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
> + if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
> + !dev_priv->rps.use_RC0_residency_for_turbo)
>  enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
>
>  I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
> @@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  I915_WRITE(GEN6_RP_DOWN_EI, 350000);
>
>  I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
> + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
>
>  I915_WRITE(GEN6_RP_CONTROL,
>  GEN6_RP_MEDIA_TURBO |
> @@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
>
>  /* allows RC6 residency counter to work */
> - I915_WRITE(VLV_COUNTER_CONTROL,
> - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
> - VLV_MEDIA_RC6_COUNT_EN |
> - VLV_RENDER_RC6_COUNT_EN));
> + I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
>  if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
>  rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
>
> @@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
>  dev_priv->rps.rp_up_masked = false;
>  dev_priv->rps.rp_down_masked = false;
>
> + /* enable WA for RC6+turbo to work together */
> + dev_priv->rps.use_RC0_residency_for_turbo = true;
> +
>  gen6_enable_rps_interrupts(dev);
>
>  gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
> --
> 1.8.5.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
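
P.S. The dev_priv->pm_rps_events preparation patch I mentioned above could
look something like this. Untested sketch, the name and plumbing are just
illustrative:

	/* in struct drm_i915_private, next to pm_irq_mask */
	u32 pm_rps_events;

	/* picked once when enabling the rps interrupts */
	if (dev_priv->rps.use_RC0_residency_for_turbo)
		dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
	else
		dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;

and then e.g. gen6_pm_rps_work() would just do:

	snb_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
	...
	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);

	if ((pm_iir & dev_priv->pm_rps_events) == 0)
		return;

which would get rid of all the repeated use_RC0_residency_for_turbo
if/else ladders in the interrupt code.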