From: Deepak S <deepak.s@xxxxxxxxx> With RC6 enabled, BYT has an HW issue in determining the right Gfx busyness. WA for Turbo + RC6: Use SW based Gfx busy-ness detection to decide on increasing/decreasing the freq. This logic will monitor C0 counters of render/media power-wells over EI period and takes necessary action based on these values v2: resolved conflict in i915_reg.h Signed-off-by: Deepak S <deepak.s@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 13 ++++ drivers/gpu/drm/i915/i915_irq.c | 151 ++++++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 19 +++++ drivers/gpu/drm/i915/intel_pm.c | 54 ++++++++++---- 4 files changed, 220 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e89b9f4..1d76461 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -942,10 +942,23 @@ struct intel_gen6_power_mgmt { u8 rp1_delay; u8 rp0_delay; u8 hw_max; + u8 hw_min; bool rp_up_masked; bool rp_down_masked; + u32 cz_freq; + u32 ei_interrupt_count; + + u32 cz_ts_up_ei; + u32 render_up_EI_C0; + u32 media_up_EI_C0; + u32 cz_ts_down_ei; + u32 render_down_EI_C0; + u32 media_down_EI_C0; + + bool use_RC0_residency_for_turbo; + int last_adj; enum { LOW_POWER, BETWEEN, HIGH_POWER } power; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d0d87ed..f4a3660 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -965,6 +965,123 @@ static void notify_ring(struct drm_device *dev, i915_queue_hangcheck(dev); } +/** + * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU + * busy-ness calculated from C0 counters of render & media power wells + * @dev_priv: DRM device private + * + */ +static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv) +{ + u32 cz_ts = 0; + u32 render_count = 0, media_count = 0; + u32 elapsed_render = 0, elapsed_media = 0; + u32 elapsed_time = 0; + u32 
residency_C0_up = 0, residency_C0_down = 0; + u8 new_delay; + + dev_priv->rps.ei_interrupt_count++; + + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); + + cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP); + + render_count = I915_READ(VLV_RENDER_C0_COUNT_REG); + media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG); + + if (0 == dev_priv->rps.cz_ts_up_ei) { + + dev_priv->rps.cz_ts_up_ei = dev_priv->rps.cz_ts_down_ei = cz_ts; + dev_priv->rps.render_up_EI_C0 = dev_priv->rps.render_down_EI_C0 + = render_count; + dev_priv->rps.media_up_EI_C0 = dev_priv->rps.media_down_EI_C0 + = media_count; + + return dev_priv->rps.cur_delay; + } + + elapsed_time = cz_ts - dev_priv->rps.cz_ts_up_ei; + dev_priv->rps.cz_ts_up_ei = cz_ts; + + elapsed_render = render_count - dev_priv->rps.render_up_EI_C0; + dev_priv->rps.render_up_EI_C0 = render_count; + + elapsed_media = media_count - dev_priv->rps.media_up_EI_C0; + dev_priv->rps.media_up_EI_C0 = media_count; + + /* Convert all the counters into common unit of milli sec */ + elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC; + elapsed_render /= (dev_priv->rps.cz_freq / 1000); + elapsed_media /= (dev_priv->rps.cz_freq / 1000); + + /* Calculate overall C0 residency percentage only + * if elapsed time is non zero + */ + if (elapsed_time) { + residency_C0_up = ((max(elapsed_render, elapsed_media) + * 100) / elapsed_time); + } + + /* To down throttle, C0 residency should be less than down threshold + * for continuous EI intervals. 
So calculate down EI counters + * once in VLV_INT_COUNT_FOR_DOWN_EI + */ + if (VLV_INT_COUNT_FOR_DOWN_EI == dev_priv->rps.ei_interrupt_count) { + + dev_priv->rps.ei_interrupt_count = 0; + + elapsed_time = cz_ts - dev_priv->rps.cz_ts_down_ei; + dev_priv->rps.cz_ts_down_ei = cz_ts; + + elapsed_render = render_count - dev_priv->rps.render_down_EI_C0; + dev_priv->rps.render_down_EI_C0 = render_count; + + elapsed_media = media_count - dev_priv->rps.media_down_EI_C0; + dev_priv->rps.media_down_EI_C0 = media_count; + + /* Convert all the counters into common unit of milli sec */ + elapsed_time /= 100000; + elapsed_render /= (dev_priv->rps.cz_freq / 1000); + elapsed_media /= (dev_priv->rps.cz_freq / 1000); + + /* Calculate overall C0 residency percentage only + * if elapsed time is non zero + */ + if (elapsed_time) { + residency_C0_down = + ((max(elapsed_render, elapsed_media) * 100) + / elapsed_time); + } + + } + + new_delay = dev_priv->rps.cur_delay; + + /* C0 residency is greater than UP threshold. Increase Frequency */ + if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) { + + if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay) + new_delay = dev_priv->rps.cur_delay + 1; + + /* + * For better performance, jump directly + * to RPe if we're below it. + */ + if (new_delay < dev_priv->rps.rpe_delay) + new_delay = dev_priv->rps.rpe_delay; + + } else if (!dev_priv->rps.ei_interrupt_count && + (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) { + /* This means, C0 residency is less than down threshold over + * a period of VLV_INT_COUNT_FOR_DOWN_EI. 
So, reduce the freq + */ + if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay) + new_delay = dev_priv->rps.cur_delay - 1; + } + + return new_delay; +} + static void gen6_pm_rps_work(struct work_struct *work) { drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t, @@ -976,15 +1093,18 @@ static void gen6_pm_rps_work(struct work_struct *work) pm_iir = dev_priv->rps.pm_iir; dev_priv->rps.pm_iir = 0; /* Make sure not to corrupt PMIMR state used by ringbuffer code */ - snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); + if (dev_priv->rps.use_RC0_residency_for_turbo) + snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED); + else + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); + spin_unlock_irq(&dev_priv->irq_lock); /* Make sure we didn't queue anything we're not going to process. */ - WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS); + WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)); - if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0) + if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0) return; - mutex_lock(&dev_priv->rps.hw_lock); adj = dev_priv->rps.last_adj; @@ -1020,6 +1140,8 @@ static void gen6_pm_rps_work(struct work_struct *work) else new_delay = dev_priv->rps.min_delay; adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { + new_delay = vlv_calc_delay_from_C0_counters(dev_priv); } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { if (adj < 0) adj *= 2; @@ -1433,6 +1555,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) queue_work(dev_priv->wq, &dev_priv->rps.work); } + if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) { + spin_lock(&dev_priv->irq_lock); + dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED; + snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED); + spin_unlock(&dev_priv->irq_lock); + DRM_DEBUG_DRIVER("\nQueueing RPS Work - RC6 WA Turbo"); + + queue_work(dev_priv->wq, &dev_priv->rps.work); + } + if (HAS_VEBOX(dev_priv->dev)) { if (pm_iir & PM_VEBOX_USER_INTERRUPT) 
notify_ring(dev_priv->dev, &dev_priv->ring[VECS]); @@ -1513,7 +1645,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg) if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS) gmbus_irq_handler(dev); - if (pm_iir) + if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) gen6_rps_irq_handler(dev_priv, pm_iir); I915_WRITE(GTIIR, gt_iir); @@ -2829,6 +2961,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) pm_irqs |= PM_VEBOX_USER_INTERRUPT; dev_priv->pm_irq_mask = 0xffffffff; + + if (dev_priv->rps.use_RC0_residency_for_turbo) { + dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED; + pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED; + } else { + dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS; + pm_irqs |= GEN6_PM_RPS_EVENTS; + } + I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR)); I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask); I915_WRITE(GEN6_PMIER, pm_irqs); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e1d5f31..fa083d4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -392,6 +392,7 @@ #define PUNIT_REG_GPU_FREQ_STS 0xd8 #define GENFREQSTATUS (1<<0) #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc +#define PUNIT_REG_CZ_TIMESTAMP 0xce #define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */ #define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */ @@ -407,6 +408,15 @@ #define FB_FMAX_VMIN_FREQ_LO_SHIFT 27 #define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000 +#define VLV_CZ_CLOCK_FREQ_DDR_MODE_800 200000000 +#define VLV_CZ_CLOCK_FREQ_DDR_MODE_1066 266666666 +#define VLV_CZ_CLOCK_FREQ_DDR_MODE_1333 333333333 + +#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000 +#define VLV_RP_UP_EI_THRESHOLD 90 +#define VLV_RP_DOWN_EI_THRESHOLD 70 +#define VLV_INT_COUNT_FOR_DOWN_EI 5 + /* vlv2 north clock has */ #define CCK_FUSE_REG 0x8 #define CCK_FUSE_HPLL_FREQ_MASK 0x3 @@ -4824,6 +4834,7 @@ #define FORCEWAKE_ACK 0x130090 #define VLV_GTLC_WAKE_CTRL 0x130090 #define VLV_GTLC_PW_STATUS 0x130094 +#define VLV_GTLC_SURVIVABILITY_REG 0x130098 #define 
VLV_GTLC_PW_RENDER_STATUS_MASK 0x80 #define VLV_GTLC_PW_MEDIA_STATUS_MASK 0x20 #define FORCEWAKE_MT 0xa188 /* multi-threaded */ @@ -4833,6 +4844,11 @@ #define ECOBUS 0xa180 #define FORCEWAKE_MT_ENABLE (1<<5) +#define VLV_GFX_CLK_FORCE_ON_BIT (1<<2) +#define VLV_GFX_CLK_STATUS_BIT (1<<3) + +#define VLV_RC_COUNTER_CONTROL 0xFFFF00FF + #define GTFIFODBG 0x120000 #define GT_FIFO_SBDROPERR (1<<6) #define GT_FIFO_BLOBDROPERR (1<<5) @@ -4948,6 +4964,9 @@ #define VLV_GFX_CLK_STATUS_BIT (1<<3) #define VLV_GFX_CLK_FORCE_ON_BIT (1<<2) +#define VLV_RENDER_C0_COUNT_REG 0x138118 +#define VLV_MEDIA_C0_COUNT_REG 0x13811C + #define GEN6_GT_GFX_RC6_LOCKED 0x138104 #define VLV_COUNTER_CONTROL 0x138104 #define VLV_COUNT_RANGE_HIGH (1<<15) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 86d87e5..1fd1a00 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3075,7 +3075,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) ~VLV_GFX_CLK_FORCE_ON_BIT); /* Unmask Turbo interrupts */ - I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RPS_EVENTS); + if (dev_priv->rps.use_RC0_residency_for_turbo) + I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED); + else + I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RPS_EVENTS); + } @@ -3138,7 +3142,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); - I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS); + if (dev_priv->rps.use_RC0_residency_for_turbo) { + I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & + ~GEN6_PM_RP_UP_EI_EXPIRED); + } else { + I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & + ~GEN6_PM_RPS_EVENTS); + } /* Complete PM interrupt masking here doesn't race with the rps work * item again unmasking PM interrupts because that is using a different * register (PMIMR) to mask PM interrupts. 
The only risk is in leaving @@ -3148,7 +3158,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev) dev_priv->rps.pm_iir = 0; spin_unlock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); + if (dev_priv->rps.use_RC0_residency_for_turbo) + I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED); + else + I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); } static void gen6_disable_rps(struct drm_device *dev) @@ -3218,19 +3231,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; u32 enabled_intrs; + /* Clear out any stale interrupts first */ spin_lock_irq(&dev_priv->irq_lock); WARN_ON(dev_priv->rps.pm_iir); - snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); - I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); + if (dev_priv->rps.use_RC0_residency_for_turbo) { + snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED); + I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED); + } else { + snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS); + I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS); + } spin_unlock_irq(&dev_priv->irq_lock); /* only unmask PM interrupts we need. Mask all others. */ - enabled_intrs = GEN6_PM_RPS_EVENTS; + if (dev_priv->rps.use_RC0_residency_for_turbo) + enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED; + else + enabled_intrs = GEN6_PM_RPS_EVENTS; /* IVB and SNB hard hangs on looping batchbuffer * if GEN6_PM_UP_EI_EXPIRED is masked. 
*/ - if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) + if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) && + !dev_priv->rps.use_RC0_residency_for_turbo) enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED; I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs); @@ -3598,6 +3621,7 @@ static void valleyview_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RP_DOWN_EI, 350000); I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240); I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | @@ -3617,10 +3641,7 @@ static void valleyview_enable_rps(struct drm_device *dev) I915_WRITE(GEN6_RC6_THRESHOLD, 0x557); /* allows RC6 residency counter to work */ - I915_WRITE(VLV_COUNTER_CONTROL, - _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH | - VLV_MEDIA_RC6_COUNT_EN | - VLV_RENDER_RC6_COUNT_EN)); + I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL); if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL; @@ -3649,7 +3670,9 @@ static void valleyview_enable_rps(struct drm_device *dev) vlv_gpu_freq(dev_priv, dev_priv->rps.rpe_delay), dev_priv->rps.rpe_delay); - dev_priv->rps.min_delay = valleyview_rps_min_freq(dev_priv); + dev_priv->rps.hw_min = valleyview_rps_min_freq(dev_priv); + + dev_priv->rps.min_delay = dev_priv->rps.hw_min; DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", vlv_gpu_freq(dev_priv, dev_priv->rps.min_delay), dev_priv->rps.min_delay); @@ -3663,6 +3686,9 @@ static void valleyview_enable_rps(struct drm_device *dev) valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay); + /* enable WA for RC6+turbo to work together */ + dev_priv->rps.use_RC0_residency_for_turbo = true; + gen6_enable_rps_interrupts(dev); gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); @@ -4954,15 +4980,19 @@ static void valleyview_init_clock_gating(struct drm_device *dev) switch ((val >> 6) & 3) { case 0: dev_priv->mem_freq = 800; + dev_priv->rps.cz_freq = VLV_CZ_CLOCK_FREQ_DDR_MODE_800; break; case 1: dev_priv->mem_freq = 1066; + 
dev_priv->rps.cz_freq = VLV_CZ_CLOCK_FREQ_DDR_MODE_1066; break; case 2: dev_priv->mem_freq = 1333; + dev_priv->rps.cz_freq = VLV_CZ_CLOCK_FREQ_DDR_MODE_1333; break; case 3: dev_priv->mem_freq = 1333; + dev_priv->rps.cz_freq = VLV_CZ_CLOCK_FREQ_DDR_MODE_1333; break; } DRM_DEBUG_DRIVER("DDR speed: %d MHz", dev_priv->mem_freq); -- 1.8.4.2 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx