From: Deepak S <deepak.s@xxxxxxxxx>
With RC6 enabled, BYT has a HW issue in determining the right Gfx
busyness.

WA for Turbo + RC6: use SW-based Gfx busyness detection to decide on
increasing/decreasing the frequency. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.
v2: Refactor duplicate code. (ville)
Signed-off-by: Deepak S <deepak.s@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_drv.h | 19 ++++++
drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++++++++++++++++--
drivers/gpu/drm/i915/i915_reg.h | 15 +++++
drivers/gpu/drm/i915/intel_pm.c | 50 ++++++++++----
4 files changed, 213 insertions(+), 17 deletions(-)
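
A note on the WA arithmetic, since it is easy to misread in the diff
below: the CZ timestamp ticks at VLV_CZ_CLOCK_TO_MILLI_SEC counts per
millisecond, while the render/media C0 counters tick at the CZ clock
(mem_freq * 1000 / 4, in kHz), so each delta is divided by its own rate
to put everything in milliseconds before the busyness percentage is
formed. Below is a minimal userspace sketch of the same computation;
the clock rates are taken from this patch's constants, and the 1333 MHz
mem_freq and the counter deltas in main() are hypothetical sample
values, not read from hardware.

/*
 * Standalone illustration of vlv_c0_residency(); build with
 * "gcc -o c0 c0.c". Register semantics and sample values are
 * assumptions for demonstration only -- the driver reads the real
 * counters via vlv_punit_read()/I915_READ().
 */
#include <stdio.h>
#include <stdint.h>

#define VLV_CZ_CLOCK_TO_MILLI_SEC	100000	/* CZ ts ticks per ms */
#define VLV_RP_UP_EI_THRESHOLD		90	/* % busy to ramp up */
#define VLV_RP_DOWN_EI_THRESHOLD	70	/* % busy to ramp down */

static uint32_t c0_residency(uint32_t elapsed_cz_ts,
			     uint32_t elapsed_render,
			     uint32_t elapsed_media,
			     uint32_t mem_freq_mhz)
{
	/* C0 counters tick at the CZ clock: mem_freq / 4, in kHz */
	uint32_t cz_freq_khz = (mem_freq_mhz * 1000 + 2) / 4;
	uint32_t time_ms = elapsed_cz_ts / VLV_CZ_CLOCK_TO_MILLI_SEC;
	uint32_t render_ms = elapsed_render / cz_freq_khz;
	uint32_t media_ms = elapsed_media / cz_freq_khz;
	uint32_t busiest = render_ms > media_ms ? render_ms : media_ms;

	return time_ms ? busiest * 100 / time_ms : 0;
}

int main(void)
{
	/* hypothetical deltas over one ~10 ms EI, 1333 MHz mem_freq */
	uint32_t res = c0_residency(1000000, 3100000, 400000, 1333);

	printf("C0 residency %u%% -> %s\n", res,
	       res >= VLV_RP_UP_EI_THRESHOLD ? "ramp up" :
	       res < VLV_RP_DOWN_EI_THRESHOLD ? "may ramp down" : "hold");
	return 0;
}

With these sample deltas the render well was in C0 for ~9 of the 10 ms
EI, i.e. 90% busy, which meets the up threshold; the down decision is
only evaluated once every VLV_INT_COUNT_FOR_DOWN_EI EIs, as the
gen6_pm_rps_work() path below shows.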
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 728b9c3..2baeeef 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -957,6 +957,12 @@ struct i915_suspend_saved_registers {
u32 savePCH_PORT_HOTPLUG;
};
+struct intel_rps_ei_calc {
+ u32 cz_ts_ei;
+ u32 render_ei_c0;
+ u32 media_ei_c0;
+};
+
struct intel_gen6_power_mgmt {
/* work and pm_iir are protected by dev_priv->irq_lock */
struct work_struct work;
@@ -969,10 +975,16 @@ struct intel_gen6_power_mgmt {
u8 rp1_delay;
u8 rp0_delay;
u8 hw_max;
+ u8 hw_min;
bool rp_up_masked;
bool rp_down_masked;
+ u32 cz_freq;
+ u32 ei_interrupt_count;
+
+ bool use_RC0_residency_for_turbo;
+
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
@@ -1531,6 +1543,13 @@ typedef struct drm_i915_private {
/* gen6+ rps state */
struct intel_gen6_power_mgmt rps;
+	/* RPS WA: up-threshold EI calculation state */
+ struct intel_rps_ei_calc rps_up_ei;
+
+	/* RPS WA: down-threshold EI calculation state */
+ struct intel_rps_ei_calc rps_down_ei;
+
/* ilk-only ips/rps state. Everything in here is protected by the global
* mchdev_lock in intel_pm.c */
struct intel_ilk_power_mgmt ips;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 56edff3..93b6ebf 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1023,6 +1023,120 @@ void gen6_set_pm_mask(struct drm_i915_private *dev_priv,
}
}
+static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
+ struct intel_rps_ei_calc *rps_ei)
+{
+ u32 cz_ts, cz_freq_khz;
+ u32 render_count, media_count;
+ u32 elapsed_render, elapsed_media, elapsed_time;
+ u32 residency = 0;
+
+ cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
+ cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
+
+ render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
+ media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
+
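+	/* First sample: just seed the snapshot and report the current delay */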
+ if (rps_ei->cz_ts_ei == 0) {
+ rps_ei->cz_ts_ei = cz_ts;
+ rps_ei->render_ei_c0 = render_count;
+ rps_ei->media_ei_c0 = media_count;
+
+ return dev_priv->rps.cur_delay;
+ }
+
+ elapsed_time = cz_ts - rps_ei->cz_ts_ei;
+ rps_ei->cz_ts_ei = cz_ts;
+
+ elapsed_render = render_count - rps_ei->render_ei_c0;
+ rps_ei->render_ei_c0 = render_count;
+
+ elapsed_media = media_count - rps_ei->media_ei_c0;
+ rps_ei->media_ei_c0 = media_count;
+
+	/* Convert all the counters into a common unit of milliseconds */
+ elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
+ elapsed_render /= cz_freq_khz;
+ elapsed_media /= cz_freq_khz;
+
+	/* Calculate the overall C0 residency percentage only
+	 * if the elapsed time is non-zero
+	 */
+	if (elapsed_time)
+		residency = max(elapsed_render, elapsed_media) * 100 /
+			    elapsed_time;
+
+ return residency;
+}
+
+/**
+ * vlv_calc_delay_from_C0_counters - increase/decrease the freq based on the
+ * GPU busyness calculated from the C0 counters of the render & media power
+ * wells
+ * @dev_priv: DRM device private
+ *
+ * Return: the new RPS delay to request
+ */
+static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+{
+ u32 residency_C0_up = 0, residency_C0_down = 0;
+ u8 new_delay;
+
+ dev_priv->rps.ei_interrupt_count++;
+
+ WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
+
+ if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
+ vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
+ vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
+ return dev_priv->rps.cur_delay;
+ }
+
+	/* To down-throttle, the C0 residency should be less than the down
+	 * threshold for continuous EI intervals. So calculate the down EI
+	 * counters once every VLV_INT_COUNT_FOR_DOWN_EI interrupts.
+	 */
+ if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
+ dev_priv->rps.ei_interrupt_count = 0;
+
+ residency_C0_down = vlv_c0_residency(dev_priv,
+ &dev_priv->rps_down_ei);
+ } else {
+ residency_C0_up = vlv_c0_residency(dev_priv,
+ &dev_priv->rps_up_ei);
+ }
+
+ new_delay = dev_priv->rps.cur_delay;
+
+	/* C0 residency exceeds the UP threshold: increase the frequency */
+ if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
+ if (dev_priv->rps.cur_delay < dev_priv->rps.max_delay)
+ new_delay = dev_priv->rps.cur_delay + 1;
+
+ /*
+ * For better performance, jump directly
+ * to RPe if we're below it.
+ */
+ if (new_delay < dev_priv->rps.rpe_delay)
+ new_delay = dev_priv->rps.rpe_delay;
+
+ } else if (!dev_priv->rps.ei_interrupt_count &&
+ (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
+		/* This means the C0 residency is less than the down threshold
+		 * over a period of VLV_INT_COUNT_FOR_DOWN_EI EIs, so reduce
+		 * the frequency.
+		 */
+ if (dev_priv->rps.cur_delay > dev_priv->rps.min_delay)
+ new_delay = dev_priv->rps.cur_delay - 1;
+ }
+
+ return new_delay;
+}
+
static void gen6_pm_rps_work(struct work_struct *work)
{
drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
@@ -1034,13 +1148,16 @@ static void gen6_pm_rps_work(struct work_struct *work)
pm_iir = dev_priv->rps.pm_iir;
dev_priv->rps.pm_iir = 0;
/* Make sure not to corrupt PMIMR state used by ringbuffer code */
- snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+ if (dev_priv->rps.use_RC0_residency_for_turbo)
+ snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
+ else
+ snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
spin_unlock_irq(&dev_priv->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
- WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
+ WARN_ON(pm_iir & ~(GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED));
- if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
+ if ((pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED)) == 0)
return;
mutex_lock(&dev_priv->rps.hw_lock);
@@ -1065,6 +1182,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
else
new_delay = dev_priv->rps.min_delay;
adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+ new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
@@ -1466,6 +1585,16 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
queue_work(dev_priv->wq, &dev_priv->rps.work);
}
+ if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
+ spin_lock(&dev_priv->irq_lock);
+ dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RP_UP_EI_EXPIRED;
+ snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RP_UP_EI_EXPIRED);
+ spin_unlock(&dev_priv->irq_lock);
+		DRM_DEBUG_DRIVER("Queueing RPS work - RC6 WA turbo\n");
+
+ queue_work(dev_priv->wq, &dev_priv->rps.work);
+ }
+
if (HAS_VEBOX(dev_priv->dev)) {
if (pm_iir & PM_VEBOX_USER_INTERRUPT)
notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
@@ -1546,7 +1675,7 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
gmbus_irq_handler(dev);
- if (pm_iir)
+ if (pm_iir & (GEN6_PM_RPS_EVENTS | GEN6_PM_RP_UP_EI_EXPIRED))
gen6_rps_irq_handler(dev_priv, pm_iir);
I915_WRITE(GTIIR, gt_iir);
@@ -2861,6 +2990,15 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev)
pm_irqs |= PM_VEBOX_USER_INTERRUPT;
dev_priv->pm_irq_mask = 0xffffffff;
+
+ if (dev_priv->rps.use_RC0_residency_for_turbo) {
+ dev_priv->pm_irq_mask &= ~GEN6_PM_RP_UP_EI_EXPIRED;
+ pm_irqs |= GEN6_PM_RP_UP_EI_EXPIRED;
+ } else {
+ dev_priv->pm_irq_mask &= ~GEN6_PM_RPS_EVENTS;
+ pm_irqs |= GEN6_PM_RPS_EVENTS;
+ }
+
I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
I915_WRITE(GEN6_PMIER, pm_irqs);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f73a49d..e58b37e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -391,6 +391,7 @@
#define PUNIT_REG_GPU_FREQ_STS 0xd8
#define GENFREQSTATUS (1<<0)
#define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc
+#define PUNIT_REG_CZ_TIMESTAMP 0xce
#define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */
#define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */
@@ -406,6 +407,11 @@
#define FB_FMAX_VMIN_FREQ_LO_SHIFT 27
#define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000
+#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
+#define VLV_RP_UP_EI_THRESHOLD 90
+#define VLV_RP_DOWN_EI_THRESHOLD 70
+#define VLV_INT_COUNT_FOR_DOWN_EI 5
+
/* vlv2 north clock has */
#define CCK_FUSE_REG 0x8
#define CCK_FUSE_HPLL_FREQ_MASK 0x3
@@ -4857,6 +4863,7 @@
#define VLV_GTLC_PW_STATUS 0x130094
#define VLV_GTLC_PW_RENDER_STATUS_MASK 0x80
#define VLV_GTLC_PW_MEDIA_STATUS_MASK 0x20
+#define VLV_GTLC_SURVIVABILITY_REG 0x130098
#define FORCEWAKE_MT 0xa188 /* multi-threaded */
#define FORCEWAKE_KERNEL 0x1
#define FORCEWAKE_USER 0x2
@@ -4864,6 +4871,11 @@
#define ECOBUS 0xa180
#define FORCEWAKE_MT_ENABLE (1<<5)
+#define VLV_GFX_CLK_FORCE_ON_BIT (1<<2)
+#define VLV_GFX_CLK_STATUS_BIT (1<<3)
+
+#define VLV_RC_COUNTER_CONTROL 0xFFFF00FF
+
#define GTFIFODBG 0x120000
#define GT_FIFO_SBDROPERR (1<<6)
#define GT_FIFO_BLOBDROPERR (1<<5)
@@ -4979,6 +4991,9 @@
#define VLV_GFX_CLK_STATUS_BIT (1<<3)
#define VLV_GFX_CLK_FORCE_ON_BIT (1<<2)
+#define VLV_RENDER_C0_COUNT_REG 0x138118
+#define VLV_MEDIA_C0_COUNT_REG 0x13811C
+
#define GEN6_GT_GFX_RC6_LOCKED 0x138104
#define VLV_COUNTER_CONTROL 0x138104
#define VLV_COUNT_RANGE_HIGH (1<<15)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9ab3883..8002ac7 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3084,10 +3084,14 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
I915_READ(VLV_GTLC_SURVIVABILITY_REG) &
~VLV_GFX_CLK_FORCE_ON_BIT);
- /* Unmask Up interrupts */
- dev_priv->rps.rp_up_masked = true;
- gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
+ /* Unmask Turbo interrupts */
+	if (dev_priv->rps.use_RC0_residency_for_turbo) {
+		I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RP_UP_EI_EXPIRED);
+	} else {
+ dev_priv->rps.rp_up_masked = true;
+ gen6_set_pm_mask(dev_priv, GEN6_PM_RP_DOWN_THRESHOLD,
dev_priv->rps.min_delay);
+ }
}
void gen6_rps_idle(struct drm_i915_private *dev_priv)
@@ -3148,7 +3152,13 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
- I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
+ if (dev_priv->rps.use_RC0_residency_for_turbo) {
+ I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
+ ~GEN6_PM_RP_UP_EI_EXPIRED);
+ } else {
+ I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) &
+ ~GEN6_PM_RPS_EVENTS);
+ }
/* Complete PM interrupt masking here doesn't race with the rps work
* item again unmasking PM interrupts because that is using a different
* register (PMIMR) to mask PM interrupts. The only risk is in leaving
@@ -3158,7 +3168,10 @@ static void gen6_disable_rps_interrupts(struct drm_device *dev)
dev_priv->rps.pm_iir = 0;
spin_unlock_irq(&dev_priv->irq_lock);
- I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+ if (dev_priv->rps.use_RC0_residency_for_turbo)
+ I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
+ else
+ I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
}
static void gen6_disable_rps(struct drm_device *dev)
@@ -3228,19 +3241,29 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
u32 enabled_intrs;
+ /* Clear out any stale interrupts first */
spin_lock_irq(&dev_priv->irq_lock);
WARN_ON(dev_priv->rps.pm_iir);
- snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
- I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+ if (dev_priv->rps.use_RC0_residency_for_turbo) {
+ snb_enable_pm_irq(dev_priv, GEN6_PM_RP_UP_EI_EXPIRED);
+ I915_WRITE(GEN6_PMIIR, GEN6_PM_RP_UP_EI_EXPIRED);
+ } else {
+ snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+ I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+ }
spin_unlock_irq(&dev_priv->irq_lock);
/* only unmask PM interrupts we need. Mask all others. */
- enabled_intrs = GEN6_PM_RPS_EVENTS;
+ if (dev_priv->rps.use_RC0_residency_for_turbo)
+ enabled_intrs = GEN6_PM_RP_UP_EI_EXPIRED;
+ else
+ enabled_intrs = GEN6_PM_RPS_EVENTS;
/* IVB and SNB hard hangs on looping batchbuffer
* if GEN6_PM_UP_EI_EXPIRED is masked.
*/
- if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
+ if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev) &&
+ !dev_priv->rps.use_RC0_residency_for_turbo)
enabled_intrs |= GEN6_PM_RP_UP_EI_EXPIRED;
I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs);
@@ -3608,6 +3631,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
I915_WRITE(GEN6_RP_DOWN_EI, 350000);
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
+ I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);
I915_WRITE(GEN6_RP_CONTROL,
GEN6_RP_MEDIA_TURBO |
@@ -3627,10 +3651,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
I915_WRITE(GEN6_RC6_THRESHOLD, 0x557);
/* allows RC6 residency counter to work */
- I915_WRITE(VLV_COUNTER_CONTROL,
- _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
- VLV_MEDIA_RC6_COUNT_EN |
- VLV_RENDER_RC6_COUNT_EN));
+ I915_WRITE(VLV_COUNTER_CONTROL, VLV_RC_COUNTER_CONTROL);
if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;
@@ -3673,6 +3694,9 @@ static void valleyview_enable_rps(struct drm_device *dev)
dev_priv->rps.rp_up_masked = false;
dev_priv->rps.rp_down_masked = false;
+	/* Enable the WA so that RC6 and turbo can work together */
+ dev_priv->rps.use_RC0_residency_for_turbo = true;
+
gen6_enable_rps_interrupts(dev);
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);