[PATCH 2/2] drm/i915: Tweak RPS thresholds to more aggressively downclock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



After applying wait-boost we often find ourselves stuck at higher clocks
than required. The current threshold value requires the GPU to be
continuously and completely idle for 313ms before it is dropped by one
bin. Conversely, we require the GPU to be busy for an average of 90% over
a 84ms period before we upclock. So the current thresholds almost never
downclock the GPU, and respond very slowly to sudden demands for more
power. It is easy to observe that we currently lock into the wrong bin
and both underperform in benchmarks and consume more power than optimal
(just by repeating the task and measuring the different results).

A more sensible approach, as discussed in the bspec, is to use a
continuous threshold for upclocking, and an average value for downclocking.
However, we need to balance being overagressive and reclocking too
often, both for nearly idle workloads and the most demanding of games.

v2: Use 3 power zones to keep frequencies low in steady-state mostly
idle (e.g. scrolling, interactive 2D drawing), and frequencies high
for demanding games. In between those end-states, we use a
fast-reclocking algorithm to converge more quickly on the desired bin.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Kenneth Graunke <kenneth@xxxxxxxxxxxxx>
Cc: Stéphane Marchesin <stephane.marchesin@xxxxxxxxx>
Cc: Owen Taylor <otaylor@xxxxxxxxxx>
Cc: "Meng, Mengmeng" <mengmeng.meng@xxxxxxxxx>
Cc: "Zhuang, Lena" <lena.zhuang@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h |   4 ++
 drivers/gpu/drm/i915/i915_irq.c |  43 ++++++++++----
 drivers/gpu/drm/i915/i915_reg.h |   2 +-
 drivers/gpu/drm/i915/intel_pm.c | 128 +++++++++++++++++++++++++++++-----------
 4 files changed, 128 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e8537d3..17f785c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -835,8 +835,12 @@ struct intel_gen6_power_mgmt {
 	u8 min_delay;
 	u8 max_delay;
 	u8 rpe_delay;
+	u8 rp1_delay;
+	u8 rp0_delay;
 	u8 hw_max;
 
+	int last_adj;
+	enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
 	atomic_t wait_boost;
 
 	struct delayed_work delayed_resume_work;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 875125f..c17736f 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -812,7 +812,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
 						    rps.work);
 	u32 pm_iir;
-	u8 new_delay;
+	int new_delay, adj;
 
 	spin_lock_irq(&dev_priv->irq_lock);
 	pm_iir = dev_priv->rps.pm_iir;
@@ -829,29 +829,46 @@ static void gen6_pm_rps_work(struct work_struct *work)
 
 	mutex_lock(&dev_priv->rps.hw_lock);
 
+	adj = dev_priv->rps.last_adj;
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
-		new_delay = dev_priv->rps.cur_delay + 1;
+		if (adj > 0)
+			adj *= 2;
+		else
+			adj = 1;
+		new_delay = dev_priv->rps.cur_delay + adj;
 
 		/*
 		 * For better performance, jump directly
 		 * to RPe if we're below it.
 		 */
-		if (IS_VALLEYVIEW(dev_priv->dev) &&
-		    dev_priv->rps.cur_delay < dev_priv->rps.rpe_delay)
+		if (new_delay < dev_priv->rps.rpe_delay)
 			new_delay = dev_priv->rps.rpe_delay;
-	} else
-		new_delay = dev_priv->rps.cur_delay - 1;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+		if (adj < 0)
+			adj *= 2;
+		else
+			adj = -1;
+		new_delay = dev_priv->rps.cur_delay + adj;
+	} else { /* Idle timeout */
+		if (dev_priv->rps.cur_delay > dev_priv->rps.rpe_delay)
+			new_delay = dev_priv->rps.rpe_delay;
+		else
+			new_delay = dev_priv->rps.min_delay;
+		adj = 0;
+	}
+	dev_priv->rps.last_adj = adj;
 
 	/* sysfs frequency interfaces may have snuck in while servicing the
 	 * interrupt
 	 */
-	if (new_delay >= dev_priv->rps.min_delay &&
-	    new_delay <= dev_priv->rps.max_delay) {
-		if (IS_VALLEYVIEW(dev_priv->dev))
-			valleyview_set_rps(dev_priv->dev, new_delay);
-		else
-			gen6_set_rps(dev_priv->dev, new_delay);
-	}
+	if (new_delay < (int)dev_priv->rps.min_delay)
+		new_delay = dev_priv->rps.min_delay;
+	if (new_delay > (int)dev_priv->rps.max_delay)
+		new_delay = dev_priv->rps.max_delay;
+	if (IS_VALLEYVIEW(dev_priv->dev))
+		valleyview_set_rps(dev_priv->dev, new_delay);
+	else
+		gen6_set_rps(dev_priv->dev, new_delay);
 
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 5bc714a..7e3efd8 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -4627,7 +4627,7 @@
 #define   GEN6_RP_UP_IDLE_MIN			(0x1<<3)
 #define   GEN6_RP_UP_BUSY_AVG			(0x2<<3)
 #define   GEN6_RP_UP_BUSY_CONT			(0x4<<3)
-#define   GEN7_RP_DOWN_IDLE_AVG			(0x2<<0)
+#define   GEN6_RP_DOWN_IDLE_AVG			(0x2<<0)
 #define   GEN6_RP_DOWN_IDLE_CONT		(0x1<<0)
 #define GEN6_RP_UP_THRESHOLD			0xA02C
 #define GEN6_RP_DOWN_THRESHOLD			0xA030
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 89b1d76..d6d00e6 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3287,6 +3287,66 @@ static u32 gen6_rps_limits(struct drm_i915_private *dev_priv, u8 *val)
 	return limits;
 }
 
+static void gen6_set_rps_power(struct drm_i915_private *dev_priv,
+			       int power)
+{
+	/* Note the units here are not exactly 1us, but 1280ns. */
+	switch ((dev_priv->rps.power = power)) {
+	case LOW_POWER:
+		/* Upclock if more than 95% busy for 32ms */
+		I915_WRITE(GEN6_RP_UP_EI, 25000);
+		I915_WRITE(GEN6_RP_UP_THRESHOLD, 24000);
+
+		/* Downclock if less than 85% busy over 52ms */
+		I915_WRITE(GEN6_RP_DOWN_EI, 40000);
+		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 34000);
+
+		I915_WRITE(GEN6_RP_CONTROL,
+			   GEN6_RP_MEDIA_TURBO |
+			   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+			   GEN6_RP_MEDIA_IS_GFX |
+			   GEN6_RP_ENABLE |
+			   GEN6_RP_UP_BUSY_AVG |
+			   GEN6_RP_DOWN_IDLE_AVG);
+		break;
+
+	case BETWEEN:
+		/* Upclock if continuously busy for 13ms */
+		I915_WRITE(GEN6_RP_UP_THRESHOLD, 10000);
+
+		/* Downclock if less than 75% idle over 64ms */
+		I915_WRITE(GEN6_RP_DOWN_EI, 50000);
+		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 37500);
+
+		I915_WRITE(GEN6_RP_CONTROL,
+			   GEN6_RP_MEDIA_TURBO |
+			   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+			   GEN6_RP_MEDIA_IS_GFX |
+			   GEN6_RP_ENABLE |
+			   GEN6_RP_UP_BUSY_CONT |
+			   GEN6_RP_DOWN_IDLE_AVG);
+		break;
+
+	case HIGH_POWER:
+		/* Upclock if more than 87.5% busy over 10ms */
+		I915_WRITE(GEN6_RP_UP_EI, 8000);
+		I915_WRITE(GEN6_RP_UP_THRESHOLD, 7000);
+
+		/* Downclock if less than 60% busy over 77ms */
+		I915_WRITE(GEN6_RP_DOWN_EI, 60000);
+		I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 36000);
+
+		I915_WRITE(GEN6_RP_CONTROL,
+			   GEN6_RP_MEDIA_TURBO |
+			   GEN6_RP_MEDIA_HW_NORMAL_MODE |
+			   GEN6_RP_MEDIA_IS_GFX |
+			   GEN6_RP_ENABLE |
+			   GEN6_RP_UP_BUSY_AVG |
+			   GEN6_RP_DOWN_IDLE_AVG);
+		break;
+	}
+}
+
 void gen6_set_rps(struct drm_device *dev, u8 val)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3299,6 +3359,25 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
 	if (val == dev_priv->rps.cur_delay)
 		return;
 
+	switch (dev_priv->rps.power) {
+	case LOW_POWER:
+		if (val > dev_priv->rps.rpe_delay + 1 && val > dev_priv->rps.cur_delay)
+			gen6_set_rps_power(dev_priv, BETWEEN);
+		break;
+
+	case BETWEEN:
+		if (val <= dev_priv->rps.rpe_delay && val < dev_priv->rps.cur_delay)
+			gen6_set_rps_power(dev_priv, LOW_POWER);
+		else if (val >= dev_priv->rps.rp0_delay && val > dev_priv->rps.cur_delay)
+			gen6_set_rps_power(dev_priv, HIGH_POWER);
+		break;
+
+	case HIGH_POWER:
+		if (val < (dev_priv->rps.rp1_delay + dev_priv->rps.rp0_delay) >> 1 && val < dev_priv->rps.cur_delay)
+			gen6_set_rps_power(dev_priv, BETWEEN);
+		break;
+	}
+
 	if (IS_HASWELL(dev))
 		I915_WRITE(GEN6_RPNSWREQ,
 			   HSW_FREQUENCY(val));
@@ -3325,10 +3404,13 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
 	atomic_set(&dev_priv->rps.wait_boost, false);
 
 	mutex_lock(&dev_priv->rps.hw_lock);
-	if (dev_priv->info->is_valleyview)
+	if (dev_priv->info->is_valleyview) {
 		valleyview_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
-	else
+	} else {
 		gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
+		if (dev_priv->rps.power != LOW_POWER)
+			gen6_set_rps_power(dev_priv, LOW_POWER);
+	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 }
 
@@ -3505,7 +3587,10 @@ static void gen6_enable_rps(struct drm_device *dev)
 
 	/* In units of 50MHz */
 	dev_priv->rps.hw_max = dev_priv->rps.max_delay = rp_state_cap & 0xff;
-	dev_priv->rps.min_delay = (rp_state_cap & 0xff0000) >> 16;
+	dev_priv->rps.min_delay = (rp_state_cap >> 16) & 0xff;
+	dev_priv->rps.rp1_delay = (rp_state_cap >>  8) & 0xff;
+	dev_priv->rps.rp0_delay = (rp_state_cap >>  0) & 0xff;
+	dev_priv->rps.rpe_delay = dev_priv->rps.rp1_delay;
 	dev_priv->rps.cur_delay = 0;
 
 	/* disable the counters and set deterministic thresholds */
@@ -3553,38 +3638,11 @@ static void gen6_enable_rps(struct drm_device *dev)
 		   GEN6_RC_CTL_EI_MODE(1) |
 		   GEN6_RC_CTL_HW_ENABLE);
 
-	if (IS_HASWELL(dev)) {
-		I915_WRITE(GEN6_RPNSWREQ,
-			   HSW_FREQUENCY(10));
-		I915_WRITE(GEN6_RC_VIDEO_FREQ,
-			   HSW_FREQUENCY(12));
-	} else {
-		I915_WRITE(GEN6_RPNSWREQ,
-			   GEN6_FREQUENCY(10) |
-			   GEN6_OFFSET(0) |
-			   GEN6_AGGRESSIVE_TURBO);
-		I915_WRITE(GEN6_RC_VIDEO_FREQ,
-			   GEN6_FREQUENCY(12));
-	}
-
-	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 1000000);
-	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
-		   dev_priv->rps.max_delay << 24 |
-		   dev_priv->rps.min_delay << 16);
-
-	I915_WRITE(GEN6_RP_UP_THRESHOLD, 59400);
-	I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 245000);
-	I915_WRITE(GEN6_RP_UP_EI, 66000);
-	I915_WRITE(GEN6_RP_DOWN_EI, 350000);
-
+	/* Power down if completely idle for over 50ms */
+	I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 50000);
 	I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
-	I915_WRITE(GEN6_RP_CONTROL,
-		   GEN6_RP_MEDIA_TURBO |
-		   GEN6_RP_MEDIA_HW_NORMAL_MODE |
-		   GEN6_RP_MEDIA_IS_GFX |
-		   GEN6_RP_ENABLE |
-		   GEN6_RP_UP_BUSY_AVG |
-		   (IS_HASWELL(dev) ? GEN7_RP_DOWN_IDLE_AVG : GEN6_RP_DOWN_IDLE_CONT));
+
+	gen6_set_rps_power(dev_priv, LOW_POWER);
 
 	ret = sandybridge_pcode_write(dev_priv, GEN6_PCODE_WRITE_MIN_FREQ_TABLE, 0);
 	if (!ret) {
@@ -3600,7 +3658,7 @@ static void gen6_enable_rps(struct drm_device *dev)
 		DRM_DEBUG_DRIVER("Failed to set the min frequency\n");
 	}
 
-	gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
+	gen6_set_rps(dev_priv->dev, dev_priv->rps.min_delay);
 
 	gen6_enable_rps_interrupts(dev);
 
-- 
1.8.4.rc3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux