On 01/08/2019 19:26, Chris Wilson wrote:
As we track when we put the GT device to sleep upon idling, we can use
that callback to sample the current rc6 counters and record the
timestamp for estimating samples after that point while asleep.
Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_debugfs.c | 21 ++---
drivers/gpu/drm/i915/i915_pmu.c | 122 ++++++++++++++--------------
drivers/gpu/drm/i915/i915_pmu.h | 4 +-
3 files changed, 71 insertions(+), 76 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 24787bb48c9f..a96e630d3f86 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -39,6 +39,7 @@
#include "display/intel_psr.h"
#include "gem/i915_gem_context.h"
+#include "gt/intel_gt_pm.h"
#include "gt/intel_reset.h"
#include "gt/uc/intel_guc_submission.h"
@@ -4057,13 +4058,11 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
static int i915_forcewake_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
+ struct intel_gt *gt = &i915->gt;
- if (INTEL_GEN(i915) < 6)
- return 0;
-
- file->private_data =
- (void *)(uintptr_t)intel_runtime_pm_get(&i915->runtime_pm);
- intel_uncore_forcewake_user_get(&i915->uncore);
+ intel_gt_pm_get(gt);
+ if (INTEL_GEN(i915) >= 6)
+ intel_uncore_forcewake_user_get(gt->uncore);
return 0;
}
@@ -4071,13 +4070,11 @@ static int i915_forcewake_open(struct inode *inode, struct file *file)
static int i915_forcewake_release(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
+ struct intel_gt *gt = &i915->gt;
- if (INTEL_GEN(i915) < 6)
- return 0;
-
- intel_uncore_forcewake_user_put(&i915->uncore);
- intel_runtime_pm_put(&i915->runtime_pm,
- (intel_wakeref_t)(uintptr_t)file->private_data);
+ if (INTEL_GEN(i915) >= 6)
+ intel_uncore_forcewake_user_put(&i915->uncore);
+ intel_gt_pm_put(gt);
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 4d7cabeea687..680618bd385c 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -114,17 +114,50 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active)
return enable;
}
+static u64 __get_rc6(struct intel_gt *gt)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ u64 val;
+
+ val = intel_rc6_residency_ns(i915,
+ IS_VALLEYVIEW(i915) ?
+ VLV_GT_RENDER_RC6 :
+ GEN6_GT_GFX_RC6);
+
+ if (HAS_RC6p(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+
+ if (HAS_RC6pp(i915))
+ val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+
+ return val;
+}
+
void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
+ u64 val;
+
if (!i915->pmu.base.event_init)
return;
+ val = 0;
+ if (i915->pmu.sample[__I915_SAMPLE_RC6].cur)
+ val = __get_rc6(&i915->gt);
The conditional could be racy outside the lock. If a parallel perf reader
updates .cur from zero to non-zero, the housekeeping below would see val
as zero. Perhaps you could store val = __get_rc6() outside the lock, and
then decide which val to use inside the lock?
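Something like this perhaps (just a sketch, untested, reusing your new
helper and the existing field names):

void i915_pmu_gt_parked(struct drm_i915_private *i915)
{
	u64 val;

	if (!i915->pmu.base.event_init)
		return;

	/* Sample the hardware counter outside the irq-off lock. */
	val = __get_rc6(&i915->gt);

	spin_lock_irq(&i915->pmu.lock);

	/* Decide under the lock whether a perf reader has been active. */
	if (i915->pmu.sample[__I915_SAMPLE_RC6].cur &&
	    val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
		i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
		i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
	}
	...

It does mean we always read the residency registers on park, even with no
RC6 event open, so maybe not worth it if that read is considered expensive.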
+
spin_lock_irq(&i915->pmu.lock);
+
+ if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
+ i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
+ }
+ i915->pmu.sleep_timestamp = jiffies;
ktime would be better I think. More precision, and why use archaic
jiffies anyway?
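Assuming the field was switched to ktime_t, it would look roughly like
this (same field name, only the type changes):

	/* On park: record the timestamp with ns precision. */
	i915->pmu.sleep_timestamp = ktime_get();

	/* On unpark / read: how long we were asleep, in ns. */
	val = ktime_to_ns(ktime_sub(ktime_get(), i915->pmu.sleep_timestamp));
	val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;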
+
/*
* Signal sampling timer to stop if only engine events are enabled and
* GPU went idle.
*/
i915->pmu.timer_enabled = pmu_needs_timer(i915, false);
+
spin_unlock_irq(&i915->pmu.lock);
}
@@ -145,10 +178,23 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
return;
spin_lock_irq(&i915->pmu.lock);
+
/*
* Re-enable sampling timer when GPU goes active.
*/
__i915_pmu_maybe_start_timer(i915);
+
+ /* Estimate how long we slept and accumulate that into rc6 counters */
+ if (i915->pmu.sample[__I915_SAMPLE_RC6].cur) {
+ u64 val;
+
+ val = jiffies - i915->pmu.sleep_timestamp;
+ val = jiffies_to_nsecs(val);
+ val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
+
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
+ }
+
spin_unlock_irq(&i915->pmu.lock);
}
@@ -417,36 +463,17 @@ static int i915_pmu_event_init(struct perf_event *event)
return 0;
}
-static u64 __get_rc6(struct drm_i915_private *i915)
+static u64 get_rc6(struct intel_gt *gt)
{
- u64 val;
-
- val = intel_rc6_residency_ns(i915,
- IS_VALLEYVIEW(i915) ?
- VLV_GT_RENDER_RC6 :
- GEN6_GT_GFX_RC6);
-
- if (HAS_RC6p(i915))
- val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
-
- if (HAS_RC6pp(i915))
- val += intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
-
- return val;
-}
-
-static u64 get_rc6(struct drm_i915_private *i915)
-{
-#if IS_ENABLED(CONFIG_PM)
We still end up never getting into estimation mode, even when parked,
right? Hm.. why did I add this.. never mind.
- struct intel_runtime_pm *rpm = &i915->runtime_pm;
- intel_wakeref_t wakeref;
+ struct drm_i915_private *i915 = gt->i915;
unsigned long flags;
u64 val;
- wakeref = intel_runtime_pm_get_if_in_use(rpm);
- if (wakeref) {
- val = __get_rc6(i915);
- intel_runtime_pm_put(rpm, wakeref);
+ spin_lock_irqsave(&i915->pmu.lock, flags);
+
+ if (intel_gt_pm_get_if_awake(gt)) {
+ val = __get_rc6(gt);
I thought earlier in the patch you were avoiding calling __get_rc6 under
the irq-off lock. It looks to be safe, unless I am missing yet another
nasty cpu hotplug lock interaction, but it would be good to be consistent.
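For consistency the hardware read could be hoisted out of the lock here
too, e.g. (sketch only, not compile tested, and with the usual caveat
that a park can slot in between the pm_put and taking the lock):

static u64 get_rc6(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	unsigned long flags;
	bool awake = false;
	u64 val;

	/* Hardware read outside the irq-off lock... */
	if (intel_gt_pm_get_if_awake(gt)) {
		val = __get_rc6(gt);
		intel_gt_pm_put(gt);
		awake = true;
	}

	spin_lock_irqsave(&i915->pmu.lock, flags);

	if (awake) {
		/* ...reconcile with the estimate under the lock. */
		if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
			i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
			i915->pmu.sample[__I915_SAMPLE_RC6].cur = val;
		} else {
			val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
		}
	} else {
		/* Asleep: estimate on top of the last known value, as in your version. */
		...
	}

	spin_unlock_irqrestore(&i915->pmu.lock, flags);

	return val;
}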
+ intel_gt_pm_put(gt);
/*
* If we are coming back from being runtime suspended we must
@@ -454,7 +481,6 @@ static u64 get_rc6(struct drm_i915_private *i915)
* previously.
*/
- spin_lock_irqsave(&i915->pmu.lock, flags);
if (val >= i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
@@ -462,11 +488,7 @@ static u64 get_rc6(struct drm_i915_private *i915)
} else {
val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
}
-
- spin_unlock_irqrestore(&i915->pmu.lock, flags);
} else {
- struct device *kdev = rpm->kdev;
-
/*
* We are runtime suspended.
*
@@ -474,42 +496,18 @@ static u64 get_rc6(struct drm_i915_private *i915)
* on top of the last known real value, as the approximated RC6
* counter value.
*/
- spin_lock_irqsave(&i915->pmu.lock, flags);
- /*
- * After the above branch intel_runtime_pm_get_if_in_use failed
- * to get the runtime PM reference we cannot assume we are in
- * runtime suspend since we can either: a) race with coming out
- * of it before we took the power.lock, or b) there are other
- * states than suspended which can bring us here.
- *
- * We need to double-check that we are indeed currently runtime
- * suspended and if not we cannot do better than report the last
- * known RC6 value.
- */
You think this race is not a concern any more? The issue of inconsistent
state in the PM core hasn't been there since 2924bdee21edd, although I am
not sure whether 3b4ed2e2eb558 broke it again. Presumably not, since I
reviewed it back then. But if we got woken up by now, the reader will see
too much rc6 sleep. I guess it's noise level. Can't imagine even IGT
would be sensitive enough to be affected by it.
- if (pm_runtime_status_suspended(kdev)) {
- val = pm_runtime_suspended_time(kdev);
+ val = jiffies - i915->pmu.sleep_timestamp;
+ val = jiffies_to_nsecs(val);
+ val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
- if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur)
- i915->pmu.suspended_time_last = val;
+ i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
- val -= i915->pmu.suspended_time_last;
- val += i915->pmu.sample[__I915_SAMPLE_RC6].cur;
-
- i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
- } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
- val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
- } else {
- val = i915->pmu.sample[__I915_SAMPLE_RC6].cur;
- }
-
- spin_unlock_irqrestore(&i915->pmu.lock, flags);
}
+ spin_unlock_irqrestore(&i915->pmu.lock, flags);
+
return val;
-#else
- return __get_rc6(i915);
-#endif
}
static u64 __i915_pmu_event_read(struct perf_event *event)
@@ -550,7 +548,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = count_interrupts(i915);
break;
case I915_PMU_RC6_RESIDENCY:
- val = get_rc6(i915);
+ val = get_rc6(&i915->gt);
break;
}
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 4fc4f2478301..6fa0240a1704 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -97,9 +97,9 @@ struct i915_pmu {
*/
struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS];
/**
- * @suspended_time_last: Cached suspend time from PM core.
+ * @sleep_timestamp: Last time GT parked for RC6 estimation.
*/
- u64 suspended_time_last;
+ unsigned long sleep_timestamp;
/**
* @i915_attr: Memory block holding device attributes.
*/
Yeah, I like it. Much better that we stay within the confines of our own
code. I wonder what it would mean if this fixes the occasional IGT
failures. That something in the core rpm subsystem accounting was going
wrong?
Regards,
Tvrtko