From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
We add a PMU counter to expose the number of requests currently submitted
to the GPU, plus the number of runnable requests waiting on GPU time.
This is useful to analyze the overall load of the system.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
drivers/gpu/drm/i915/i915_pmu.c | 30 +++++++++++++++++++++++++-----
include/uapi/drm/i915_drm.h | 6 ++++++
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 112243720ff3..b2b4b32af35f 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -36,7 +36,8 @@
#define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
BIT(I915_SAMPLE_WAIT) | \
- BIT(I915_SAMPLE_SEMA))
+ BIT(I915_SAMPLE_SEMA) | \
+ BIT(I915_SAMPLE_QUEUED))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -223,6 +224,12 @@ static void engines_sample(struct drm_i915_private *dev_priv)
update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+ update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+ 1 / I915_SAMPLE_QUEUED_SCALE,
+ engine->queued +
+ (last_seqno - current_seqno));
}
if (fw)
@@ -310,6 +317,10 @@ static int engine_event_init(struct perf_event *event)
if (INTEL_GEN(i915) < 6)
return -ENODEV;
break;
+ case I915_SAMPLE_QUEUED:
+ if (INTEL_GEN(i915) < 8)
+ return -ENODEV;
+ break;
default:
return -ENOENT;
}
@@ -399,6 +410,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
} else if (sample == I915_SAMPLE_BUSY &&
engine->pmu.busy_stats) {
val = ktime_to_ns(intel_engine_get_busy_time(engine));
+ } else if (sample == I915_SAMPLE_QUEUED) {
+ val =
+ div_u64(engine->pmu.sample[I915_SAMPLE_QUEUED].cur,
+ FREQUENCY);
} else {
val = engine->pmu.sample[sample].cur;
}
@@ -679,13 +694,18 @@ static ssize_t i915_pmu_event_show(struct device *dev,
I915_EVENT_STR(_name.unit, _unit)
#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \
- I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \
+ I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample))
+
+#define I915_ENGINE_EVENT_NS(_name, _class, _instance, _sample) \
+ I915_ENGINE_EVENT(_name, _class, _instance, _sample), \
I915_EVENT_STR(_name.unit, "ns")
#define I915_ENGINE_EVENTS(_name, _class, _instance) \
- I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
- I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
- I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT)
+ I915_ENGINE_EVENT_NS(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \
+ I915_ENGINE_EVENT_NS(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \
+ I915_ENGINE_EVENT_NS(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT), \
+ I915_ENGINE_EVENT(_name##_instance-queued, _class, _instance, I915_SAMPLE_QUEUED), \
+ I915_EVENT_STR(_name##_instance-queued.scale, __stringify(I915_SAMPLE_QUEUED_SCALE))