From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> We add a PMU counter to expose the number of requests which are ready to run and waiting on a free slot on the GPU. This is useful to analyze the overall load of the system. v2: Don't limit to gen8+. v3: * Rebase for dynamic sysfs. * Drop currently executing requests. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_pmu.c | 34 +++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- include/uapi/drm/i915_drm.h | 8 +++++++- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index cbfca4a255ab..aaf48e85c35e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -36,7 +36,8 @@ #define ENGINE_SAMPLE_MASK \ (BIT(I915_SAMPLE_BUSY) | \ BIT(I915_SAMPLE_WAIT) | \ - BIT(I915_SAMPLE_SEMA)) + BIT(I915_SAMPLE_SEMA) | \ + BIT(I915_SAMPLE_QUEUED)) #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) @@ -220,6 +221,11 @@ static void engines_sample(struct drm_i915_private *dev_priv) update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], PERIOD, !!(val & RING_WAIT_SEMAPHORE)); + + if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED)) + update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED], + 1 / I915_SAMPLE_QUEUED_SCALE, + engine->queued); } if (fw) @@ -297,6 +303,7 @@ engine_event_status(struct intel_engine_cs *engine, switch (sample) { case I915_SAMPLE_BUSY: case I915_SAMPLE_WAIT: + case I915_SAMPLE_QUEUED: break; case I915_SAMPLE_SEMA: if (INTEL_GEN(engine->i915) < 6) @@ -407,6 +414,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event) } else { val = engine->pmu.sample[sample].cur; } + + if (sample == I915_SAMPLE_QUEUED) + val = div_u64(val, FREQUENCY); } else { switch (event->attr.config) { case I915_PMU_ACTUAL_FREQUENCY: @@ -719,6 +729,16 @@ static const struct attribute_group *i915_pmu_attr_groups[] = { { \ .sample = (__sample), \ .name = (__name), \ + .suffix = "unit", \ + .value = "ns", \ +} + +#define __engine_event_scale(__sample, __name, __scale) \ +{ \ + .sample = (__sample), \ + .name = (__name), \ + .suffix = "scale", \ + .value = (__scale), \ } static struct i915_ext_attribute * @@ -762,10 +782,14 @@ create_event_attributes(struct drm_i915_private *i915) static const struct { enum drm_i915_pmu_engine_sample sample; char *name; + char *suffix; + char *value; } engine_events[] = { __engine_event(I915_SAMPLE_BUSY, "busy"), __engine_event(I915_SAMPLE_SEMA, "sema"), __engine_event(I915_SAMPLE_WAIT, "wait"), + __engine_event_scale(I915_SAMPLE_QUEUED, "queued", + __stringify(I915_SAMPLE_QUEUED_SCALE)), }; unsigned int count = 0; struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; @@ -852,13 +876,15 @@ create_event_attributes(struct drm_i915_private *i915) engine->instance, engine_events[i].sample)); - str = kasprintf(GFP_KERNEL, "%s-%s.unit", - engine->name, engine_events[i].name); + str = kasprintf(GFP_KERNEL, "%s-%s.%s", + engine->name, engine_events[i].name, + engine_events[i].suffix); if (!str) goto err; *attr_iter++ = &pmu_iter->attr.attr; - pmu_iter = add_pmu_attr(pmu_iter, str, "ns"); + pmu_iter = add_pmu_attr(pmu_iter, str, + engine_events[i].value); } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 77fff2488cde..84541b91bcd8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -379,7 +379,7 @@ struct intel_engine_cs { * * Our internal timer stores the current counters in this field. */ -#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1) +#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1) struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX]; /** * @busy_stats: Has enablement of engine stats tracking been diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 536ee4febd74..83458e5b1ac7 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -110,9 +110,12 @@ enum drm_i915_gem_engine_class { enum drm_i915_pmu_engine_sample { I915_SAMPLE_BUSY = 0, I915_SAMPLE_WAIT = 1, - I915_SAMPLE_SEMA = 2 + I915_SAMPLE_SEMA = 2, + I915_SAMPLE_QUEUED = 3 }; +#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */ + #define I915_PMU_SAMPLE_BITS (4) #define I915_PMU_SAMPLE_MASK (0xf) #define I915_PMU_SAMPLE_INSTANCE_BITS (8) @@ -133,6 +136,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ENGINE_SEMA(class, instance) \ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) +#define I915_PMU_ENGINE_QUEUED(class, instance) \ + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED) + #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) -- 2.14.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx