Combine our per-engine metrics to track begin/end of the class as a whole and present that as culmulative runtime via PMU. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> Cc: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 18 +++ drivers/gpu/drm/i915/gt/intel_engine_stats.h | 34 ++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_user.c | 3 + drivers/gpu/drm/i915/gt/intel_gt.h | 2 + drivers/gpu/drm/i915/gt/intel_gt_types.h | 28 +++++ drivers/gpu/drm/i915/i915_pmu.c | 119 +++++++++++++++---- include/uapi/drm/i915_drm.h | 3 + 8 files changed, 185 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index b1b44afc94ba..3f71483bd6f5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -314,6 +314,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) engine->instance = info->instance; __sprint_engine_name(engine); + engine->class_stats = >->stats_class[info->class]; + engine->props.dma_latency_ns = CONFIG_DRM_I915_DMA_LATENCY; engine->props.heartbeat_interval_ms = @@ -1789,6 +1791,22 @@ intel_engine_find_active_request(struct intel_engine_cs *engine) return active; } +ktime_t intel_runtime_stats_get_busy_time(const struct intel_runtime_stats *st) +{ + unsigned int seq; + ktime_t total; + + do { + seq = read_seqbegin(&st->lock); + total = st->total; + if (atomic_read(&st->active)) + total = ktime_add(total, + ktime_sub(ktime_get(), st->start)); + } while (unlikely(read_seqretry(&st->lock, seq))); + + return total; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "mock_engine.c" #include "selftest_engine.c" diff --git a/drivers/gpu/drm/i915/gt/intel_engine_stats.h b/drivers/gpu/drm/i915/gt/intel_engine_stats.h index 24fbdd94351a..b199137a977e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_stats.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_stats.h @@ -12,6 +12,36 @@ #include "i915_gem.h" /* GEM_BUG_ON */ #include "intel_engine.h" +#include "intel_gt_types.h" + +static inline void intel_engine_class_start(struct intel_runtime_stats *st) +{ + unsigned long flags; + + if (atomic_add_unless(&st->active, 1, 0)) + return; + + write_seqlock_irqsave(&st->lock, flags); + if (!atomic_read(&st->active)) + st->start = ktime_get(); + atomic_inc(&st->active); + write_sequnlock_irqrestore(&st->lock, flags); +} + +static inline void intel_engine_class_stop(struct intel_runtime_stats *st) +{ + unsigned long flags; + + if (atomic_add_unless(&st->active, -1, 1)) + return; + + write_seqlock_irqsave(&st->lock, flags); + if (atomic_read(&st->active) == 1) + st->total = + ktime_add(st->total, ktime_sub(ktime_get(), st->start)); + atomic_dec(&st->active); + write_sequnlock_irqrestore(&st->lock, flags); +} static inline void intel_engine_context_in(struct intel_engine_cs *engine) { @@ -22,6 +52,8 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) return; } + intel_engine_class_start(engine->class_stats); + /* The writer is serialised; but the pmu reader may be from hardirq */ local_irq_save(flags); write_seqcount_begin(&engine->stats.lock); @@ -55,6 +87,8 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) write_seqcount_end(&engine->stats.lock); local_irq_restore(flags); + + intel_engine_class_stop(engine->class_stats); } #endif /* __INTEL_ENGINE_STATS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 21e9f9de4844..ae415725cfde 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -499,6 +499,7 @@ struct intel_engine_cs { */ ktime_t rps; } stats; + struct intel_runtime_stats *class_stats; struct { unsigned long heartbeat_interval_ms; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index 6b5a4fdc14a0..24ac33d1b16e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -210,6 +210,9 @@ void intel_engines_driver_register(struct drm_i915_private *i915) GEM_BUG_ON(engine->uabi_class >= ARRAY_SIZE(uabi_instances)); engine->uabi_instance = uabi_instances[engine->uabi_class]++; + if (intel_engine_supports_stats(engine)) + engine->class_stats->enabled = true; + /* Replace the internal name with the final user facing name */ memcpy(old, engine->name, sizeof(engine->name)); scnprintf(engine->name, sizeof(engine->name), "%s%u", diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 9157c7411f60..f73f1d1d3754 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); +ktime_t intel_runtime_stats_get_busy_time(const struct intel_runtime_stats *st); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a83d3e18254d..3c12d4a3197e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -29,6 +29,32 @@ struct i915_ggtt; struct intel_engine_cs; struct intel_uncore; +struct intel_runtime_stats { + atomic_t active; + + /** + * @lock: Lock protecting the below fields. + */ + seqlock_t lock; + + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. + */ + ktime_t start; + + bool enabled; +}; + struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -112,6 +138,8 @@ struct intel_gt { ktime_t start; } stats; + struct intel_runtime_stats stats_class[MAX_ENGINE_CLASS + 1]; + struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] [MAX_ENGINE_INSTANCE + 1]; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 2b88c0baa1bf..026999a82231 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -9,6 +9,7 @@ #include "gt/intel_engine.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" +#include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_rc6.h" #include "gt/intel_rps.h" @@ -506,13 +507,28 @@ static int engine_event_init(struct perf_event *event) struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); struct intel_engine_cs *engine; + u8 class = engine_event_class(event); + u8 instance = engine_event_instance(event); + u8 sample = engine_event_sample(event); - engine = intel_engine_lookup_user(i915, engine_event_class(event), - engine_event_instance(event)); + if (instance == 0xff) { + if (sample == 0) + return 0; + + if (class >= ARRAY_SIZE(i915->gt.stats_class)) + return -ENODEV; + + if (!i915->gt.stats_class[class].enabled) + return -ENODEV; + + return -ENOENT; + } + + engine = intel_engine_lookup_user(i915, class, instance); if (!engine) return -ENODEV; - return engine_event_status(engine, engine_event_sample(event)); + return engine_event_status(engine, sample); } static int i915_pmu_event_init(struct perf_event *event) @@ -557,6 +573,33 @@ static int i915_pmu_event_init(struct perf_event *event) return 0; } +static u64 engine_event_read(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + u8 sample = engine_event_sample(event); + u8 class = engine_event_class(event); + u8 instance = engine_event_instance(event); + struct intel_engine_cs *engine; + ktime_t unused; + + if (instance == 0xff) { + struct intel_runtime_stats *st = &i915->gt.stats_class[class]; + + return intel_runtime_stats_get_busy_time(st); + } + + engine = intel_engine_lookup_user(i915, class, instance); + if (GEM_WARN_ON(!engine)) + return 0; /* Do nothing */ + else if (sample == I915_SAMPLE_BUSY && + intel_engine_supports_stats(engine)) + return ktime_to_ns(intel_engine_get_busy_time(engine, + &unused)); + else + return engine->pmu.sample[sample].cur; +} + static u64 __i915_pmu_event_read(struct perf_event *event) { struct drm_i915_private *i915 = @@ -565,24 +608,7 @@ static u64 __i915_pmu_event_read(struct perf_event *event) u64 val = 0; if (is_engine_event(event)) { - u8 sample = engine_event_sample(event); - struct intel_engine_cs *engine; - - engine = intel_engine_lookup_user(i915, - engine_event_class(event), - engine_event_instance(event)); - - if (drm_WARN_ON_ONCE(&i915->drm, !engine)) { - /* Do nothing */ - } else if (sample == I915_SAMPLE_BUSY && - intel_engine_supports_stats(engine)) { - ktime_t unused; - - val = ktime_to_ns(intel_engine_get_busy_time(engine, - &unused)); - } else { - val = engine->pmu.sample[sample].cur; - } + val = engine_event_read(event); } else { switch (event->attr.config) { case I915_PMU_ACTUAL_FREQUENCY: @@ -666,7 +692,7 @@ static void i915_pmu_enable(struct perf_event *event) * For per-engine events the bitmask and reference counting * is stored per engine. */ - if (is_engine_event(event)) { + if (is_engine_event(event) && engine_event_instance(event) != 0xff) { u8 sample = engine_event_sample(event); struct intel_engine_cs *engine; @@ -710,7 +736,7 @@ static void i915_pmu_disable(struct perf_event *event) spin_lock_irqsave(&pmu->lock, flags); - if (is_engine_event(event)) { + if (is_engine_event(event) && engine_event_instance(event) != 0xff) { u8 sample = engine_event_sample(event); struct intel_engine_cs *engine; @@ -891,6 +917,18 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, return ++attr; } +static const char *class_repr(u8 class) +{ + switch (class) { + case RENDER_CLASS: return "render"; + case VIDEO_DECODE_CLASS: return "video-decode"; + case VIDEO_ENHANCEMENT_CLASS: return "video-enhance"; + case COPY_ENGINE_CLASS: return "copy"; + } + + return NULL; +} + static struct attribute ** create_event_attributes(struct i915_pmu *pmu) { @@ -919,6 +957,8 @@ create_event_attributes(struct i915_pmu *pmu) struct i915_ext_attribute *i915_attr = NULL, *i915_iter; struct attribute **attr = NULL, **attr_iter; struct intel_engine_cs *engine; + unsigned long class_enable = 0; + unsigned long class_disable = 0; unsigned int i; /* Count how many counters we will be exposing. */ @@ -928,6 +968,11 @@ create_event_attributes(struct i915_pmu *pmu) } for_each_uabi_engine(engine, i915) { + if (intel_engine_supports_stats(engine)) + class_enable |= BIT(engine->class); + else + class_disable |= BIT(engine->class); + for (i = 0; i < ARRAY_SIZE(engine_events); i++) { if (!engine_event_status(engine, engine_events[i].sample)) @@ -935,6 +980,9 @@ create_event_attributes(struct i915_pmu *pmu) } } + class_enable &= ~class_disable; + count += hweight_long(class_enable); + /* Allocate attribute objects and table. */ i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL); if (!i915_attr) @@ -1008,6 +1056,31 @@ create_event_attributes(struct i915_pmu *pmu) } } + for_each_set_bit(i, &class_enable, BITS_PER_LONG) { + const char *name; + char *str; + + name = class_repr(i); + if (GEM_WARN_ON(!name)) + continue; + + str = kasprintf(GFP_KERNEL, "%s-%s", name, "busy"); + if (!str) + goto err; + + *attr_iter++ = &i915_iter->attr.attr; + i915_iter = + add_i915_attr(i915_iter, str, + __I915_PMU_ENGINE(i, 0xff, 0)); + + str = kasprintf(GFP_KERNEL, "%s.unit", str); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, "ns"); + } + pmu->i915_attr = i915_attr; pmu->pmu_attr = pmu_attr; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 1987e2ea79a3..3aacdf756e90 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -171,6 +171,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_ENGINE_SEMA(class, instance) \ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) +#define I915_PMU_CLASS_BUSY(class) \ + __I915_PMU_ENGINE(class, 0xff, 0) + #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) -- 2.20.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx