From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> We can use engine busy stats instead of the MMIO sampling timer for better efficiency. As a minimum this saves period * num_engines / sec MMIO reads, and in a better case, when only engine busy samplers are active, it enables us to not kick off the sampling timer at all. v2: Rebase. Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_pmu.c | 86 ++++++++++++++++++++++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++ 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 0d9c0d07a432..3272ec0763bf 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -59,6 +59,11 @@ static u64 event_enabled_mask(struct perf_event *event) return config_enabled_mask(event->attr.config); } +static bool supports_busy_stats(void) +{ + return i915.enable_execlists; +} + static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) { u64 enable = i915->pmu.enable; @@ -69,6 +74,8 @@ static bool pmu_needs_timer(struct drm_i915_private *i915, bool gpu_active) if (!gpu_active) enable &= ~ENGINE_SAMPLE_MASK; + else if (supports_busy_stats()) + enable &= ~BIT(I915_SAMPLE_BUSY); return enable; } @@ -132,7 +139,8 @@ static void engines_sample(struct drm_i915_private *dev_priv) if (enable & BIT(I915_SAMPLE_QUEUED)) engine->pmu.sample[I915_SAMPLE_QUEUED] += PERIOD; - if (enable & BIT(I915_SAMPLE_BUSY)) { + if ((enable & BIT(I915_SAMPLE_BUSY)) && + !engine->pmu.busy_stats) { u32 val; fw = grab_forcewake(dev_priv, fw); @@ -349,14 +357,29 @@ static void i915_pmu_timer_cancel(struct perf_event *event) hrtimer_cancel(&hwc->hrtimer); } +static bool engine_needs_busy_stats(struct intel_engine_cs *engine) +{ + return supports_busy_stats() && + (engine->pmu.enable & BIT(I915_SAMPLE_BUSY)); +} + static void i915_pmu_enable(struct perf_event *event) { struct drm_i915_private *i915 = 
container_of(event->pmu, typeof(*i915), pmu.base); + struct intel_engine_cs *engine = NULL; unsigned long flags; spin_lock_irqsave(&i915->pmu.lock, flags); + if (is_engine_event(event)) { + engine = intel_engine_lookup_user(i915, + engine_event_class(event), + engine_event_instance(event)); + GEM_BUG_ON(!engine); + engine->pmu.enable |= BIT(engine_event_sample(event)); + } + i915->pmu.enable |= event_enabled_mask(event); if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) { @@ -364,16 +387,11 @@ static void i915_pmu_enable(struct perf_event *event) ns_to_ktime(PERIOD), 0, HRTIMER_MODE_REL_PINNED); i915->pmu.timer_enabled = true; - } - - if (is_engine_event(event)) { - struct intel_engine_cs *engine; - - engine = intel_engine_lookup_user(i915, - engine_event_class(event), - engine_event_instance(event)); - GEM_BUG_ON(!engine); - engine->pmu.enable |= BIT(engine_event_sample(event)); + } else if (is_engine_event(event) && engine_needs_busy_stats(engine) && + !engine->pmu.busy_stats) { + engine->pmu.busy_stats = true; + if (!cancel_delayed_work(&engine->pmu.disable_busy_stats)) + queue_work(i915->wq, &engine->pmu.enable_busy_stats); } spin_unlock_irqrestore(&i915->pmu.lock, flags); @@ -399,10 +417,17 @@ static void i915_pmu_disable(struct perf_event *event) engine_event_instance(event)); GEM_BUG_ON(!engine); engine->pmu.enable &= ~BIT(engine_event_sample(event)); + if (engine->pmu.busy_stats && + !engine_needs_busy_stats(engine)) { + engine->pmu.busy_stats = false; + queue_delayed_work(i915->wq, + &engine->pmu.disable_busy_stats, + round_jiffies_up_relative(2 * HZ)); + } mask = 0; for_each_engine(engine, i915, id) mask |= engine->pmu.enable; - mask = ~mask; + mask = (~mask) & ENGINE_SAMPLE_MASK; } else { mask = event_enabled_mask(event); } @@ -474,6 +499,9 @@ static void i915_pmu_event_read(struct perf_event *event) if (WARN_ON_ONCE(!engine)) { /* Do nothing */ + } else if (sample == I915_SAMPLE_BUSY && + engine->pmu.busy_stats) { + val = 
ktime_to_ns(intel_engine_get_busy_time(engine)); } else { val = engine->pmu.sample[sample]; } @@ -634,8 +662,27 @@ static const struct attribute_group *i915_pmu_attr_groups[] = { NULL }; +static void __enable_busy_stats(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), pmu.enable_busy_stats); + + WARN_ON_ONCE(intel_enable_engine_stats(engine)); +} + +static void __disable_busy_stats(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), pmu.disable_busy_stats.work); + + intel_disable_engine_stats(engine); +} + void i915_pmu_register(struct drm_i915_private *i915) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + if (INTEL_GEN(i915) <= 2) return; @@ -651,6 +698,13 @@ void i915_pmu_register(struct drm_i915_private *i915) spin_lock_init(&i915->pmu.lock); hrtimer_init(&i915->pmu.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + + for_each_engine(engine, i915, id) { + INIT_WORK(&engine->pmu.enable_busy_stats, __enable_busy_stats); + INIT_DELAYED_WORK(&engine->pmu.disable_busy_stats, + __disable_busy_stats); + } + i915->pmu.timer.function = i915_sample; i915->pmu.enable = 0; @@ -660,6 +714,9 @@ void i915_pmu_register(struct drm_i915_private *i915) void i915_pmu_unregister(struct drm_i915_private *i915) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + if (!i915->pmu.base.event_init) return; @@ -669,4 +726,9 @@ void i915_pmu_unregister(struct drm_i915_private *i915) i915->pmu.base.event_init = NULL; hrtimer_cancel(&i915->pmu.timer); + + for_each_engine(engine, i915, id) { + flush_work(&engine->pmu.enable_busy_stats); + flush_delayed_work(&engine->pmu.disable_busy_stats); + } } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 68f50ec72be6..fd5d838ca7b5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -248,6 +248,9 @@ struct intel_engine_cs { struct 
{ u32 enable; u64 sample[4]; + bool busy_stats; + struct work_struct enable_busy_stats; + struct delayed_work disable_busy_stats; } pmu; /* -- 2.9.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx