Quoting Tvrtko Ursulin (2019-12-19 18:00:16) > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > Some customers want to know how much of the GPU time are their clients > using in order to make dynamic load balancing decisions. > > With the hooks already in place which track the overall engine busyness, > we can extend that slightly to split that time between contexts. > > v2: Fix accounting for tail updates. > v3: Rebase. > v4: Mark currently running contexts as active on stats enable. > v5: Include some headers to fix the build. > v6: Added fine grained lock. > v7: Convert to seqlock. (Chris Wilson) > v8: Rebase and tidy with helpers. > v9: Refactor. > v10: Move recording start to promotion. (Chris) > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gt/intel_context.c | 20 +++++++ > drivers/gpu/drm/i915/gt/intel_context.h | 11 ++++ > drivers/gpu/drm/i915/gt/intel_context_types.h | 9 ++++ > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 16 +++++- > drivers/gpu/drm/i915/gt/intel_lrc.c | 52 ++++++++++++++++--- > 5 files changed, 100 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c > index b1e346d2d35f..b211b48d6cae 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.c > +++ b/drivers/gpu/drm/i915/gt/intel_context.c > @@ -243,6 +243,7 @@ intel_context_init(struct intel_context *ce, > INIT_LIST_HEAD(&ce->signals); > > mutex_init(&ce->pin_mutex); > + seqlock_init(&ce->stats.lock); > > i915_active_init(&ce->active, > __intel_context_active, __intel_context_retire); > @@ -337,6 +338,25 @@ struct i915_request *intel_context_create_request(struct intel_context *ce) > return rq; > } > > +ktime_t intel_context_get_busy_time(struct intel_context *ce) > +{ > + unsigned int seq; > + ktime_t total; > + > + do { > + seq = read_seqbegin(&ce->stats.lock); > + > + total = ce->stats.total; > + > + if (ce->stats.active) > + total = ktime_add(total, > + ktime_sub(ktime_get(), > + ce->stats.start)); > + } while (read_seqretry(&ce->stats.lock, seq)); > + > + return total; > +} > + > #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) > #include "selftest_context.c" > #endif > diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h > index b39eb1fcfbca..3a15cf32f0a3 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context.h > +++ b/drivers/gpu/drm/i915/gt/intel_context.h > @@ -160,4 +160,15 @@ static inline struct intel_ring *__intel_context_ring_size(u64 sz) > return u64_to_ptr(struct intel_ring, sz); > } > > +static inline void > +__intel_context_stats_start(struct intel_context_stats *stats, ktime_t now) > +{ > + if (!stats->active) { > + stats->start = now; > + stats->active = true; > + } > +} > + > +ktime_t intel_context_get_busy_time(struct intel_context *ce); > + > #endif /* __INTEL_CONTEXT_H__ */ > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h > index d1204cc899a3..12cbad0798cb 100644 > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h > @@ -11,6 +11,7 @@ > #include <linux/list.h> > #include <linux/mutex.h> > #include <linux/types.h> > +#include <linux/seqlock.h> > > #include "i915_active_types.h" > #include "i915_utils.h" > @@ -76,6 +77,14 @@ struct intel_context { > > /** sseu: Control eu/slice partitioning */ > struct intel_sseu sseu; > + > + /** stats: Context GPU engine busyness tracking. */ > + struct intel_context_stats { > + seqlock_t lock; > + bool active; > + ktime_t start; > + ktime_t total; > + } stats; > }; > > #endif /* __INTEL_CONTEXT_TYPES__ */ > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 3d1d48bf90cf..ac08781c8b24 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -1577,8 +1577,20 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) > > engine->stats.enabled_at = ktime_get(); > > - /* XXX submission method oblivious? */ > - for (port = execlists->active; (rq = *port); port++) > + /* > + * Mark currently running context as active. > + * XXX submission method oblivious? > + */ > + > + rq = NULL; > + port = execlists->active; > + if (port) > + rq = *port; > + if (rq) > + __intel_context_stats_start(&rq->hw_context->stats, > + engine->stats.enabled_at); > + > + for (; (rq = *port); port++) > engine->stats.active++; > > for (port = execlists->pending; (rq = *port); port++) { > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 4db54fd6a2fe..b186f06e508d 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -940,6 +940,7 @@ static void intel_engine_context_in(struct intel_engine_cs *engine) > if (engine->stats.enabled > 0) { > if (engine->stats.active++ == 0) > engine->stats.start = ktime_get(); > + > GEM_BUG_ON(engine->stats.active == 0); > } > > @@ -1088,6 +1089,32 @@ static void reset_active(struct i915_request *rq, > ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; > } > > +static void > +intel_context_stats_start(struct intel_context_stats *stats) > +{ > + unsigned long flags; > + > + write_seqlock_irqsave(&stats->lock, flags); > + __intel_context_stats_start(stats, ktime_get()); > + write_sequnlock_irqrestore(&stats->lock, flags); > +} > + > +static void > +intel_context_stats_stop(struct intel_context_stats *stats) > +{ > + unsigned long flags; > + > + if (!READ_ONCE(stats->active)) > + return; > + > + write_seqlock_irqsave(&stats->lock, flags); > + GEM_BUG_ON(!READ_ONCE(stats->active)); > + stats->total = ktime_add(stats->total, > + ktime_sub(ktime_get(), stats->start)); > + stats->active = false; > + write_sequnlock_irqrestore(&stats->lock, flags); > +} > + > static inline struct intel_engine_cs * > __execlists_schedule_in(struct i915_request *rq) > { > @@ -1155,7 +1182,7 @@ static inline void > __execlists_schedule_out(struct i915_request *rq, > struct intel_engine_cs * const engine) > { > - struct intel_context * const ce = rq->hw_context; > + struct intel_context *ce = rq->hw_context; > > /* > * NB process_csb() is not under the engine->active.lock and hence > @@ -1172,6 +1199,7 @@ __execlists_schedule_out(struct i915_request *rq, > intel_engine_add_retire(engine, ce->timeline); > > intel_engine_context_out(engine); > + intel_context_stats_stop(&ce->stats); > execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); > intel_gt_pm_put_async(engine->gt); > > @@ -2174,9 +2202,11 @@ static void process_csb(struct intel_engine_cs *engine) > promote = gen8_csb_parse(execlists, buf + 2 * head); > if (promote) { > struct i915_request * const *old = execlists->active; > + struct i915_request *rq; > > /* Point active to the new ELSP; prevent overwriting */ > WRITE_ONCE(execlists->active, execlists->pending); > + > set_timeslice(engine); > > if (!inject_preempt_hang(execlists)) > @@ -2196,8 +2226,16 @@ static void process_csb(struct intel_engine_cs *engine) > sizeof(*execlists->pending))); > > WRITE_ONCE(execlists->pending[0], NULL); > + > + rq = *execlists->active; > + if (rq) > + intel_context_stats_start(&rq->hw_context->stats); Code duplication :) Both branches are doing rq = *execlists->active; if (rq) intel_context_stats_start(&rq->hw_context->stats); as their final setp, just move it to after the if. Cost of maintaining stats was unnoticeable in profiles and did not affect wsim, so lgtm. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx