On Wed, 2017-11-22 at 12:46 +0000, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > > We add a PMU counter to expose the number of requests currently submitted > to the GPU, plus the number of runnable requests waiting on GPU time. > > This is useful to analyze the overall load of the system. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_pmu.c | 30 +++++++++++++++++++++++++----- > include/uapi/drm/i915_drm.h | 6 ++++++ > 2 files changed, 31 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c > index 112243720ff3..b2b4b32af35f 100644 > --- a/drivers/gpu/drm/i915/i915_pmu.c > +++ b/drivers/gpu/drm/i915/i915_pmu.c > @@ -36,7 +36,8 @@ > #define ENGINE_SAMPLE_MASK \ > (BIT(I915_SAMPLE_BUSY) | \ > BIT(I915_SAMPLE_WAIT) | \ > - BIT(I915_SAMPLE_SEMA)) > + BIT(I915_SAMPLE_SEMA) | \ > + BIT(I915_SAMPLE_QUEUED)) > > #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) > > @@ -223,6 +224,12 @@ static void engines_sample(struct drm_i915_private *dev_priv) > > update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA], > PERIOD, !!(val & RING_WAIT_SEMAPHORE)); > + > + if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED)) > + update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED], > + 1 / I915_SAMPLE_QUEUED_SCALE, > + engine->queued + > + (last_seqno - current_seqno)); > } > > if (fw) > @@ -310,6 +317,10 @@ static int engine_event_init(struct perf_event *event) > if (INTEL_GEN(i915) < 6) > return -ENODEV; > break; > + case I915_SAMPLE_QUEUED: > + if (INTEL_GEN(i915) < 8) > + return -ENODEV; > + break; > default: > return -ENOENT; > } > @@ -399,6 +410,10 @@ static u64 __i915_pmu_event_read(struct perf_event *event) > } else if (sample == I915_SAMPLE_BUSY && > engine->pmu.busy_stats) { > val = ktime_to_ns(intel_engine_get_busy_time(engine)); > + } else if (sample == I915_SAMPLE_QUEUED) { > + val = > + div_u64(engine->pmu.sample[I915_SAMPLE_QUEUED].cur, > + 
FREQUENCY); > } else { > val = engine->pmu.sample[sample].cur; > } > @@ -679,13 +694,18 @@ static ssize_t i915_pmu_event_show(struct device *dev, > I915_EVENT_STR(_name.unit, _unit) > > #define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \ > - I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \ > + I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)) > + > +#define I915_ENGINE_EVENT_NS(_name, _class, _instance, _sample) \ > + I915_ENGINE_EVENT(_name, _class, _instance, _sample), \ > I915_EVENT_STR(_name.unit, "ns") > > #define I915_ENGINE_EVENTS(_name, _class, _instance) \ > - I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ > - I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ > - I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT) > + I915_ENGINE_EVENT_NS(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ > + I915_ENGINE_EVENT_NS(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ > + I915_ENGINE_EVENT_NS(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT), \ > + I915_ENGINE_EVENT(_name##_instance-queued, _class, _instance, I915_SAMPLE_QUEUED), \ > + I915_EVENT_STR(_name##_instance-queued.scale, __stringify(I915_SAMPLE_QUEUED_SCALE)) We expose queued as an "instant" metric, i.e. the number of requests at the very moment we query the metric, rather than an ever-growing counter - is that right? I doubt such a metric will make sense for perf-stat. Can we somehow restrict it so that it can be queried through the uAPI only, and keep it out of perf-stat? 
> > static struct attribute *i915_pmu_events_attrs[] = { > I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0), > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 915a6e85a855..20ee668d1428 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -111,9 +111,12 @@ enum drm_i915_pmu_engine_sample { > I915_SAMPLE_BUSY = 0, > I915_SAMPLE_WAIT = 1, > I915_SAMPLE_SEMA = 2, > + I915_SAMPLE_QUEUED = 3, > I915_ENGINE_SAMPLE_MAX /* non-ABI */ > }; > > +#define I915_SAMPLE_QUEUED_SCALE 1e-2 /* No braces please. */ > + > #define I915_PMU_SAMPLE_BITS (4) > #define I915_PMU_SAMPLE_MASK (0xf) > #define I915_PMU_SAMPLE_INSTANCE_BITS (8) > @@ -134,6 +137,9 @@ enum drm_i915_pmu_engine_sample { > #define I915_PMU_ENGINE_SEMA(class, instance) \ > __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA) > > +#define I915_PMU_ENGINE_QUEUED(class, instance) \ > + __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED) > + > #define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) > > #define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0) _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx