On Wed, Jul 15, 2015 at 02:21:46PM +0530, sourab.gupta@xxxxxxxxx wrote: > From: Sourab Gupta <sourab.gupta@xxxxxxxxx> > > This patch adds support for retrieving MMIO register values alongwith > timestamps and forwarding them to userspace through perf. > The userspace can request upto 8 MMIO register values to be dumped. > The addresses of upto 8 MMIO registers can be passed through perf attr > config. The registers are checked against a whitelist before passing them > on. The commands to dump the values of these MMIO registers are then > inserted into the ring alongwith commands to dump the timestamps. > > Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_drv.h | 2 + > drivers/gpu/drm/i915/i915_oa_perf.c | 87 ++++++++++++++++++++++++++++++++++--- > include/uapi/drm/i915_drm.h | 10 ++++- > 3 files changed, 92 insertions(+), 7 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index f2fe8d0..e114175 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2017,7 +2017,9 @@ struct drm_i915_private { > #define I915_GEN_PMU_SAMPLE_RING (1<<0) > #define I915_GEN_PMU_SAMPLE_PID (1<<1) > #define I915_GEN_PMU_SAMPLE_TAG (1<<2) > +#define I915_GEN_PMU_SAMPLE_MMIO (1<<3) > int sample_info_flags; > + u32 mmio_list[8]; > } gen_pmu; > > void (*insert_profile_cmd[I915_PROFILE_MAX]) > diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c > index 1cc16ef..a9d44e0 100644 > --- a/drivers/gpu/drm/i915/i915_oa_perf.c > +++ b/drivers/gpu/drm/i915/i915_oa_perf.c > @@ -113,8 +113,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, > struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj; > struct i915_gen_pmu_node *entry; > unsigned long lock_flags; > - u32 addr = 0; > - int ret; > + u32 mmio_addr, addr = 0; > + int ret, i; > > entry = kzalloc(sizeof(*entry), GFP_KERNEL); > if (entry == NULL) { > @@ -150,6 +150,7 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, > spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); > > addr = i915_gem_obj_ggtt_offset(obj) + entry->offset; > + mmio_addr = addr + sizeof(struct drm_i915_ts_data); > > if (ring->id == RCS) { > ret = intel_ring_begin(ring, 6); > @@ -177,6 +178,25 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, > intel_ring_emit(ring, 0); /* imm high, must be zero */ > intel_ring_advance(ring); > } > + for (i = 0; i < 8; i++) { > + if (0 == dev_priv->gen_pmu.mmio_list[i]) > + break; > + > + addr = mmio_addr + > + i * sizeof(dev_priv->gen_pmu.mmio_list[i]); > + > + ret = intel_ring_begin(ring, 4); > + if (ret) > + return; > + > + intel_ring_emit(ring, > + MI_STORE_REGISTER_MEM(1) | > + MI_SRM_LRM_GLOBAL_GTT); > + intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]); > + intel_ring_emit(ring, addr); > + intel_ring_emit(ring, MI_NOOP); > + intel_ring_advance(ring); Premature optimisation, but can we store the register array with a single command? for (i = 0; i < 8; i++) { if (0 == dev_priv->gen_pmu.mmio_list[i]) break; ret = intel_ring_begin(ring, 2*i + 2); if (ret) return; intel_ring_emit(MI_STORE_REGISTER_MEM(i)); for (i = 0; i < 8; i++) { if (0 == dev_priv->gen_pmu.mmio_list[i]) break; intel_ring_emit(dev_priv->gen_pmu.mmio_list[i]); intel_ring_emit(addr + i*4); } intel_ring_emit(MI_NOOP); You should also consider ensuring that we have a command barrier between these and 3d operations. If only we had engine->emit_flush(I915_COMMAND_BARRIER). > +/* Some embargoed entries missing from whitelist */ > +static const struct register_whitelist { > + uint64_t offset; > + uint32_t size; > + /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */ > + uint32_t gen_bitmask; > +} whitelist[] = { > + { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) }, > + { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) }, > +}; > + > +static int check_mmio_whitelist(struct drm_i915_private *dev_priv, > + struct drm_i915_gen_pmu_attr *gen_attr) Just what I came looking for. > +{ > + struct register_whitelist const *entry = whitelist; > + int i, count; > + > + for (count = 0; count < 8; count++) { hardcoded value, ARRAY_SIZE(gen_attr->mmio_list) > + if (!gen_attr->mmio_list[count]) > + break; > + > + for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) { Interesting choice of continuation in the ABI. Seems a litte forced. Move the whitelist into the function, no one else should think of accessing it (right?), then just use whitelist[i]. > + if (entry->offset == gen_attr->mmio_list[count] && > + (1 << INTEL_INFO(dev_priv->dev)->gen & dev_priv->dev->dev_priv->info, you have to be kidding me. INTEL_INFO(dev_priv) is the natural form. > + entry->gen_bitmask)) > + break; > + } > + > + if (i == ARRAY_SIZE(whitelist)) > + return -EINVAL; > + } > + return 0; > +} > + > static int i915_gen_event_init(struct perf_event *event) > { > struct drm_i915_private *dev_priv = > @@ -1674,6 +1740,17 @@ static int i915_gen_event_init(struct perf_event *event) > if (gen_attr.sample_tag) > dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG; > > + if (gen_attr.sample_mmio) { > + ret = check_mmio_whitelist(dev_priv, &gen_attr); > + if (ret) > + return ret; Global hw state should only be inspectable by root (unless the system has relaxed perf permissions). > + dev_priv->gen_pmu.sample_info_flags |= > + I915_GEN_PMU_SAMPLE_MMIO; > + memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list, > + sizeof(dev_priv->gen_pmu.mmio_list)); > + } > + > /* To avoid the complexity of having to accurately filter > * data and marshal to the appropriate client > * we currently only allow exclusive access */ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 7ab4972..65bc39d 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -81,7 +81,7 @@ > > #define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */ > > -#define I915_GEN_PMU_ATTR_SIZE_VER0 8 /* sizeof first published struct */ > +#define I915_GEN_PMU_ATTR_SIZE_VER0 40 /* sizeof first published struct */ > > typedef struct _drm_i915_oa_attr { > __u32 size; > @@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr { > __u32 sample_ring:1, > sample_pid:1, > sample_tag:1, > - __reserved_1:29; > + sample_mmio:1, > + __reserved_1:28; > + __u32 mmio_list[8]; > }; seems like having a BUILD_BUG_ON(sizeof(struct _drm_i915_oa_attr) != I915_GEN_PMU_ATTR_SIZE_VER0) is in order. -Chris -- Chris Wilson, Intel Open Source Technology Centre _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx