From: Sourab Gupta <sourab.gupta@xxxxxxxxx> This patch adds support for retrieving MMIO register values along with timestamps and forwarding them to userspace through perf. Userspace can request up to 8 MMIO register values to be dumped. The addresses of up to 8 MMIO registers can be passed through perf attr config. The registers are checked against a whitelist before passing them on. The commands to dump the values of these MMIO registers are then inserted into the ring along with commands to dump the timestamps. Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx> --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_oa_perf.c | 87 ++++++++++++++++++++++++++++++++++--- include/uapi/drm/i915_drm.h | 10 ++++- 3 files changed, 92 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f2fe8d0..e114175 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2017,7 +2017,9 @@ struct drm_i915_private { #define I915_GEN_PMU_SAMPLE_RING (1<<0) #define I915_GEN_PMU_SAMPLE_PID (1<<1) #define I915_GEN_PMU_SAMPLE_TAG (1<<2) +#define I915_GEN_PMU_SAMPLE_MMIO (1<<3) int sample_info_flags; + u32 mmio_list[8]; } gen_pmu; void (*insert_profile_cmd[I915_PROFILE_MAX]) diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c index 1cc16ef..a9d44e0 100644 --- a/drivers/gpu/drm/i915/i915_oa_perf.c +++ b/drivers/gpu/drm/i915/i915_oa_perf.c @@ -113,8 +113,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, struct drm_i915_gem_object *obj = dev_priv->gen_pmu.buffer.obj; struct i915_gen_pmu_node *entry; unsigned long lock_flags; - u32 addr = 0; - int ret; + u32 mmio_addr, addr = 0; + int ret, i; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { @@ -150,6 +150,7 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, spin_unlock_irqrestore(&dev_priv->gen_pmu.lock, lock_flags); addr =
i915_gem_obj_ggtt_offset(obj) + entry->offset; + mmio_addr = addr + sizeof(struct drm_i915_ts_data); if (ring->id == RCS) { ret = intel_ring_begin(ring, 6); @@ -177,6 +178,25 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id, intel_ring_emit(ring, 0); /* imm high, must be zero */ intel_ring_advance(ring); } + for (i = 0; i < 8; i++) { + if (0 == dev_priv->gen_pmu.mmio_list[i]) + break; + + addr = mmio_addr + + i * sizeof(dev_priv->gen_pmu.mmio_list[i]); + + ret = intel_ring_begin(ring, 4); + if (ret) + return; + + intel_ring_emit(ring, + MI_STORE_REGISTER_MEM(1) | + MI_SRM_LRM_GLOBAL_GTT); + intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]); + intel_ring_emit(ring, addr); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } obj->base.write_domain = I915_GEM_DOMAIN_RENDER; i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring); @@ -556,7 +576,7 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv, { struct perf_sample_data data; struct perf_event *event = dev_priv->gen_pmu.exclusive_event; - int ts_size, snapshot_size; + int ts_size, mmio_size, snapshot_size; u8 *snapshot, *current_ptr; struct drm_i915_ts_node_ctx_id *ctx_info; struct drm_i915_ts_node_ring_id *ring_info; @@ -565,10 +585,17 @@ static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv, struct perf_raw_record raw; ts_size = sizeof(struct drm_i915_ts_data); - snapshot_size = ts_size + sizeof(*ctx_info); + + if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO) + mmio_size = sizeof(struct drm_i915_mmio_data); + else + mmio_size = 0; + snapshot = dev_priv->gen_pmu.buffer.addr + node->offset; + snapshot_size = ts_size + mmio_size + sizeof(*ctx_info); - ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + ts_size); + ctx_info = (struct drm_i915_ts_node_ctx_id *) + (snapshot + mmio_size + ts_size); ctx_info->ctx_id = node->ctx_id; current_ptr = snapshot + snapshot_size; @@ -1053,6 +1080,9 @@ static int 
init_gen_pmu_buffer(struct perf_event *event) if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_TAG) node_size += sizeof(struct drm_i915_ts_node_tag); + if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_MMIO) + node_size += sizeof(struct drm_i915_mmio_data); + /* size has to be aligned to 8 bytes (required by relevant gpu cmds) */ node_size = ALIGN(node_size, 8); dev_priv->gen_pmu.buffer.node_size = node_size; @@ -1648,6 +1678,42 @@ err_size: goto out; } +#define GEN_RANGE(l, h) GENMASK(h, l) + +/* Some embargoed entries missing from whitelist */ +static const struct register_whitelist { + uint64_t offset; + uint32_t size; + /* supported gens, 0x10 for 4, 0x30 for 4 and 5, etc. */ + uint32_t gen_bitmask; +} whitelist[] = { + { GEN6_GT_GFX_RC6, 4, GEN_RANGE(7, 9) }, + { GEN6_GT_GFX_RC6p, 4, GEN_RANGE(7, 9) }, +}; + +static int check_mmio_whitelist(struct drm_i915_private *dev_priv, + struct drm_i915_gen_pmu_attr *gen_attr) +{ /* Checks each requested register offset, up to the first zero entry, against whitelist[] for the running gen; returns 0 if all match, -EINVAL otherwise. */ + struct register_whitelist const *entry = whitelist; + int i, count; + + for (count = 0; count < 8; count++) { + if (!gen_attr->mmio_list[count]) /* a zero offset terminates the list */ + break; + + for (i = 0, entry = whitelist; i < ARRAY_SIZE(whitelist); i++, entry++) { /* rescan from the first whitelist entry for every register, so entry never runs past the table */ + if (entry->offset == gen_attr->mmio_list[count] && + (1 << INTEL_INFO(dev_priv->dev)->gen & + entry->gen_bitmask)) + break; + } + + if (i == ARRAY_SIZE(whitelist)) /* inner loop finished without a match */ + return -EINVAL; + } + return 0; +} + static int i915_gen_event_init(struct perf_event *event) { struct drm_i915_private *dev_priv = @@ -1674,6 +1740,17 @@ static int i915_gen_event_init(struct perf_event *event) if (gen_attr.sample_tag) dev_priv->gen_pmu.sample_info_flags |= I915_GEN_PMU_SAMPLE_TAG; + if (gen_attr.sample_mmio) { + ret = check_mmio_whitelist(dev_priv, &gen_attr); + if (ret) + return ret; + + dev_priv->gen_pmu.sample_info_flags |= + I915_GEN_PMU_SAMPLE_MMIO; + memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list, + sizeof(dev_priv->gen_pmu.mmio_list)); + } + /* To avoid the complexity of having to accurately filter * data and
marshal to the appropriate client * we currently only allow exclusive access */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ab4972..65bc39d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -81,7 +81,7 @@ #define I915_OA_ATTR_SIZE_VER0 32 /* sizeof first published struct */ -#define I915_GEN_PMU_ATTR_SIZE_VER0 8 /* sizeof first published struct */ +#define I915_GEN_PMU_ATTR_SIZE_VER0 40 /* sizeof first published struct */ typedef struct _drm_i915_oa_attr { __u32 size; @@ -105,7 +105,9 @@ struct drm_i915_gen_pmu_attr { __u32 sample_ring:1, sample_pid:1, sample_tag:1, - __reserved_1:29; + sample_mmio:1, + __reserved_1:28; + __u32 mmio_list[8]; }; /* Header for PERF_RECORD_DEVICE type events */ @@ -155,6 +157,10 @@ struct drm_i915_ts_data { __u32 ts_high; }; +struct drm_i915_mmio_data { + __u32 mmio[8]; +}; + struct drm_i915_ts_node_ctx_id { __u32 ctx_id; __u32 pad; -- 1.8.5.1 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx