From: Sourab Gupta <sourab.gupta@xxxxxxxxx>

This patch adds support for retrieving MMIO register values through the Gen
Perf PMU interface. Through this interface, the userspace can now request up
to 8 MMIO register values to be dumped, along with the timestamp values which
were dumped earlier across the batchbuffer boundaries.

Userspace can pass the addresses of up to 8 MMIO registers through perf attr
config. The commands to dump the values of these MMIO registers are then
inserted into the ring along with commands to dump the timestamps.

Signed-off-by: Sourab Gupta <sourab.gupta@xxxxxxxxx>
---
 drivers/gpu/drm/i915/i915_drv.h     |   4 +-
 drivers/gpu/drm/i915/i915_oa_perf.c | 119 ++++++++++++++++++++++++++++++++----
 include/uapi/drm/i915_drm.h         |   9 ++-
 3 files changed, 117 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0e1d17..1f86358 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1718,9 +1718,10 @@ struct drm_i915_ts_node_info {
 	struct drm_i915_gem_request *req;
 };
 
-struct drm_i915_ts_node {
+struct drm_i915_ts_mmio_node {
 	/* ensure timestamp starts on a qword boundary */
 	struct drm_i915_ts_data timestamp;
+	__u32 mmio[8];
 	struct drm_i915_ts_node_info node_info;
 };
 #endif
@@ -2024,6 +2025,7 @@ struct drm_i915_private {
 		struct work_struct work_timer;
 		struct work_struct work_event_stop;
 		struct completion complete;
+		u32 mmio_list[8];
 	} gen_pmu;
 
 	struct list_head profile_cmd;
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c b/drivers/gpu/drm/i915/i915_oa_perf.c
index ed0bdc9..465e823 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -113,10 +113,10 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
 			dev_priv->gen_pmu.buffer.addr;
 	void *data_ptr = (u8 *)queue_hdr + queue_hdr->data_offset;
 	int data_size =	(queue_hdr->size_in_bytes - queue_hdr->data_offset);
-	u32 node_offset, timestamp_offset, addr = 0;
-	int ret;
+	u32 node_offset, timestamp_offset, mmio_offset, addr = 0;
+	int ret, i = 0;
 
-	struct drm_i915_ts_node *nodes = data_ptr;
+	struct drm_i915_ts_mmio_node *nodes = data_ptr;
 	int num_nodes = 0;
 	int index = 0;
 
@@ -124,12 +124,14 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
 	index = queue_hdr->node_count % num_nodes;
 
 	timestamp_offset =
 		offsetof(struct drm_i915_ts_data, ts_low);
+	mmio_offset =
+		offsetof(struct drm_i915_ts_mmio_node, mmio);
 	node_offset = i915_gem_obj_ggtt_offset(dev_priv->gen_pmu.buffer.obj) +
 			queue_hdr->data_offset +
-			index * sizeof(struct drm_i915_ts_node);
+			index * sizeof(struct drm_i915_ts_mmio_node);
 	addr = node_offset +
-		offsetof(struct drm_i915_ts_node, timestamp) +
+		offsetof(struct drm_i915_ts_mmio_node, timestamp) +
 		timestamp_offset;
 
 	if (ring->id == RCS) {
@@ -158,6 +160,27 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
 		intel_ring_emit(ring, 0); /* imm high, must be zero */
 		intel_ring_advance(ring);
 	}
+
+	for (i = 0; i < 8; i++) {
+		if (0 == dev_priv->gen_pmu.mmio_list[i])
+			break;
+
+		addr = node_offset + mmio_offset +
+			i * sizeof(dev_priv->gen_pmu.mmio_list[i]);
+
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return;
+
+		intel_ring_emit(ring,
+				MI_STORE_REGISTER_MEM(1) |
+				MI_SRM_LRM_GLOBAL_GTT);
+		intel_ring_emit(ring, dev_priv->gen_pmu.mmio_list[i]);
+		intel_ring_emit(ring, addr);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	}
+
 	node_info = &nodes[index].node_info;
 	i915_gem_request_assign(&node_info->req,
 				ring->outstanding_lazy_request);
@@ -314,11 +337,11 @@ static void init_gen_pmu_buf_queue(struct drm_i915_private *dev_priv)
 }
 
 static void forward_one_gen_pmu_sample(struct drm_i915_private *dev_priv,
-				struct drm_i915_ts_node *node)
+				struct drm_i915_ts_mmio_node *node)
 {
 	struct perf_sample_data data;
 	struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
-	int snapshot_size = sizeof(struct drm_i915_ts_usernode);
+	int snapshot_size = sizeof(struct drm_i915_ts_mmio_usernode);
 	struct perf_raw_record raw;
 
 	perf_sample_data_init(&data, 0, event->hw.last_period);
@@ -338,11 +361,11 @@ void i915_gen_pmu_wait_gpu(struct drm_i915_private *dev_priv)
 	struct drm_i915_ts_queue_header *hdr =
 		(struct drm_i915_ts_queue_header *)
 		dev_priv->gen_pmu.buffer.addr;
-	struct drm_i915_ts_node *first_node, *node;
+	struct drm_i915_ts_mmio_node *first_node, *node;
 	int head, tail, num_nodes, ret;
 	struct drm_i915_gem_request *req;
 
-	first_node = (struct drm_i915_ts_node *)
+	first_node = (struct drm_i915_ts_mmio_node *)
 			((char *)hdr + hdr->data_offset);
 	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
 			sizeof(*node);
@@ -375,14 +398,14 @@ void forward_gen_pmu_snapshots_work(struct work_struct *__work)
 	struct drm_i915_ts_queue_header *hdr =
 		(struct drm_i915_ts_queue_header *)
 		dev_priv->gen_pmu.buffer.addr;
-	struct drm_i915_ts_node *first_node, *node;
+	struct drm_i915_ts_mmio_node *first_node, *node;
 	int head, tail, num_nodes, ret;
 	struct drm_i915_gem_request *req;
 
 	if (dev_priv->gen_pmu.event_active == false)
 		return;
 
-	first_node = (struct drm_i915_ts_node *)
+	first_node = (struct drm_i915_ts_mmio_node *)
 			((char *)hdr + hdr->data_offset);
 	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
 			sizeof(*node);
@@ -421,11 +444,11 @@ void i915_gen_pmu_stop_work_fn(struct work_struct *__work)
 	struct drm_i915_ts_queue_header *hdr =
 		(struct drm_i915_ts_queue_header *)
 		dev_priv->gen_pmu.buffer.addr;
-	struct drm_i915_ts_node *first_node, *node;
+	struct drm_i915_ts_mmio_node *first_node, *node;
 	int head, tail, num_nodes, ret;
 	struct drm_i915_gem_request *req;
 
-	first_node = (struct drm_i915_ts_node *)
+	first_node = (struct drm_i915_ts_mmio_node *)
 			((char *)hdr + hdr->data_offset);
 	num_nodes = (hdr->size_in_bytes - hdr->data_offset) /
 			sizeof(*node);
@@ -1467,15 +1490,85 @@ static int i915_oa_event_event_idx(struct perf_event *event)
 	return 0;
 }
 
+static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
+				struct drm_i915_gen_pmu_attr *attr)
+{
+	u32 size;
+	int ret;
+
+	if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
+		return -EFAULT;
+
+	/*
+	 * zero the full structure, so that a short copy will be nice.
+	 */
+	memset(attr, 0, sizeof(*attr));
+
+	ret = get_user(size, &uattr->size);
+	if (ret)
+		return ret;
+
+	if (size > PAGE_SIZE)	/* silly large */
+		goto err_size;
+
+	if (size < I915_GEN_PMU_ATTR_SIZE_VER0)
+		goto err_size;
+
+	/*
+	 * If we're handed a bigger struct than we know of,
+	 * ensure all the unknown bits are 0 - i.e. new
+	 * user-space does not rely on any kernel feature
+	 * extensions we dont know about yet.
+	 */
+	if (size > sizeof(*attr)) {
+		unsigned char __user *addr;
+		unsigned char __user *end;
+		unsigned char val;
+
+		addr = (void __user *)uattr + sizeof(*attr);
+		end = (void __user *)uattr + size;
+
+		for (; addr < end; addr++) {
+			ret = get_user(val, addr);
+			if (ret)
+				return ret;
+			if (val)
+				goto err_size;
+		}
+		size = sizeof(*attr);
+	}
+
+	ret = copy_from_user(attr, uattr, size);
+	if (ret)
+		return -EFAULT;
+
+out:
+	return ret;
+
+err_size:
+	put_user(sizeof(*attr), &uattr->size);
+	ret = -E2BIG;
+	goto out;
+}
+
 static int i915_gen_event_init(struct perf_event *event)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(event->pmu, typeof(*dev_priv), gen_pmu.pmu);
+	struct drm_i915_gen_pmu_attr gen_attr;
 	int ret = 0;
 
 	if (event->attr.type != event->pmu->type)
 		return -ENOENT;
 
+	ret = i915_gen_pmu_copy_attr(to_user_ptr(event->attr.config),
+				&gen_attr);
+	if (ret)
+		return ret;
+
+	memcpy(dev_priv->gen_pmu.mmio_list, gen_attr.mmio_list,
+			sizeof(dev_priv->gen_pmu.mmio_list));
+
 	/* To avoid the complexity of having to accurately filter
 	 * data and marshal to the appropriate client
 	 * we currently only allow exclusive access */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a7da421..8d4deec 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -80,6 +80,7 @@
 #define I915_OA_METRICS_SET_MAX	I915_OA_METRICS_SET_SAMPLER_BALANCE
 
 #define I915_OA_ATTR_SIZE_VER0	32  /* sizeof first published struct */
+#define I915_GEN_PMU_ATTR_SIZE_VER0	36  /* sizeof first published struct */
 
 typedef struct _drm_i915_oa_attr {
 	__u32 size;
@@ -97,6 +98,11 @@ typedef struct _drm_i915_oa_attr {
 	       __reserved_2:31;
 } drm_i915_oa_attr_t;
 
+struct drm_i915_gen_pmu_attr {
+	__u32 size;
+	__u32 mmio_list[8];
+};
+
 /* Header for PERF_RECORD_DEVICE type events */
 typedef struct _drm_i915_oa_event_header {
 	__u32 type;
@@ -143,9 +149,10 @@ struct drm_i915_ts_data {
 	__u32 ts_high;
 };
 
-struct drm_i915_ts_usernode {
+struct drm_i915_ts_mmio_usernode {
 	/* ensure timestamp starts on a qword boundary */
 	struct drm_i915_ts_data timestamp;
+	__u32 mmio[8];
 	struct drm_i915_ts_node_footer node_info;
 };
-- 
1.8.5.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/intel-gfx