From: Piotr Maciejewski <piotr.maciejewski@xxxxxxxxx>
i915 has so far supported a time-based sampling mode, which is good for
overall system monitoring but is not enough for the query mode used to
measure a single draw call or dispatch. Gen9-Gen11 use the current i915
perf implementation for queries, but Gen12+ requires a new approach
based on triggered reports within the OA buffer.
Triggering reports into the OA buffer is achieved by writing into a
trigger register. Optionally an unused counter/register is set with a
marker value such that a triggered report can be identified in the OA
buffer. Reports are usually triggered at the start and end of work that
is measured.
Since OA buffer is large and queries can be frequent, an efficient way
to look for triggered reports is required. By knowing the current head
and tail offsets into the OA buffer, it is easier to determine the
locality of the reports of interest.
The current perf OA interface does not expose head/tail information to
the user, and it filters out invalid reports before sending data to the
user. Also, considering the limited size of the user buffer used during
a query, creating a 1:1 copy of the OA buffer in user space adds
undesired complexity.
The solution is to map the OA buffer into user space, provided that
(1) it is accessed by a privileged user, and
(2) OA report filtering is not used.
These two conditions satisfy the safety criteria that the current
perf interface addresses.
To enable the query:
- Add an ioctl to expose head and tail to the user
- Add an ioctl to return size and offset of the OA buffer
- Map the OA buffer to the user space
v2:
- Improve commit message (Chris)
- Do not mmap based on gem object filp. Instead, use perf_fd and support
mmap syscall (Chris)
- Pass non-zero offset in mmap to enforce the right object is
mapped (Chris)
- Do not expose gpu_address (Chris)
- Verify start and length of vma for page alignment (Lionel)
- Move SQNTL config out (Lionel)
v3: (Chris)
- Omit redundant checks
- Return VM_FAULT_SIGBUS if old stream is closed
- Maintain reference counts to stream in vm_open and vm_close
- Use switch to identify object to be mapped
v4: Call kref_put on closing perf fd (Chris)
Signed-off-by: Piotr Maciejewski <piotr.maciejewski@xxxxxxxxx>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@xxxxxxxxx>
---
drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
drivers/gpu/drm/i915/gem/i915_gem_mman.h | 2 +
drivers/gpu/drm/i915/i915_perf.c | 211 ++++++++++++++++++++++-
drivers/gpu/drm/i915/i915_perf_types.h | 17 ++
include/uapi/drm/i915_drm.h | 32 ++++
5 files changed, 261 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index b23368529a40..7c4b9b0c334b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -204,7 +204,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj,
return view;
}
-static vm_fault_t i915_error_to_vmf_fault(int err)
+vm_fault_t i915_error_to_vmf_fault(int err)
{
switch (err) {
default:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
index efee9e0d2508..1190a3a228ea 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
@@ -29,4 +29,6 @@ void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
+vm_fault_t i915_error_to_vmf_fault(int err);
+
#endif
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2f23aad12c60..331c3f9c59e2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,10 +192,12 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/mman.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_mman.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
@@ -378,6 +380,24 @@ static struct ctl_table_header *sysctl_header;
static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
+static void free_stream(struct kref *kref)
+{
+ struct i915_perf_stream *stream =
+ container_of(kref, typeof(*stream), refcount);
+
+ kfree(stream);
+}
+
+static void perf_stream_get(struct i915_perf_stream *stream)
+{
+ kref_get(&stream->refcount);
+}
+
+static void perf_stream_put(struct i915_perf_stream *stream)
+{
+ kref_put(&stream->refcount, free_stream);
+}
+
void i915_oa_config_release(struct kref *ref)
{
struct i915_oa_config *oa_config =
@@ -434,6 +454,30 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
+static u32 gen12_oa_hw_head_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ return intel_uncore_read(uncore, GEN12_OAG_OAHEADPTR) &
+ GEN12_OAG_OAHEADPTR_MASK;
+}
+
+static u32 gen8_oa_hw_head_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ return intel_uncore_read(uncore, GEN8_OAHEADPTR) &
+ GEN8_OAHEADPTR_MASK;
+}
+
+static u32 gen7_oa_hw_head_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+ u32 oastatus2 = intel_uncore_read(uncore, GEN7_OASTATUS2);
+
+ return oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
+}
+
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
* @stream: i915 stream instance
@@ -2934,6 +2978,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->poll_check_timer.function = oa_poll_check_timer_cb;
init_waitqueue_head(&stream->poll_wq);
spin_lock_init(&stream->oa_buffer.ptr_lock);
+ kref_init(&stream->refcount);
return 0;
@@ -3214,6 +3259,69 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
return ret;
}
+/**
+ * i915_perf_oa_buffer_head_tail_locked - head and tail of the OA buffer
+ * @stream: i915 perf stream
+ * @arg: pointer to oa buffer head and tail filled by this function.
+ */
+static int i915_perf_oa_buffer_head_tail_locked(struct i915_perf_stream *stream,
+ unsigned long arg)
+{
+ struct drm_i915_perf_oa_buffer_head_tail ht;
+ void __user *output = (void __user *)arg;
+ u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
+
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
+ DRM_DEBUG("Insufficient privileges to access OA buffer info\n");
+ return -EACCES;
+ }
+
+ if (!output)
+ return -EINVAL;
+
+ memset(&ht, 0, sizeof(ht));
+
+ ht.head = stream->perf->ops.oa_hw_head_read(stream) - gtt_offset;
+ ht.tail = stream->perf->ops.oa_hw_tail_read(stream) - gtt_offset;
+
+ if (copy_to_user(output, &ht, sizeof(ht)))
+ return -EFAULT;
+
+ return 0;
+}
+
+#define I915_PERF_OA_BUFFER_MMAP_OFFSET 1
+
+/**
+ * i915_perf_oa_buffer_info_locked - size and offset of the OA buffer
+ * @stream: i915 perf stream
+ * @arg: pointer to oa buffer info filled by this function.
+ */
+static int i915_perf_oa_buffer_info_locked(struct i915_perf_stream *stream,
+ unsigned long arg)
+{
+ struct drm_i915_perf_oa_buffer_info info;
+ void __user *output = (void __user *)arg;
+
+ if (i915_perf_stream_paranoid && !perfmon_capable()) {
+ DRM_DEBUG("Insufficient privileges to access OA buffer info\n");
+ return -EACCES;
+ }
+
+ if (!output)
+ return -EINVAL;
+
+ memset(&info, 0, sizeof(info));
+
+ info.size = stream->oa_buffer.vma->size;
+ info.offset = I915_PERF_OA_BUFFER_MMAP_OFFSET * PAGE_SIZE;
+
+ if (copy_to_user(output, &info, sizeof(info)))
+ return -EFAULT;
+
+ return 0;
+}
+
/**
* i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
* @stream: An i915 perf stream
@@ -3239,6 +3347,10 @@ static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
return 0;
case I915_PERF_IOCTL_CONFIG:
return i915_perf_config_locked(stream, arg);
+ case I915_PERF_IOCTL_GET_OA_BUFFER_INFO:
+ return i915_perf_oa_buffer_info_locked(stream, arg);
+ case I915_PERF_IOCTL_GET_OA_BUFFER_HEAD_TAIL:
+ return i915_perf_oa_buffer_head_tail_locked(stream, arg);
}
return -EINVAL;
@@ -3291,7 +3403,8 @@ static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
if (stream->ctx)
i915_gem_context_put(stream->ctx);
- kfree(stream);
+ WRITE_ONCE(stream->closed, true);
+ perf_stream_put(stream);
}
/**
@@ -3314,12 +3427,98 @@ static int i915_perf_release(struct inode *inode, struct file *file)
i915_perf_destroy_locked(stream);
mutex_unlock(&perf->lock);
+ unmap_mapping_range(file->f_mapping, 0, OA_BUFFER_SIZE, 1);
+
/* Release the reference the perf stream kept on the driver. */
drm_dev_put(&perf->i915->drm);
return 0;
}
+static void vm_open_oa(struct vm_area_struct *vma)
+{
+ struct i915_perf_stream *stream = vma->vm_private_data;
+
+ GEM_BUG_ON(!stream);
+ perf_stream_get(stream);
+}
+
+static void vm_close_oa(struct vm_area_struct *vma)
+{
+ struct i915_perf_stream *stream = vma->vm_private_data;
+
+ GEM_BUG_ON(!stream);
+ perf_stream_put(stream);
+}
+
+static vm_fault_t vm_fault_oa(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct i915_perf_stream *stream = vma->vm_private_data;
+ struct i915_perf *perf = stream->perf;
+ struct drm_i915_gem_object *obj = stream->oa_buffer.vma->obj;