Re: [PATCH 1/1] drm/i915/perf: Map OA buffer to user space

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Jul 16, 2020 at 06:32:10PM +0300, Lionel Landwerlin wrote:
On 14/07/2020 10:22, Umesh Nerlige Ramappa wrote:
From: Piotr Maciejewski <piotr.maciejewski@xxxxxxxxx>

i915 used to support time based sampling mode which is good for overall
system monitoring, but is not enough for query mode used to measure a
single draw call or dispatch. Gen9-Gen11 are using current i915 perf
implementation for query, but Gen12+ requires a new approach based on
triggered reports within oa buffer. In order to enable above feature
two changes are required:

1. Whitelist update:
- enable triggered reports within oa buffer
- reading oa buffer head/tail/status information
- reading gpu ticks counter.

I would break the patch into feature sets :

    - 1 patch to whitelist the trigger registers

    - 1 patch to whitelist OA counters & tail/head/... registers

    - 1 patch for mmap feature


Here are some IGT tests for the trigger feature : https://patchwork.freedesktop.org/series/75311/


We should verify that when not i915-perf is not active the whitelisted OA counters are not incrementing.

Otherwise we're starting to leak information that was not available before.



2. Map oa buffer at umd driver level to solve below constraints related
   to time based sampling interface:
- longer time to access reports collected by oa buffer
- slow oa reports browsing since oa buffer size is large
- missing oa report index, so query cannot browse report directly
- with direct access to oa buffer, query can extract other useful
  reports like context switch information needed to calculate correct
  performance counters values.

Signed-off-by: Piotr Maciejewski <piotr.maciejewski@xxxxxxxxx>
---
 drivers/gpu/drm/i915/gt/intel_workarounds.c |  54 ++++++++
 drivers/gpu/drm/i915/i915_perf.c            | 130 +++++++++++++++++++-
 drivers/gpu/drm/i915/i915_perf_types.h      |  13 ++
 drivers/gpu/drm/i915/i915_reg.h             |  14 +++
 include/uapi/drm/i915_drm.h                 |  19 +++
 5 files changed, 227 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5726cd0a37e0..cf89928fc3a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1365,6 +1365,48 @@ whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
 	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
 }
+static void gen9_whitelist_build_performance_counters(struct i915_wa_list *w)
+{
+	/* OA buffer trigger report 2/6 used by performance query */
+	whitelist_reg(w, OAREPORTTRIG2);
+	whitelist_reg(w, OAREPORTTRIG6);
+
+	/* Performance counters A18-20 used by tbs marker query */
+	whitelist_reg_ext(w, OA_PERF_COUNTER_A18,
+			  RING_FORCE_TO_NONPRIV_ACCESS_RW |
+			  RING_FORCE_TO_NONPRIV_RANGE_16);

I would actually whitelist the entire set of OA_PERF_COUNTER (0-36).

Not sure about that. I thought the only reason we chose to whitelist A18-A20 was because these counters did not count anything. Whitelisting all A counters would just mean that an unprivileged user can keep sampling them all the time from a command buffer. Right?

Thanks,
Umesh


I would like to make use of them in Mesa for the Vulkan driver.

+
+	/* Read access to gpu ticks */
+	whitelist_reg_ext(w, GEN8_GPU_TICKS,
+			  RING_FORCE_TO_NONPRIV_ACCESS_RD);
+
+	/* Read access to: oa status, head, tail, buffer settings */
+	whitelist_reg_ext(w, GEN8_OASTATUS,
+			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
+			  RING_FORCE_TO_NONPRIV_RANGE_4);
+}
+
+static void gen12_whitelist_build_performance_counters(struct i915_wa_list *w)
+{
+	/* OA buffer trigger report 2/6 used by performance query */
+	whitelist_reg(w, GEN12_OAG_OAREPORTTRIG2);
+	whitelist_reg(w, GEN12_OAG_OAREPORTTRIG6);
+
+	/* Performance counters A18-20 used by tbs marker query */
+	whitelist_reg_ext(w, GEN12_OAG_PERF_COUNTER_A18,
+			  RING_FORCE_TO_NONPRIV_ACCESS_RW |
+			  RING_FORCE_TO_NONPRIV_RANGE_16);
+
+	/* Read access to gpu ticks */
+	whitelist_reg_ext(w, GEN12_OAG_GPU_TICKS,
+			  RING_FORCE_TO_NONPRIV_ACCESS_RD);
+
+	/* Read access to: oa status, head, tail, buffer settings */
+	whitelist_reg_ext(w, GEN12_OAG_OASTATUS,
+			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
+			  RING_FORCE_TO_NONPRIV_RANGE_4);
+}
+

...

 struct i915_perf {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 86a23ced051b..2e3d264339e0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -675,6 +675,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define  GEN7_OASTATUS2_HEAD_MASK           0xffffffc0
 #define  GEN7_OASTATUS2_MEM_SELECT_GGTT     (1 << 0) /* 0: PPGTT, 1: GGTT */
+#define GEN8_GPU_TICKS _MMIO(0x2910)
 #define GEN8_OASTATUS _MMIO(0x2b08)
 #define  GEN8_OASTATUS_OVERRUN_STATUS	    (1 << 3)
 #define  GEN8_OASTATUS_COUNTER_OVERFLOW     (1 << 2)
@@ -696,6 +697,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OABUFFER_SIZE_16M   (7 << 3)
 #define GEN12_OA_TLB_INV_CR _MMIO(0xceec)
+#define GEN12_SQCNT1 _MMIO(0x8718)
 /* Gen12 OAR unit */
 #define GEN12_OAR_OACONTROL _MMIO(0x2960)
@@ -731,6 +733,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define  GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS     (1 << 2)
 #define  GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1)
+#define GEN12_OAG_GPU_TICKS _MMIO(0xda90)
 #define GEN12_OAG_OASTATUS _MMIO(0xdafc)
 #define  GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2)
 #define  GEN12_OAG_OASTATUS_BUFFER_OVERFLOW  (1 << 1)
@@ -972,6 +975,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT    24
 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT    28
+/* Performance counters registers */
+#define OA_PERF_COUNTER_A18   _MMIO(0x2890)
+#define OA_PERF_COUNTER_A19   _MMIO(0x2898)
+#define OA_PERF_COUNTER_A20   _MMIO(0x28A0)
Maybe turn this into OA_PERF_COUNTER(idx) _MMIO(0x2800 + idx * 8)
+
+/* Gen12 Performance counters registers */
+#define GEN12_OAG_PERF_COUNTER_A16   _MMIO(0xDA00)
+#define GEN12_OAG_PERF_COUNTER_A18   _MMIO(0xDA10)
+#define GEN12_OAG_PERF_COUNTER_A19   _MMIO(0xDA18)
+#define GEN12_OAG_PERF_COUNTER_A20   _MMIO(0xDA20)
Same here
+
 /* Same layout as OASTARTTRIGX */
 #define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900)
 #define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..62b88c0123c8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -2048,6 +2048,25 @@ struct drm_i915_perf_open_param {z
  */
 #define I915_PERF_IOCTL_CONFIG	_IO('i', 0x2)
+/**
+ * Returns OA buffer properties.
+ *
+ * This ioctl is available in perf revision 6.
+ */
+#define I915_PERF_IOCTL_GET_OA_BUFFER_INFO _IO('i', 0x3)
+
+/**
+ * OA buffer information structure.
+ */
+struct drm_i915_perf_oa_buffer_info {
+	__u32 size;
+	__u32 head;
+	__u32 tail;
+	__u32 gpu_address;
+	__u64 cpu_address;
+	__u64 reserved[4];
+};
+
 /**
  * Common to all i915 perf records
  */

Thanks a lot,


-Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [AMD Graphics]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux