Signed-off-by: Piotr Maciejewski <piotr.maciejewski@xxxxxxxxx>
---
drivers/gpu/drm/i915/gt/intel_workarounds.c | 54 ++++++++
drivers/gpu/drm/i915/i915_perf.c | 130 +++++++++++++++++++-
drivers/gpu/drm/i915/i915_perf_types.h | 13 ++
drivers/gpu/drm/i915/i915_reg.h | 14 +++
include/uapi/drm/i915_drm.h | 19 +++
5 files changed, 227 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5726cd0a37e0..cf89928fc3a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1365,6 +1365,48 @@ whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}
+static void gen9_whitelist_build_performance_counters(struct i915_wa_list *w)
+{
+ /* OA buffer trigger report 2/6 used by performance query */
+ whitelist_reg(w, OAREPORTTRIG2);
+ whitelist_reg(w, OAREPORTTRIG6);
+
+ /* Performance counters A18-20 used by tbs marker query */
+ whitelist_reg_ext(w, OA_PERF_COUNTER_A18,
+ RING_FORCE_TO_NONPRIV_ACCESS_RW |
+ RING_FORCE_TO_NONPRIV_RANGE_16);
+
+ /* Read access to gpu ticks */
+ whitelist_reg_ext(w, GEN8_GPU_TICKS,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
+
+ /* Read access to: oa status, head, tail, buffer settings */
+ whitelist_reg_ext(w, GEN8_OASTATUS,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4);
+}
+
+static void gen12_whitelist_build_performance_counters(struct i915_wa_list *w)
+{
+ /* OA buffer trigger report 2/6 used by performance query */
+ whitelist_reg(w, GEN12_OAG_OAREPORTTRIG2);
+ whitelist_reg(w, GEN12_OAG_OAREPORTTRIG6);
+
+ /* Performance counters A18-20 used by tbs marker query */
+ whitelist_reg_ext(w, GEN12_OAG_PERF_COUNTER_A18,
+ RING_FORCE_TO_NONPRIV_ACCESS_RW |
+ RING_FORCE_TO_NONPRIV_RANGE_16);
+
+ /* Read access to gpu ticks */
+ whitelist_reg_ext(w, GEN12_OAG_GPU_TICKS,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
+
+ /* Read access to: oa status, head, tail, buffer settings */
+ whitelist_reg_ext(w, GEN12_OAG_OASTATUS,
+ RING_FORCE_TO_NONPRIV_ACCESS_RD |
+ RING_FORCE_TO_NONPRIV_RANGE_4);
+}
+
static void gen9_whitelist_build(struct i915_wa_list *w)
{
/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1378,6 +1420,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaSendPushConstantsFromMMIO:skl,bxt */
whitelist_reg(w, COMMON_SLICE_CHICKEN2);
+
+ /* Performance counters support */
+ gen9_whitelist_build_performance_counters(w);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
@@ -1471,6 +1516,9 @@ static void cnl_whitelist_build(struct intel_engine_cs *engine)
/* WaEnablePreemptionGranularityControlByUMD:cnl */
whitelist_reg(w, GEN8_CS_CHICKEN1);
+
+ /* Performance counters support */
+ gen9_whitelist_build_performance_counters(w);
}
static void icl_whitelist_build(struct intel_engine_cs *engine)
@@ -1500,6 +1548,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
whitelist_reg_ext(w, PS_INVOCATION_COUNT,
RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4);
+
+ /* Performance counters support */
+ gen9_whitelist_build_performance_counters(w);
break;
case VIDEO_DECODE_CLASS:
@@ -1550,6 +1601,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
/* Wa_1806527549:tgl */
whitelist_reg(w, HIZ_CHICKEN);
+
+ /* Performance counters support */
+ gen12_whitelist_build_performance_counters(w);
break;
default:
whitelist_reg_ext(w,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c6f6370283cf..06a3fff52dfa 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -192,6 +192,7 @@
*/
#include <linux/anon_inodes.h>
+#include <linux/mman.h>
#include <linux/sizes.h>
#include <linux/uuid.h>
@@ -434,6 +435,30 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}
+static u32 gen12_oa_hw_head_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ return intel_uncore_read(uncore, GEN12_OAG_OAHEADPTR) &
+ GEN12_OAG_OAHEADPTR_MASK;
+}
+
+static u32 gen8_oa_hw_head_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+
+ return intel_uncore_read(uncore, GEN8_OAHEADPTR) &
+ GEN8_OAHEADPTR_MASK;
+}
+
+static u32 gen7_oa_hw_head_read(struct i915_perf_stream *stream)
+{
+ struct intel_uncore *uncore = stream->uncore;
+ u32 oastatus2 = intel_uncore_read(uncore, GEN7_OASTATUS2);
+
+ return oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
+}
+
/**
* oa_buffer_check_unlocked - check for data and update tail ptr state
* @stream: i915 stream instance
@@ -1328,6 +1353,7 @@ free_oa_buffer(struct i915_perf_stream *stream)
i915_vma_unpin_and_release(&stream->oa_buffer.vma,
I915_VMA_RELEASE_MAP);
+ stream->oa_buffer.cpu_address = 0;
stream->oa_buffer.vaddr = NULL;
}
@@ -1448,7 +1474,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
* bit."
*/
intel_uncore_write(uncore, GEN8_OABUFFER, gtt_offset |
- OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT |
+ GEN7_OABUFFER_EDGE_TRIGGER);
intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
/* Mark that we need updated tail pointers to read from... */
@@ -1501,7 +1528,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
* bit."
*/
intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset |
- OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
+ OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT |
+ GEN7_OABUFFER_EDGE_TRIGGER);
intel_uncore_write(uncore, GEN12_OAG_OATAILPTR,
gtt_offset & GEN12_OAG_OATAILPTR_MASK);
@@ -1562,6 +1590,7 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
goto err_unref;
}
stream->oa_buffer.vma = vma;
+ stream->oa_buffer.cpu_address = 0;
stream->oa_buffer.vaddr =
i915_gem_object_pin_map(bo, I915_MAP_WB);
@@ -1584,6 +1613,52 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
return ret;
}
+static int map_oa_buffer(struct i915_perf_stream *stream)
+{
+ unsigned long address = 0;
+ const u64 size = OA_BUFFER_SIZE;
+ struct i915_vma *oabuffer_vma = stream->oa_buffer.vma;
+ struct drm_i915_gem_object *oabuffer_obj = oabuffer_vma->obj;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma = NULL;
+
+ if(stream->oa_buffer.cpu_address != 0)
+ return 0;
+
+ if (!boot_cpu_has(X86_FEATURE_PAT))
+ return -ENODEV;
+
+ if (!oabuffer_obj || !oabuffer_vma)
+ return -ENOENT;
+
+ if (!oabuffer_obj->base.filp)
+ return -ENXIO;
+
+ if (range_overflows_t(u64, 0, size, oabuffer_obj->base.size))
+ return -EINVAL;
+
+ address = vm_mmap(oabuffer_obj->base.filp, 0, size,
+ PROT_READ, MAP_SHARED, 0);
+
+ if (IS_ERR_VALUE(address))
+ return address;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ vma = find_vma(mm, address);
+ if (vma) {
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));