Re: [PATCH i-g-t 4/7] intel-gpu-overlay: Catch-up to new i915 PMU

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 25/09/2017 16:31, Chris Wilson wrote:
Quoting Tvrtko Ursulin (2017-09-25 16:14:59)
From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxx>
---
  lib/igt_perf.h           | 93 ++++++++++++++++++++++++++++++++++--------------
  overlay/gem-interrupts.c |  2 +-
  overlay/gpu-freq.c       |  4 +--
  overlay/gpu-top.c        | 68 +++++++++++++++++++----------------
  overlay/power.c          |  4 +--
  overlay/rc6.c            |  6 ++--
  6 files changed, 111 insertions(+), 66 deletions(-)

diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 8e674c3a3755..e29216f0500a 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -1,3 +1,27 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
  #ifndef I915_PERF_H
  #define I915_PERF_H
@@ -5,41 +29,56 @@
  #include <linux/perf_event.h>
-#define I915_SAMPLE_BUSY       0
-#define I915_SAMPLE_WAIT       1
-#define I915_SAMPLE_SEMA       2
+enum drm_i915_gem_engine_class {
+       I915_ENGINE_CLASS_OTHER = 0,
+       I915_ENGINE_CLASS_RENDER = 1,
+       I915_ENGINE_CLASS_COPY = 2,
+       I915_ENGINE_CLASS_VIDEO = 3,
+       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+       I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+       I915_SAMPLE_QUEUED = 0,
+       I915_SAMPLE_BUSY = 1,
+       I915_SAMPLE_WAIT = 2,
+       I915_SAMPLE_SEMA = 3,
+       I915_ENGINE_SAMPLE_MAX /* non-ABI */
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+       (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
-#define I915_SAMPLE_RCS 0
-#define I915_SAMPLE_VCS                1
-#define I915_SAMPLE_BCS                2
-#define I915_SAMPLE_VECS       3
+#define __I915_PMU_ENGINE(class, instance, sample) \
+       ((class) << I915_PMU_CLASS_SHIFT | \
+       (instance) << I915_PMU_SAMPLE_BITS | \
+       (sample))
-#define __I915_PERF_COUNT(ring, id) ((ring) << 4 | (id))
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
-#define I915_PERF_COUNT_RCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_RCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_RCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_VCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_BCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_BCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_BCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
-#define I915_PERF_COUNT_VECS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
-#define I915_PERF_COUNT_VECS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
-#define I915_PERF_COUNT_VECS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
-#define I915_PERF_ACTUAL_FREQUENCY 32
-#define I915_PERF_REQUESTED_FREQUENCY 33
-#define I915_PERF_ENERGY 34
-#define I915_PERF_INTERRUPTS 35
+#define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS            __I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY         __I915_PMU_OTHER(3)
+#define I915_PMU_RC6p_RESIDENCY                __I915_PMU_OTHER(4)
+#define I915_PMU_RC6pp_RESIDENCY       __I915_PMU_OTHER(5)
-#define I915_PERF_RC6_RESIDENCY 40
-#define I915_PERF_RC6p_RESIDENCY       41
-#define I915_PERF_RC6pp_RESIDENCY      42
+#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY
  static inline int
  perf_event_open(struct perf_event_attr *attr,
diff --git a/overlay/gem-interrupts.c b/overlay/gem-interrupts.c
index 3eda24f4d7eb..add4a9dfd725 100644
--- a/overlay/gem-interrupts.c
+++ b/overlay/gem-interrupts.c
@@ -113,7 +113,7 @@ int gem_interrupts_init(struct gem_interrupts *irqs)
  {
         memset(irqs, 0, sizeof(*irqs));
-       irqs->fd = perf_i915_open(I915_PERF_INTERRUPTS);
+       irqs->fd = perf_i915_open(I915_PMU_INTERRUPTS);
         if (irqs->fd < 0 && interrupts_read() < 0)
                 irqs->error = ENODEV;
diff --git a/overlay/gpu-freq.c b/overlay/gpu-freq.c
index 76c5ed9acfd1..c4619b87242a 100644
--- a/overlay/gpu-freq.c
+++ b/overlay/gpu-freq.c
@@ -37,8 +37,8 @@ static int perf_open(void)
  {
         int fd;
-       fd = perf_i915_open_group(I915_PERF_ACTUAL_FREQUENCY, -1);
-       if (perf_i915_open_group(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
+       fd = perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, -1);
+       if (perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, fd) < 0) {
                 close(fd);
                 fd = -1;
         }
diff --git a/overlay/gpu-top.c b/overlay/gpu-top.c
index 812f47d5aced..61b8f62fd78c 100644
--- a/overlay/gpu-top.c
+++ b/overlay/gpu-top.c
@@ -43,49 +43,57 @@
  #define   RING_WAIT            (1<<11)
  #define   RING_WAIT_SEMAPHORE  (1<<10)
-#define __I915_PERF_RING(n) (4*n)
-#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
-#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
-#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
-
  static int perf_init(struct gpu_top *gt)
  {
-       const char *names[] = {
-               "RCS",
-               "BCS",
-               "VCS0",
-               "VCS1",
-               NULL,
+       struct engine_desc {
+               unsigned class, inst;
+               const char *name;
+       } *d, engines[] = {
+               { I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
+               { I915_ENGINE_CLASS_COPY, 0, "bcs0" },
+               { I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
+               { I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
+               { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },

Hmm, there is some hidden coupling with colours atm, but other than that
the order is flexible, iirc.

What do you mean? Did you first think there was an issue, but there isn't one after all?

Oh right, VECS wasn't on the list before... It seems to work anyway; I just haven't tried it with five engines.


+               { 0, 0, NULL }
         };
-       int n;
-       gt->fd = perf_i915_open_group(I915_PERF_RING_BUSY(0), -1);
+       d = &engines[0];
+
+       gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
+                                     -1);
         if (gt->fd < 0)
                 return -1;
-       if (perf_i915_open_group(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
+       if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
+                                gt->fd) >= 0)
                 gt->have_wait = 1;
-       if (perf_i915_open_group(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
+       if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
+                                gt->fd) >= 0)
                 gt->have_sema = 1;
-       gt->ring[0].name = names[0];
+       gt->ring[0].name = d->name;
         gt->num_rings = 1;
-       for (n = 1; names[n]; n++) {
-               if (perf_i915_open_group(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
-                       if (gt->have_wait &&
-                           perf_i915_open_group(I915_PERF_RING_WAIT(n),
-                                                gt->fd) < 0)
-                               return -1;
-
-                       if (gt->have_sema &&
-                           perf_i915_open_group(I915_PERF_RING_SEMA(n),
-                                                gt->fd) < 0)
-                               return -1;
-
-                       gt->ring[gt->num_rings++].name = names[n];
-               }
+       for (d++; d->name; d++) {
+               if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
+                                                             d->inst),
+                                       gt->fd) < 0)
+                       continue;
+
+               if (gt->have_wait &&
+                   perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
+                                                             d->inst),
+                                        gt->fd) < 0)
+                       return -1;
+
+               if (gt->have_sema &&
+                   perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
+                                                             d->inst),
+                                  gt->fd) < 0)
+                       return -1;
+
+               gt->ring[gt->num_rings++].name = d->name;
         }
         return 0;
diff --git a/overlay/power.c b/overlay/power.c
index dd4aec6bffd9..805f4ca7805c 100644
--- a/overlay/power.c
+++ b/overlay/power.c
@@ -45,9 +45,7 @@ int power_init(struct power *power)
         memset(power, 0, sizeof(*power));
-       power->fd = perf_i915_open(I915_PERF_ENERGY);
-       if (power->fd != -1)
-               return 0;
+       power->fd = -1;

Hmm, didn't you say that the rapl values were exposed via perf as well?

Yes, I planned to add this back afterwards, but I can include it as part of this series as well.

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux