The drm-stats fdinfo tags made available to user space are drm-engine,
drm-cycles, drm-maxfreq and drm-curfreq, one per job slot.
This deviates from standard practice in other DRM drivers, where a
single set of key:value pairs is provided for the whole render engine.
However, Panfrost has separate queues for fragment and vertex/tiler
jobs, so a decision was made to calculate bus cycles and workload times
separately.
Maximum operating frequency is calculated at devfreq initialisation
time.
Current frequency is made available to user space because nvtop uses it
when performing engine usage calculations.
It is important to bear in mind that both the GPU cycle and kernel time
numbers provided are at best rough estimations, and are always reported
in excess of the actual figure for two reasons:
- Excess time because of the delay between the end of job processing,
  the subsequent job IRQ and the actual time of the sample.
- Time spent in the engine queue waiting for the GPU to pick up the
  next job.
To avoid race conditions during enablement/disabling, a reference
counting mechanism was introduced, together with a job flag that tells
us whether a given job increased the refcount. This is necessary
because user space can toggle cycle counting through a debugfs file,
and a given job might have been in flight by the time cycle counting
was disabled.
The main goal of the debugfs cycle counter knob is to let tools like
nvtop or IGT's gputop toggle it at any time, to avoid wasting power
when no engine usage measurement is needed.
Signed-off-by: Adrián Larumbe <adrian.larumbe@xxxxxxxxxxxxx>
Reviewed-by: Boris Brezillon <boris.brezillon@xxxxxxxxxxxxx>
Reviewed-by: Steven Price <steven.price@xxxxxxx>
---
drivers/gpu/drm/panfrost/Makefile | 2 +
drivers/gpu/drm/panfrost/panfrost_debugfs.c | 20 ++++++++
drivers/gpu/drm/panfrost/panfrost_debugfs.h | 13 +++++
drivers/gpu/drm/panfrost/panfrost_devfreq.c | 8 +++
drivers/gpu/drm/panfrost/panfrost_devfreq.h | 3 ++
drivers/gpu/drm/panfrost/panfrost_device.c | 2 +
drivers/gpu/drm/panfrost/panfrost_device.h | 13 +++++
drivers/gpu/drm/panfrost/panfrost_drv.c | 57
++++++++++++++++++++-
drivers/gpu/drm/panfrost/panfrost_gpu.c | 41 +++++++++++++++
drivers/gpu/drm/panfrost/panfrost_gpu.h | 4 ++
drivers/gpu/drm/panfrost/panfrost_job.c | 24 +++++++++
drivers/gpu/drm/panfrost/panfrost_job.h | 5 ++
12 files changed, 191 insertions(+), 1 deletion(-)
create mode 100644 drivers/gpu/drm/panfrost/panfrost_debugfs.c
create mode 100644 drivers/gpu/drm/panfrost/panfrost_debugfs.h
diff --git a/drivers/gpu/drm/panfrost/Makefile
b/drivers/gpu/drm/panfrost/Makefile
index 7da2b3f02ed9..2c01c1e7523e 100644
--- a/drivers/gpu/drm/panfrost/Makefile
+++ b/drivers/gpu/drm/panfrost/Makefile
@@ -12,4 +12,6 @@ panfrost-y := \
panfrost_perfcnt.o \
panfrost_dump.o
+panfrost-$(CONFIG_DEBUG_FS) += panfrost_debugfs.o
+
obj-$(CONFIG_DRM_PANFROST) += panfrost.o
diff --git a/drivers/gpu/drm/panfrost/panfrost_debugfs.c
b/drivers/gpu/drm/panfrost/panfrost_debugfs.c
new file mode 100644
index 000000000000..cc14eccba206
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_debugfs.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/debugfs.h>
+#include <linux/platform_device.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
+#include <drm/panfrost_drm.h>
+
+#include "panfrost_device.h"
+#include "panfrost_gpu.h"
+#include "panfrost_debugfs.h"
+
+void panfrost_debugfs_init(struct drm_minor *minor)
+{
+ struct drm_device *dev = minor->dev;
+ struct panfrost_device *pfdev =
platform_get_drvdata(to_platform_device(dev->dev));
+
+ debugfs_create_atomic_t("profile", 0600, minor->debugfs_root,
&pfdev->profile_mode);
+}
diff --git a/drivers/gpu/drm/panfrost/panfrost_debugfs.h
b/drivers/gpu/drm/panfrost/panfrost_debugfs.h
new file mode 100644
index 000000000000..db1c158bcf2f
--- /dev/null
+++ b/drivers/gpu/drm/panfrost/panfrost_debugfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2023 Collabora ltd.
+ */
+
+#ifndef PANFROST_DEBUGFS_H
+#define PANFROST_DEBUGFS_H
+
+#ifdef CONFIG_DEBUG_FS
+void panfrost_debugfs_init(struct drm_minor *minor);
+#endif
+
+#endif /* PANFROST_DEBUGFS_H */
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 58dfb15a8757..28caffc689e2 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -58,6 +58,7 @@ static int panfrost_devfreq_get_dev_status(struct
device *dev,
spin_lock_irqsave(&pfdevfreq->lock, irqflags);
panfrost_devfreq_update_utilization(pfdevfreq);
+ pfdevfreq->current_frequency = status->current_frequency;
status->total_time = ktime_to_ns(ktime_add(pfdevfreq->busy_time,
pfdevfreq->idle_time));
@@ -117,6 +118,7 @@ int panfrost_devfreq_init(struct panfrost_device
*pfdev)
struct devfreq *devfreq;
struct thermal_cooling_device *cooling;
struct panfrost_devfreq *pfdevfreq = &pfdev->pfdevfreq;
+ unsigned long freq = ULONG_MAX;
if (pfdev->comp->num_supplies > 1) {
/*
@@ -172,6 +174,12 @@ int panfrost_devfreq_init(struct
panfrost_device *pfdev)
return ret;
}
+ /* Find the fastest defined rate */
+ opp = dev_pm_opp_find_freq_floor(dev, &freq);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+ pfdevfreq->fast_rate = freq;
+
dev_pm_opp_put(opp);
/*
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.h
b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
index 1514c1f9d91c..48dbe185f206 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.h
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.h
@@ -19,6 +19,9 @@ struct panfrost_devfreq {
struct devfreq_simple_ondemand_data gov_data;
bool opp_of_table_added;
+ unsigned long current_frequency;
+ unsigned long fast_rate;
+
ktime_t busy_time;
ktime_t idle_time;
ktime_t time_last_update;
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c
b/drivers/gpu/drm/panfrost/panfrost_device.c
index fa1a086a862b..28f7046e1b1a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -207,6 +207,8 @@ int panfrost_device_init(struct panfrost_device
*pfdev)
spin_lock_init(&pfdev->as_lock);
+ spin_lock_init(&pfdev->cycle_counter.lock);
+
err = panfrost_clk_init(pfdev);
if (err) {
dev_err(pfdev->dev, "clk init failed %d\n", err);
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h
b/drivers/gpu/drm/panfrost/panfrost_device.h
index b0126b9fbadc..1e85656dc2f7 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -107,6 +107,7 @@ struct panfrost_device {
struct list_head scheduled_jobs;
struct panfrost_perfcnt *perfcnt;
+ atomic_t profile_mode;
struct mutex sched_lock;
@@ -121,6 +122,11 @@ struct panfrost_device {
struct shrinker shrinker;
struct panfrost_devfreq pfdevfreq;
+
+ struct {
+ atomic_t use_count;
+ spinlock_t lock;
+ } cycle_counter;
};
struct panfrost_mmu {
@@ -135,12 +141,19 @@ struct panfrost_mmu {
struct list_head list;
};
+struct panfrost_engine_usage {
+ unsigned long long elapsed_ns[NUM_JOB_SLOTS];
+ unsigned long long cycles[NUM_JOB_SLOTS];
+};
+
struct panfrost_file_priv {
struct panfrost_device *pfdev;
struct drm_sched_entity sched_entity[NUM_JOB_SLOTS];
struct panfrost_mmu *mmu;
+
+ struct panfrost_engine_usage engine_usage;
};
static inline struct panfrost_device *to_panfrost_device(struct
drm_device *ddev)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c
b/drivers/gpu/drm/panfrost/panfrost_drv.c
index a2ab99698ca8..3c93a11deab1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -20,6 +20,7 @@
#include "panfrost_job.h"
#include "panfrost_gpu.h"
#include "panfrost_perfcnt.h"
+#include "panfrost_debugfs.h"
static bool unstable_ioctls;
module_param_unsafe(unstable_ioctls, bool, 0600);
@@ -267,6 +268,7 @@ static int panfrost_ioctl_submit(struct
drm_device *dev, void *data,
job->requirements = args->requirements;
job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
job->mmu = file_priv->mmu;
+ job->engine_usage = &file_priv->engine_usage;
slot = panfrost_job_get_slot(job);
@@ -523,7 +525,55 @@ static const struct drm_ioctl_desc
panfrost_drm_driver_ioctls[] = {
PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW),
};
-DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops);
+
+static void panfrost_gpu_show_fdinfo(struct panfrost_device *pfdev,
+ struct panfrost_file_priv *panfrost_priv,
+ struct drm_printer *p)
+{
+ int i;
+
+ /*
+ * IMPORTANT NOTE: drm-cycles and drm-engine measurements are not
+ * accurate, as they only provide a rough estimation of the
number of
+ * GPU cycles and CPU time spent in a given context. This is
due to two
+ * different factors:
+ * - Firstly, we must consider the time the CPU and then the
kernel
+ * takes to process the GPU interrupt, which means additional
time and
+ * GPU cycles will be added in excess to the real figure.
+ * - Secondly, the pipelining done by the Job Manager (2 job
slots per
+ * engine) implies there is no way to know exactly how much
time each
+ * job spent on the GPU.
+ */
+
+ static const char * const engine_names[] = {
+ "fragment", "vertex-tiler", "compute-only"
+ };
+
+ for (i = 0; i < NUM_JOB_SLOTS - 1; i++) {