[PATCH 3/5] perf: stm32: ddrperfm driver creation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The DDRPERFM is the DDR Performance Monitor embedded in STM32MP1 SOC.

This perf drivers supports the read, write, activate, idle and total
time counters, described in the reference manual RM0436.

A 'bandwidth' attribute is added in the 'ddrperfm' event_source in order
to directly get the read and write bandwidths (in MB/s), from the last
read, write and total time counters reading.
This attribute is aside the 'events' attributes group because it is not
a counter, as seen by perf tool.

Signed-off-by: Gerald Baeza <gerald.baeza@xxxxxx>
---
 drivers/perf/Kconfig         |   6 +
 drivers/perf/Makefile        |   1 +
 drivers/perf/stm32_ddr_pmu.c | 502 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 509 insertions(+)
 create mode 100644 drivers/perf/stm32_ddr_pmu.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index af9bc17..d2dde80 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -96,6 +96,12 @@ config THUNDERX2_PMU
 	   The SoC has PMU support in its L3 cache controller (L3C) and
 	   in the DDR4 Memory Controller (DMC).
 
+config STM32_DDR_PMU
+       tristate "STM32 DDR PMU"
+       depends on MACH_STM32MP157
+       help
+         Support for STM32 DDR performance monitor (DDRPERFM).
+
 config XGENE_PMU
         depends on ARCH_XGENE
         bool "APM X-Gene SoC PMU"
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 909f27f..67505e1 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
 obj-$(CONFIG_HISI_PMU) += hisilicon/
 obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
 obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+obj-$(CONFIG_STM32_DDR_PMU) += stm32_ddr_pmu.o
 obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
 obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
diff --git a/drivers/perf/stm32_ddr_pmu.c b/drivers/perf/stm32_ddr_pmu.c
new file mode 100644
index 0000000..a7a6487
--- /dev/null
+++ b/drivers/perf/stm32_ddr_pmu.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file is the STM32 DDR performance monitor (DDRPERFM) driver
+ *
+ * Copyright (C) 2019, STMicroelectronics - All Rights Reserved
+ * Author: Gerald Baeza <gerald.baeza@xxxxxx>
+ */
+
+#include <linux/clk.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define POLL_MS		4000	/* The counter roll over after ~8s @533MHz */
+#define WORD_LENGTH	4	/* Bytes */
+#define BURST_LENGTH	8	/* Words */
+
+#define DDRPERFM_CTL	0x000
+#define DDRPERFM_CFG	0x004
+#define DDRPERFM_STATUS 0x008
+#define DDRPERFM_CCR	0x00C
+#define DDRPERFM_IER	0x010
+#define DDRPERFM_ISR	0x014
+#define DDRPERFM_ICR	0x018
+#define DDRPERFM_TCNT	0x020
+#define DDRPERFM_CNT(X)	(0x030 + 8 * (X))
+#define DDRPERFM_HWCFG	0x3F0
+#define DDRPERFM_VER	0x3F4
+#define DDRPERFM_ID	0x3F8
+#define DDRPERFM_SID	0x3FC
+
+#define CTL_START	0x00000001
+#define CTL_STOP	0x00000002
+#define CCR_CLEAR_ALL	0x8000000F
+#define SID_MAGIC_ID	0xA3C5DD01
+
+#define STRING "Read = %llu, Write = %llu, Read & Write = %llu (MB/s)\n"
+
+enum {
+	READ_CNT,
+	WRITE_CNT,
+	ACTIVATE_CNT,
+	IDLE_CNT,
+	TIME_CNT,
+	PMU_NR_COUNTERS
+};
+
+struct stm32_ddr_pmu {
+	struct pmu pmu;
+	void __iomem *membase;
+	struct clk *clk;
+	struct clk *clk_ddr;
+	unsigned long clk_ddr_rate;
+	struct hrtimer hrtimer;
+	ktime_t poll_period;
+	spinlock_t lock; /* for shared registers access */
+	struct perf_event *events[PMU_NR_COUNTERS];
+	u64 events_cnt[PMU_NR_COUNTERS];
+};
+
+static inline struct stm32_ddr_pmu *pmu_to_stm32_ddr_pmu(struct pmu *p)
+{
+	return container_of(p, struct stm32_ddr_pmu, pmu);
+}
+
+static inline struct stm32_ddr_pmu *hrtimer_to_stm32_ddr_pmu(struct hrtimer *h)
+{
+	return container_of(h, struct stm32_ddr_pmu, hrtimer);
+}
+
+static u64 stm32_ddr_pmu_compute_bw(struct stm32_ddr_pmu *stm32_ddr_pmu,
+				    int counter)
+{
+	u64 bw = stm32_ddr_pmu->events_cnt[counter], tmp;
+	u64 div = stm32_ddr_pmu->events_cnt[TIME_CNT];
+	u32 prediv = 1, premul = 1;
+
+	if (bw && div) {
+		/* Maximize the dividend into 64 bits */
+		while ((bw < 0x8000000000000000ULL) &&
+		       (premul < 0x40000000UL)) {
+			bw = bw << 1;
+			premul *= 2;
+		}
+		/* Minimize the dividor to fit in 32 bits */
+		while ((div > 0xffffffffUL) && (prediv < 0x40000000UL)) {
+			div = div >> 1;
+			prediv *= 2;
+		}
+		/* Divide counter per time and multiply per DDR settings */
+		do_div(bw, div);
+		tmp = bw * BURST_LENGTH * WORD_LENGTH;
+		tmp *= stm32_ddr_pmu->clk_ddr_rate;
+		if (tmp < bw)
+			goto error;
+		bw = tmp;
+		/* Cancel the prediv and premul factors */
+		while (prediv > 1) {
+			bw = bw >> 1;
+			prediv /= 2;
+		}
+		while (premul > 1) {
+			bw = bw >> 1;
+			premul /= 2;
+		}
+		/* Convert MHz to Hz and B to MB, to finally get MB/s */
+		tmp = bw * 1000000;
+		if (tmp < bw)
+			goto error;
+		bw = tmp;
+		premul = 1024 * 1024;
+		while (premul > 1) {
+			bw = bw >> 1;
+			premul /= 2;
+		}
+	}
+	return bw;
+
+error:
+	pr_warn("stm32-ddr-pmu: overflow detected\n");
+	return 0;
+}
+
+static void stm32_ddr_pmu_event_configure(struct perf_event *event)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
+	unsigned long lock_flags, config_base = event->hw.config_base;
+	u32 val;
+
+	spin_lock_irqsave(&stm32_ddr_pmu->lock, lock_flags);
+	writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL);
+
+	if (config_base < TIME_CNT) {
+		val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_CFG);
+		val |= (1 << config_base);
+		writel_relaxed(val, stm32_ddr_pmu->membase + DDRPERFM_CFG);
+	}
+	spin_unlock_irqrestore(&stm32_ddr_pmu->lock, lock_flags);
+}
+
+static void stm32_ddr_pmu_event_read(struct perf_event *event)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
+	unsigned long flags, config_base = event->hw.config_base;
+	struct hw_perf_event *hw = &event->hw;
+	u64 prev_count, new_count, mask;
+	u32 val, offset, bit;
+
+	spin_lock_irqsave(&stm32_ddr_pmu->lock, flags);
+
+	writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL);
+
+	if (config_base == TIME_CNT) {
+		offset = DDRPERFM_TCNT;
+		bit = 1 << 31;
+	} else {
+		offset = DDRPERFM_CNT(config_base);
+		bit = 1 << config_base;
+	}
+	val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_STATUS);
+	if (val & bit)
+		pr_warn("stm32_ddr_pmu hardware overflow\n");
+	val = readl_relaxed(stm32_ddr_pmu->membase + offset);
+	writel_relaxed(bit, stm32_ddr_pmu->membase + DDRPERFM_CCR);
+	writel_relaxed(CTL_START, stm32_ddr_pmu->membase + DDRPERFM_CTL);
+
+	do {
+		prev_count = local64_read(&hw->prev_count);
+		new_count = prev_count + val;
+	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);
+
+	mask = GENMASK_ULL(31, 0);
+	local64_add(val & mask, &event->count);
+
+	if (new_count < prev_count)
+		pr_warn("STM32 DDR PMU counter saturated\n");
+
+	spin_unlock_irqrestore(&stm32_ddr_pmu->lock, flags);
+}
+
+static void stm32_ddr_pmu_event_start(struct perf_event *event, int flags)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
+	struct hw_perf_event *hw = &event->hw;
+	unsigned long lock_flags;
+
+	if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
+		return;
+
+	if (flags & PERF_EF_RELOAD)
+		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
+
+	stm32_ddr_pmu_event_configure(event);
+
+	/* Clear all counters to synchronize them, then start */
+	spin_lock_irqsave(&stm32_ddr_pmu->lock, lock_flags);
+	writel_relaxed(CCR_CLEAR_ALL, stm32_ddr_pmu->membase + DDRPERFM_CCR);
+	writel_relaxed(CTL_START, stm32_ddr_pmu->membase + DDRPERFM_CTL);
+	spin_unlock_irqrestore(&stm32_ddr_pmu->lock, lock_flags);
+
+	hw->state = 0;
+}
+
+static void stm32_ddr_pmu_event_stop(struct perf_event *event, int flags)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
+	unsigned long lock_flags, config_base = event->hw.config_base;
+	struct hw_perf_event *hw = &event->hw;
+	u32 val, bit;
+
+	if (WARN_ON_ONCE(hw->state & PERF_HES_STOPPED))
+		return;
+
+	spin_lock_irqsave(&stm32_ddr_pmu->lock, lock_flags);
+	writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL);
+	if (config_base == TIME_CNT)
+		bit = 1 << 31;
+	else
+		bit = 1 << config_base;
+	writel_relaxed(bit, stm32_ddr_pmu->membase + DDRPERFM_CCR);
+	if (config_base < TIME_CNT) {
+		val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_CFG);
+		val &= ~bit;
+		writel_relaxed(val, stm32_ddr_pmu->membase + DDRPERFM_CFG);
+	}
+	spin_unlock_irqrestore(&stm32_ddr_pmu->lock, lock_flags);
+
+	hw->state |= PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_UPDATE) {
+		stm32_ddr_pmu_event_read(event);
+		hw->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int stm32_ddr_pmu_event_add(struct perf_event *event, int flags)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
+	unsigned long config_base = event->hw.config_base;
+	struct hw_perf_event *hw = &event->hw;
+
+	stm32_ddr_pmu->events_cnt[config_base] = 0;
+	stm32_ddr_pmu->events[config_base] = event;
+
+	clk_enable(stm32_ddr_pmu->clk);
+	hrtimer_start(&stm32_ddr_pmu->hrtimer, stm32_ddr_pmu->poll_period,
+		      HRTIMER_MODE_REL);
+
+	stm32_ddr_pmu_event_configure(event);
+
+	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+	if (flags & PERF_EF_START)
+		stm32_ddr_pmu_event_start(event, 0);
+
+	return 0;
+}
+
+static void stm32_ddr_pmu_event_del(struct perf_event *event, int flags)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
+	unsigned long config_base = event->hw.config_base;
+	bool stop = true;
+	int i;
+
+	stm32_ddr_pmu_event_stop(event, PERF_EF_UPDATE);
+
+	stm32_ddr_pmu->events_cnt[config_base] += local64_read(&event->count);
+	stm32_ddr_pmu->events[config_base] = NULL;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++)
+		if (stm32_ddr_pmu->events[i])
+			stop = false;
+	if (stop)
+		hrtimer_cancel(&stm32_ddr_pmu->hrtimer);
+
+	clk_disable(stm32_ddr_pmu->clk);
+}
+
+static int stm32_ddr_pmu_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hw = &event->hw;
+
+	if (is_sampling_event(event))
+		return -EINVAL;
+
+	if (event->attach_state & PERF_ATTACH_TASK)
+		return -EINVAL;
+
+	if (event->attr.exclude_user   ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv     ||
+	    event->attr.exclude_idle   ||
+	    event->attr.exclude_host   ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	hw->config_base = event->attr.config;
+
+	return 0;
+}
+
+static enum hrtimer_restart stm32_ddr_pmu_poll(struct hrtimer *hrtimer)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = hrtimer_to_stm32_ddr_pmu(hrtimer);
+	int i;
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++)
+		if (stm32_ddr_pmu->events[i])
+			stm32_ddr_pmu_event_read(stm32_ddr_pmu->events[i]);
+
+	hrtimer_forward_now(hrtimer, stm32_ddr_pmu->poll_period);
+
+	return HRTIMER_RESTART;
+}
+
+static ssize_t stm32_ddr_pmu_sysfs_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct dev_ext_attribute *eattr;
+
+	eattr = container_of(attr, struct dev_ext_attribute, attr);
+
+	return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var);
+}
+
+static ssize_t bandwidth_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = dev_get_drvdata(dev);
+	u64 r_bw, w_bw;
+	int ret;
+
+	if (stm32_ddr_pmu->events_cnt[TIME_CNT]) {
+		r_bw = stm32_ddr_pmu_compute_bw(stm32_ddr_pmu, READ_CNT);
+		w_bw = stm32_ddr_pmu_compute_bw(stm32_ddr_pmu, WRITE_CNT);
+
+		ret = snprintf(buf, PAGE_SIZE, STRING,
+			       r_bw, w_bw, (r_bw + w_bw));
+	} else {
+		ret = snprintf(buf, PAGE_SIZE, "No data available\n");
+	}
+
+	return ret;
+}
+
+#define STM32_DDR_PMU_ATTR(_name, _func, _config)			\
+	(&((struct dev_ext_attribute[]) {				\
+		{ __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
+	})[0].attr.attr)
+
+#define STM32_DDR_PMU_EVENT_ATTR(_name, _config)		\
+	STM32_DDR_PMU_ATTR(_name, stm32_ddr_pmu_sysfs_show,	\
+			   (unsigned long)_config)
+
+static struct attribute *stm32_ddr_pmu_event_attrs[] = {
+	STM32_DDR_PMU_EVENT_ATTR(read_cnt, READ_CNT),
+	STM32_DDR_PMU_EVENT_ATTR(write_cnt, WRITE_CNT),
+	STM32_DDR_PMU_EVENT_ATTR(activate_cnt, ACTIVATE_CNT),
+	STM32_DDR_PMU_EVENT_ATTR(idle_cnt, IDLE_CNT),
+	STM32_DDR_PMU_EVENT_ATTR(time_cnt, TIME_CNT),
+	NULL
+};
+
+static DEVICE_ATTR_RO(bandwidth);
+static struct attribute *stm32_ddr_pmu_bandwidth_attrs[] = {
+	&dev_attr_bandwidth.attr,
+	NULL,
+};
+
+static struct attribute_group stm32_ddr_pmu_event_attrs_group = {
+	.name = "events",
+	.attrs = stm32_ddr_pmu_event_attrs,
+};
+
+static struct attribute_group stm32_ddr_pmu_bandwidth_attrs_group = {
+	.attrs = stm32_ddr_pmu_bandwidth_attrs,
+};
+
+static const struct attribute_group *stm32_ddr_pmu_attr_groups[] = {
+	&stm32_ddr_pmu_event_attrs_group,
+	&stm32_ddr_pmu_bandwidth_attrs_group,
+	NULL,
+};
+
+static int stm32_ddr_pmu_device_probe(struct platform_device *pdev)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu;
+	struct resource *res;
+	int i, ret;
+	u32 val;
+
+	stm32_ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(struct stm32_ddr_pmu),
+				     GFP_KERNEL);
+	if (!stm32_ddr_pmu)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, stm32_ddr_pmu);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	stm32_ddr_pmu->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(stm32_ddr_pmu->membase)) {
+		pr_warn("Unable to get STM32 DDR PMU membase\n");
+		return PTR_ERR(stm32_ddr_pmu->membase);
+	}
+
+	stm32_ddr_pmu->clk = devm_clk_get(&pdev->dev, "bus");
+	if (IS_ERR(stm32_ddr_pmu->clk)) {
+		pr_warn("Unable to get STM32 DDR PMU clock\n");
+		return PTR_ERR(stm32_ddr_pmu->clk);
+	}
+
+	ret = clk_prepare_enable(stm32_ddr_pmu->clk);
+	if (ret) {
+		pr_warn("Unable to prepare STM32 DDR PMU clock\n");
+		return ret;
+	}
+
+	stm32_ddr_pmu->clk_ddr = devm_clk_get(&pdev->dev, "ddr");
+	if (IS_ERR(stm32_ddr_pmu->clk_ddr)) {
+		pr_warn("Unable to get STM32 DDR clock\n");
+		return PTR_ERR(stm32_ddr_pmu->clk_ddr);
+	}
+	stm32_ddr_pmu->clk_ddr_rate = clk_get_rate(stm32_ddr_pmu->clk_ddr);
+	stm32_ddr_pmu->clk_ddr_rate /= 1000000;
+
+	stm32_ddr_pmu->poll_period = ms_to_ktime(POLL_MS);
+	hrtimer_init(&stm32_ddr_pmu->hrtimer, CLOCK_MONOTONIC,
+		     HRTIMER_MODE_REL);
+	stm32_ddr_pmu->hrtimer.function = stm32_ddr_pmu_poll;
+	spin_lock_init(&stm32_ddr_pmu->lock);
+
+	for (i = 0; i < PMU_NR_COUNTERS; i++) {
+		stm32_ddr_pmu->events[i] = NULL;
+		stm32_ddr_pmu->events_cnt[i] = 0;
+	}
+
+	val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_SID);
+	if (val != SID_MAGIC_ID)
+		return -EINVAL;
+
+	stm32_ddr_pmu->pmu = (struct pmu) {
+		.task_ctx_nr = perf_invalid_context,
+		.start = stm32_ddr_pmu_event_start,
+		.stop = stm32_ddr_pmu_event_stop,
+		.add = stm32_ddr_pmu_event_add,
+		.del = stm32_ddr_pmu_event_del,
+		.event_init = stm32_ddr_pmu_event_init,
+		.attr_groups = stm32_ddr_pmu_attr_groups,
+	};
+	ret = perf_pmu_register(&stm32_ddr_pmu->pmu, "ddrperfm", -1);
+	if (ret) {
+		pr_warn("Unable to register STM32 DDR PMU\n");
+		return ret;
+	}
+
+	pr_info("stm32-ddr-pmu: probed (ID=0x%08x VER=0x%08x), DDR@%luMHz\n",
+		readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_ID),
+		readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_VER),
+		stm32_ddr_pmu->clk_ddr_rate);
+
+	clk_disable(stm32_ddr_pmu->clk);
+
+	return 0;
+}
+
+static int stm32_ddr_pmu_device_remove(struct platform_device *pdev)
+{
+	struct stm32_ddr_pmu *stm32_ddr_pmu = platform_get_drvdata(pdev);
+
+	perf_pmu_unregister(&stm32_ddr_pmu->pmu);
+
+	return 0;
+}
+
+static const struct of_device_id stm32_ddr_pmu_of_match[] = {
+	{ .compatible = "st,stm32-ddr-pmu" },
+	{ },
+};
+
+static struct platform_driver stm32_ddr_pmu_driver = {
+	.driver = {
+		.name	= "stm32-ddr-pmu",
+		.of_match_table = of_match_ptr(stm32_ddr_pmu_of_match),
+	},
+	.probe = stm32_ddr_pmu_device_probe,
+	.remove = stm32_ddr_pmu_device_remove,
+};
+
+module_platform_driver(stm32_ddr_pmu_driver);
+
+MODULE_DESCRIPTION("Perf driver for STM32 DDR performance monitor");
+MODULE_AUTHOR("Gerald Baeza <gerald.baeza@xxxxxx>");
+MODULE_LICENSE("GPL v2");
-- 
2.7.4




[Index of Archives]     [Device Tree Compilter]     [Device Tree Spec]     [Linux Driver Backports]     [Video for Linux]     [Linux USB Devel]     [Linux PCI Devel]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Yosemite Backpacking]


  Powered by Linux