On Tue, Aug 27, 2019 at 03:08:20PM +0000, Gerald BAEZA wrote: > The DDRPERFM is the DDR Performance Monitor embedded in STM32MP1 SOC. > > This perf drivers supports the read, write, activate, idle and total > time counters, described in the reference manual RM0436 that is > accessible from Documentation/arm/stm32/stm32mp157-overview.rst > > Signed-off-by: Gerald Baeza <gerald.baeza@xxxxxx> > --- > drivers/perf/Kconfig | 6 + > drivers/perf/Makefile | 1 + > drivers/perf/stm32_ddr_pmu.c | 426 +++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 433 insertions(+) > create mode 100644 drivers/perf/stm32_ddr_pmu.c > > diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig > index 09ae8a9..a3d917e 100644 > --- a/drivers/perf/Kconfig > +++ b/drivers/perf/Kconfig > @@ -114,6 +114,12 @@ config THUNDERX2_PMU > The SoC has PMU support in its L3 cache controller (L3C) and > in the DDR4 Memory Controller (DMC). > > +config STM32_DDR_PMU > + tristate "STM32 DDR PMU" > + depends on MACH_STM32MP157 > + help > + Support for STM32 DDR performance monitor (DDRPERFM). Weird indentation here (spaces not tabs?). 
> config XGENE_PMU > depends on ARCH_XGENE > bool "APM X-Gene SoC PMU" > diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile > index 2ebb4de..fd3368c 100644 > --- a/drivers/perf/Makefile > +++ b/drivers/perf/Makefile > @@ -9,6 +9,7 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o > obj-$(CONFIG_HISI_PMU) += hisilicon/ > obj-$(CONFIG_QCOM_L2_PMU) += qcom_l2_pmu.o > obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o > +obj-$(CONFIG_STM32_DDR_PMU) += stm32_ddr_pmu.o > obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o > obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o > obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o > diff --git a/drivers/perf/stm32_ddr_pmu.c b/drivers/perf/stm32_ddr_pmu.c > new file mode 100644 > index 0000000..d0480e0 > --- /dev/null > +++ b/drivers/perf/stm32_ddr_pmu.c > @@ -0,0 +1,426 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * This file is the STM32 DDR performance monitor (DDRPERFM) driver > + * > + * Copyright (C) 2019, STMicroelectronics - All Rights Reserved > + * Author: Gerald Baeza <gerald.baeza@xxxxxx> > + */ > + > +#include <linux/clk.h> > +#include <linux/delay.h> > +#include <linux/hrtimer.h> > +#include <linux/io.h> > +#include <linux/module.h> > +#include <linux/of_platform.h> > +#include <linux/perf_event.h> > +#include <linux/reset.h> > +#include <linux/slab.h> > +#include <linux/types.h> > + > +/* > + * The PMU is able to freeze all counters and generate an interrupt when there > + * is a counter overflow. But, relying on this means that we lose all the > + * events that occur between the freeze and the interrupt handler execution. > + * So we use a polling mechanism to avoid this lose of information. > + * The fastest counter can overflow in ~8s @533MHz (that is the maximum DDR > + * frequency supported on STM32MP157), so we poll in 4s intervals to ensure > + * we don't reach this limit. 
> + */ > +#define POLL_MS 4000 > + > +#define DDRPERFM_CTL 0x000 > +#define DDRPERFM_CFG 0x004 > +#define DDRPERFM_STATUS 0x008 > +#define DDRPERFM_CCR 0x00C > +#define DDRPERFM_IER 0x010 > +#define DDRPERFM_ISR 0x014 > +#define DDRPERFM_ICR 0x018 > +#define DDRPERFM_TCNT 0x020 > +#define DDRPERFM_CNT(X) (0x030 + 8 * (X)) > +#define DDRPERFM_HWCFG 0x3F0 > +#define DDRPERFM_VER 0x3F4 > +#define DDRPERFM_ID 0x3F8 > +#define DDRPERFM_SID 0x3FC > + > +#define CTL_START 0x00000001 > +#define CTL_STOP 0x00000002 > +#define CCR_CLEAR_ALL 0x8000000F > +#define SID_MAGIC_ID 0xA3C5DD01 What's this for? The check during probe looks weird. > + > +enum { > + READ_CNT, > + WRITE_CNT, > + ACTIVATE_CNT, > + IDLE_CNT, > + TIME_CNT, > + PMU_NR_COUNTERS > +}; I think these correspond directly to the values set by userspace in attr.config, so you probably want to clamp attr.config to be < PMU_NR_COUNTERS in stm32_ddr_pmu_event_init(). > +struct stm32_ddr_pmu { > + struct pmu pmu; > + void __iomem *membase; > + struct clk *clk; > + struct hrtimer hrtimer; > + cpumask_t pmu_cpu; > + ktime_t poll_period; > + struct perf_event *events[PMU_NR_COUNTERS]; > + u64 events_cnt[PMU_NR_COUNTERS]; > +}; > + > +static inline struct stm32_ddr_pmu *pmu_to_stm32_ddr_pmu(struct pmu *p) > +{ > + return container_of(p, struct stm32_ddr_pmu, pmu); > +} > + > +static inline struct stm32_ddr_pmu *hrtimer_to_stm32_ddr_pmu(struct hrtimer *h) > +{ > + return container_of(h, struct stm32_ddr_pmu, hrtimer); > +} > + > +static void stm32_ddr_pmu_event_configure(struct perf_event *event) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + unsigned long config_base = event->hw.config_base; > + u32 val; > + > + writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL); > + > + if (config_base < TIME_CNT) { > + val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_CFG); > + val |= (1 << config_base); > + writel_relaxed(val, stm32_ddr_pmu->membase + DDRPERFM_CFG); > + } > +} > + 
> +static void stm32_ddr_pmu_event_read(struct perf_event *event) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + unsigned long config_base = event->hw.config_base; > + struct hw_perf_event *hw = &event->hw; > + u64 prev_count, new_count, mask; > + u32 val, offset, bit; > + > + writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL); > + > + if (config_base == TIME_CNT) { > + offset = DDRPERFM_TCNT; > + bit = 1 << 31; > + } else { > + offset = DDRPERFM_CNT(config_base); > + bit = 1 << config_base; > + } > + val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_STATUS); > + if (val & bit) > + pr_warn("STM32 DDR PMU hardware counter overflow\n"); I don't think this print is useful. Surely overflow is fatal and you should do something like put the event into an error state? > + val = readl_relaxed(stm32_ddr_pmu->membase + offset); > + writel_relaxed(bit, stm32_ddr_pmu->membase + DDRPERFM_CCR); > + writel_relaxed(CTL_START, stm32_ddr_pmu->membase + DDRPERFM_CTL); > + > + do { > + prev_count = local64_read(&hw->prev_count); > + new_count = prev_count + val; > + } while (local64_xchg(&hw->prev_count, new_count) != prev_count); > + > + mask = GENMASK_ULL(31, 0); > + local64_add(val & mask, &event->count); > + > + if (new_count < prev_count) > + pr_warn("STM32 DDR PMU software counter rollover\n"); These are 64-bit. How fast do you expect the counters to tick? 
> +static void stm32_ddr_pmu_event_start(struct perf_event *event, int flags) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + struct hw_perf_event *hw = &event->hw; > + > + if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED))) > + return; > + > + if (flags & PERF_EF_RELOAD) > + WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE)); > + > + stm32_ddr_pmu_event_configure(event); > + > + /* Clear all counters to synchronize them, then start */ > + writel_relaxed(CCR_CLEAR_ALL, stm32_ddr_pmu->membase + DDRPERFM_CCR); > + writel_relaxed(CTL_START, stm32_ddr_pmu->membase + DDRPERFM_CTL); > + local64_set(&hw->prev_count, 0); > + hw->state = 0; > +} > + > +static void stm32_ddr_pmu_event_stop(struct perf_event *event, int flags) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + unsigned long config_base = event->hw.config_base; > + struct hw_perf_event *hw = &event->hw; > + u32 val, bit; > + > + if (WARN_ON_ONCE(hw->state & PERF_HES_STOPPED)) > + return; > + > + writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL); > + if (config_base == TIME_CNT) > + bit = 1 << 31; > + else > + bit = 1 << config_base; > + writel_relaxed(bit, stm32_ddr_pmu->membase + DDRPERFM_CCR); > + if (config_base < TIME_CNT) { > + val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_CFG); > + val &= ~bit; > + writel_relaxed(val, stm32_ddr_pmu->membase + DDRPERFM_CFG); > + } > + > + hw->state |= PERF_HES_STOPPED; > + > + if (flags & PERF_EF_UPDATE) { > + stm32_ddr_pmu_event_read(event); > + hw->state |= PERF_HES_UPTODATE; > + } > +} > + > +static int stm32_ddr_pmu_event_add(struct perf_event *event, int flags) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + unsigned long config_base = event->hw.config_base; > + struct hw_perf_event *hw = &event->hw; > + > + stm32_ddr_pmu->events_cnt[config_base] = 0; > + stm32_ddr_pmu->events[config_base] = event; > + > + clk_enable(stm32_ddr_pmu->clk); > + /* 
> + * Pin the timer, so that the overflows are handled by the chosen > + * event->cpu (this is the same one as presented in "cpumask" > + * attribute). > + */ > + hrtimer_start(&stm32_ddr_pmu->hrtimer, stm32_ddr_pmu->poll_period, > + HRTIMER_MODE_REL_PINNED); > + > + stm32_ddr_pmu_event_configure(event); > + > + hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; > + > + if (flags & PERF_EF_START) > + stm32_ddr_pmu_event_start(event, 0); > + > + return 0; > +} > + > +static void stm32_ddr_pmu_event_del(struct perf_event *event, int flags) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + unsigned long config_base = event->hw.config_base; > + bool stop = true; > + int i; > + > + stm32_ddr_pmu_event_stop(event, PERF_EF_UPDATE); > + > + stm32_ddr_pmu->events_cnt[config_base] += local64_read(&event->count); > + stm32_ddr_pmu->events[config_base] = NULL; > + > + for (i = 0; i < PMU_NR_COUNTERS; i++) > + if (stm32_ddr_pmu->events[i]) > + stop = false; > + if (stop) This is just i == PMU_NR_COUNTERS if you add a break in the if clause. 
> + hrtimer_cancel(&stm32_ddr_pmu->hrtimer); > + > + clk_disable(stm32_ddr_pmu->clk); > +} > + > +static int stm32_ddr_pmu_event_init(struct perf_event *event) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu); > + struct hw_perf_event *hw = &event->hw; > + > + if (event->attr.type != event->pmu->type) > + return -ENOENT; > + > + if (is_sampling_event(event)) > + return -EINVAL; > + > + if (event->attach_state & PERF_ATTACH_TASK) > + return -EINVAL; > + > + if (event->attr.exclude_user || > + event->attr.exclude_kernel || > + event->attr.exclude_hv || > + event->attr.exclude_idle || > + event->attr.exclude_host || > + event->attr.exclude_guest) > + return -EINVAL; > + > + if (event->cpu < 0) > + return -EINVAL; > + > + hw->config_base = event->attr.config; > + event->cpu = cpumask_first(&stm32_ddr_pmu->pmu_cpu); > + > + return 0; > +} > + > +static enum hrtimer_restart stm32_ddr_pmu_poll(struct hrtimer *hrtimer) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu = hrtimer_to_stm32_ddr_pmu(hrtimer); > + int i; > + > + for (i = 0; i < PMU_NR_COUNTERS; i++) > + if (stm32_ddr_pmu->events[i]) > + stm32_ddr_pmu_event_read(stm32_ddr_pmu->events[i]); > + > + hrtimer_forward_now(hrtimer, stm32_ddr_pmu->poll_period); > + > + return HRTIMER_RESTART; > +} > + > +static ssize_t stm32_ddr_pmu_sysfs_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct dev_ext_attribute *eattr; > + > + eattr = container_of(attr, struct dev_ext_attribute, attr); > + > + return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var); > +} Will you ever want to use other bits in the config to configure the PMU? If so, perhaps it's worth carving out a smaller event field, a bit like fsl_imx8_ddr_perf.c does. 
> + > +#define STM32_DDR_PMU_ATTR(_name, _func, _config) \ > + (&((struct dev_ext_attribute[]) { \ > + { __ATTR(_name, 0444, _func, NULL), (void *)_config } \ > + })[0].attr.attr) > + > +#define STM32_DDR_PMU_EVENT_ATTR(_name, _config) \ > + STM32_DDR_PMU_ATTR(_name, stm32_ddr_pmu_sysfs_show, \ > + (unsigned long)_config) > + > +static struct attribute *stm32_ddr_pmu_event_attrs[] = { > + STM32_DDR_PMU_EVENT_ATTR(read_cnt, READ_CNT), > + STM32_DDR_PMU_EVENT_ATTR(write_cnt, WRITE_CNT), > + STM32_DDR_PMU_EVENT_ATTR(activate_cnt, ACTIVATE_CNT), > + STM32_DDR_PMU_EVENT_ATTR(idle_cnt, IDLE_CNT), > + STM32_DDR_PMU_EVENT_ATTR(time_cnt, TIME_CNT), > + NULL > +}; > + > +static struct attribute_group stm32_ddr_pmu_event_attrs_group = { > + .name = "events", > + .attrs = stm32_ddr_pmu_event_attrs, > +}; > + > +static const struct attribute_group *stm32_ddr_pmu_attr_groups[] = { > + &stm32_ddr_pmu_event_attrs_group, > + NULL, > +}; > + > +static int stm32_ddr_pmu_device_probe(struct platform_device *pdev) > +{ > + struct stm32_ddr_pmu *stm32_ddr_pmu; > + struct reset_control *rst; > + struct resource *res; > + int i, ret; > + u32 val; > + > + stm32_ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(struct stm32_ddr_pmu), > + GFP_KERNEL); > + if (!stm32_ddr_pmu) > + return -ENOMEM; > + platform_set_drvdata(pdev, stm32_ddr_pmu); > + > + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); > + stm32_ddr_pmu->membase = devm_ioremap_resource(&pdev->dev, res); > + if (IS_ERR(stm32_ddr_pmu->membase)) { > + pr_warn("Unable to get STM32 DDR PMU membase\n"); > + return PTR_ERR(stm32_ddr_pmu->membase); > + } > + > + stm32_ddr_pmu->clk = devm_clk_get(&pdev->dev, NULL); > + if (IS_ERR(stm32_ddr_pmu->clk)) { > + pr_warn("Unable to get STM32 DDR PMU clock\n"); > + return PTR_ERR(stm32_ddr_pmu->clk); > + } > + > + ret = clk_prepare_enable(stm32_ddr_pmu->clk); > + if (ret) { > + pr_warn("Unable to prepare STM32 DDR PMU clock\n"); > + return ret; > + } > + > + stm32_ddr_pmu->poll_period = 
ms_to_ktime(POLL_MS); > + hrtimer_init(&stm32_ddr_pmu->hrtimer, CLOCK_MONOTONIC, > + HRTIMER_MODE_REL); I would /much/ prefer for the timer to be handled by the perf core automatically when a PMU is registered with PERF_PMU_CAP_NO_INTERRUPT. That way, other drivers can benefit from this without tonnes of code duplication. > + stm32_ddr_pmu->hrtimer.function = stm32_ddr_pmu_poll; > + > + /* > + * The PMU is assigned to the cpu0 and there is no need to manage cpu > + * hot plug migration because cpu0 is always the first/last active cpu > + * during low power transitions. > + */ > + cpumask_set_cpu(0, &stm32_ddr_pmu->pmu_cpu); > + > + for (i = 0; i < PMU_NR_COUNTERS; i++) { > + stm32_ddr_pmu->events[i] = NULL; > + stm32_ddr_pmu->events_cnt[i] = 0; > + } > + > + val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_SID); > + if (val != SID_MAGIC_ID) > + return -EINVAL; > + > + stm32_ddr_pmu->pmu = (struct pmu) { > + .task_ctx_nr = perf_invalid_context, > + .start = stm32_ddr_pmu_event_start, > + .stop = stm32_ddr_pmu_event_stop, > + .add = stm32_ddr_pmu_event_add, > + .del = stm32_ddr_pmu_event_del, > + .event_init = stm32_ddr_pmu_event_init, > + .attr_groups = stm32_ddr_pmu_attr_groups, > + }; > + ret = perf_pmu_register(&stm32_ddr_pmu->pmu, "stm32_ddr_pmu", -1); You might want an index on the end of this name in case you ever want to support more than one in a given SoC. > + if (ret) { > + pr_warn("Unable to register STM32 DDR PMU\n"); > + return ret; > + } > + > + rst = devm_reset_control_get_exclusive(&pdev->dev, NULL); > + if (!IS_ERR(rst)) { > + reset_control_assert(rst); > + udelay(2); > + reset_control_deassert(rst); > + } > + > + pr_info("stm32-ddr-pmu: probed (DDRPERFM ID=0x%08x VER=0x%08x)\n", > + readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_ID), > + readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_VER)); dev_info(). Similarly for many of your other pr_*() calls. Will