Re: [PATCH v3 3/5] perf: stm32: ddrperfm driver creation

On Tue, Aug 27, 2019 at 03:08:20PM +0000, Gerald BAEZA wrote:
> The DDRPERFM is the DDR Performance Monitor embedded in STM32MP1 SOC.
> 
> This perf driver supports the read, write, activate, idle and total
> time counters, described in the reference manual RM0436 that is
> accessible from Documentation/arm/stm32/stm32mp157-overview.rst
> 
> Signed-off-by: Gerald Baeza <gerald.baeza@xxxxxx>
> ---
>  drivers/perf/Kconfig         |   6 +
>  drivers/perf/Makefile        |   1 +
>  drivers/perf/stm32_ddr_pmu.c | 426 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 433 insertions(+)
>  create mode 100644 drivers/perf/stm32_ddr_pmu.c
> 
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index 09ae8a9..a3d917e 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -114,6 +114,12 @@ config THUNDERX2_PMU
>  	   The SoC has PMU support in its L3 cache controller (L3C) and
>  	   in the DDR4 Memory Controller (DMC).
>  
> +config STM32_DDR_PMU
> +       tristate "STM32 DDR PMU"
> +       depends on MACH_STM32MP157
> +       help
> +         Support for STM32 DDR performance monitor (DDRPERFM).

Weird indentation here (spaces, not tabs?).

>  config XGENE_PMU
>          depends on ARCH_XGENE
>          bool "APM X-Gene SoC PMU"
> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
> index 2ebb4de..fd3368c 100644
> --- a/drivers/perf/Makefile
> +++ b/drivers/perf/Makefile
> @@ -9,6 +9,7 @@ obj-$(CONFIG_FSL_IMX8_DDR_PMU) += fsl_imx8_ddr_perf.o
>  obj-$(CONFIG_HISI_PMU) += hisilicon/
>  obj-$(CONFIG_QCOM_L2_PMU)	+= qcom_l2_pmu.o
>  obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
> +obj-$(CONFIG_STM32_DDR_PMU) += stm32_ddr_pmu.o
>  obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
>  obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
> diff --git a/drivers/perf/stm32_ddr_pmu.c b/drivers/perf/stm32_ddr_pmu.c
> new file mode 100644
> index 0000000..d0480e0
> --- /dev/null
> +++ b/drivers/perf/stm32_ddr_pmu.c
> @@ -0,0 +1,426 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * This file is the STM32 DDR performance monitor (DDRPERFM) driver
> + *
> + * Copyright (C) 2019, STMicroelectronics - All Rights Reserved
> + * Author: Gerald Baeza <gerald.baeza@xxxxxx>
> + */
> +
> +#include <linux/clk.h>
> +#include <linux/delay.h>
> +#include <linux/hrtimer.h>
> +#include <linux/io.h>
> +#include <linux/module.h>
> +#include <linux/of_platform.h>
> +#include <linux/perf_event.h>
> +#include <linux/reset.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +
> +/*
> + * The PMU is able to freeze all counters and generate an interrupt when there
> + * is a counter overflow. But, relying on this means that we lose all the
> + * events that occur between the freeze and the interrupt handler execution.
> + * So we use a polling mechanism to avoid this loss of information.
> + * The fastest counter can overflow in ~8s @533MHz (that is the maximum DDR
> + * frequency supported on STM32MP157), so we poll in 4s intervals to ensure
> + * we don't reach this limit.
> + */
> +#define POLL_MS		4000
> +
> +#define DDRPERFM_CTL	0x000
> +#define DDRPERFM_CFG	0x004
> +#define DDRPERFM_STATUS 0x008
> +#define DDRPERFM_CCR	0x00C
> +#define DDRPERFM_IER	0x010
> +#define DDRPERFM_ISR	0x014
> +#define DDRPERFM_ICR	0x018
> +#define DDRPERFM_TCNT	0x020
> +#define DDRPERFM_CNT(X)	(0x030 + 8 * (X))
> +#define DDRPERFM_HWCFG	0x3F0
> +#define DDRPERFM_VER	0x3F4
> +#define DDRPERFM_ID	0x3F8
> +#define DDRPERFM_SID	0x3FC
> +
> +#define CTL_START	0x00000001
> +#define CTL_STOP	0x00000002
> +#define CCR_CLEAR_ALL	0x8000000F
> +#define SID_MAGIC_ID	0xA3C5DD01

What's this for? The check during probe looks weird.

> +
> +enum {
> +	READ_CNT,
> +	WRITE_CNT,
> +	ACTIVATE_CNT,
> +	IDLE_CNT,
> +	TIME_CNT,
> +	PMU_NR_COUNTERS
> +};

I think these correspond directly to the values set by userspace in
attr.config, so you probably want to clamp attr.config to be <
PMU_NR_COUNTERS in stm32_ddr_pmu_event_init().
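
Something like this in stm32_ddr_pmu_event_init(), before hw->config_base
is assigned (untested sketch, reusing the PMU_NR_COUNTERS enum from the
patch):

	/* attr.config is used directly as a counter index, so bound it */
	if (event->attr.config >= PMU_NR_COUNTERS)
		return -EINVAL;

	hw->config_base = event->attr.config;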

> +struct stm32_ddr_pmu {
> +	struct pmu pmu;
> +	void __iomem *membase;
> +	struct clk *clk;
> +	struct hrtimer hrtimer;
> +	cpumask_t pmu_cpu;
> +	ktime_t poll_period;
> +	struct perf_event *events[PMU_NR_COUNTERS];
> +	u64 events_cnt[PMU_NR_COUNTERS];
> +};
> +
> +static inline struct stm32_ddr_pmu *pmu_to_stm32_ddr_pmu(struct pmu *p)
> +{
> +	return container_of(p, struct stm32_ddr_pmu, pmu);
> +}
> +
> +static inline struct stm32_ddr_pmu *hrtimer_to_stm32_ddr_pmu(struct hrtimer *h)
> +{
> +	return container_of(h, struct stm32_ddr_pmu, hrtimer);
> +}
> +
> +static void stm32_ddr_pmu_event_configure(struct perf_event *event)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	unsigned long config_base = event->hw.config_base;
> +	u32 val;
> +
> +	writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL);
> +
> +	if (config_base < TIME_CNT) {
> +		val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_CFG);
> +		val |= (1 << config_base);
> +		writel_relaxed(val, stm32_ddr_pmu->membase + DDRPERFM_CFG);
> +	}
> +}
> +
> +static void stm32_ddr_pmu_event_read(struct perf_event *event)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	unsigned long config_base = event->hw.config_base;
> +	struct hw_perf_event *hw = &event->hw;
> +	u64 prev_count, new_count, mask;
> +	u32 val, offset, bit;
> +
> +	writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL);
> +
> +	if (config_base == TIME_CNT) {
> +		offset = DDRPERFM_TCNT;
> +		bit = 1 << 31;
> +	} else {
> +		offset = DDRPERFM_CNT(config_base);
> +		bit = 1 << config_base;
> +	}
> +	val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_STATUS);
> +	if (val & bit)
> +		pr_warn("STM32 DDR PMU hardware counter overflow\n");

I don't think this print is useful. Surely overflow is fatal and you should
do something like put the event into an error state?

> +	val = readl_relaxed(stm32_ddr_pmu->membase + offset);
> +	writel_relaxed(bit, stm32_ddr_pmu->membase + DDRPERFM_CCR);
> +	writel_relaxed(CTL_START, stm32_ddr_pmu->membase + DDRPERFM_CTL);
> +
> +	do {
> +		prev_count = local64_read(&hw->prev_count);
> +		new_count = prev_count + val;
> +	} while (local64_xchg(&hw->prev_count, new_count) != prev_count);
> +
> +	mask = GENMASK_ULL(31, 0);
> +	local64_add(val & mask, &event->count);
> +
> +	if (new_count < prev_count)
> +		pr_warn("STM32 DDR PMU software counter rollover\n");

These are 64-bit. How fast do you expect the counters to tick?

> +static void stm32_ddr_pmu_event_start(struct perf_event *event, int flags)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	struct hw_perf_event *hw = &event->hw;
> +
> +	if (WARN_ON_ONCE(!(hw->state & PERF_HES_STOPPED)))
> +		return;
> +
> +	if (flags & PERF_EF_RELOAD)
> +		WARN_ON_ONCE(!(hw->state & PERF_HES_UPTODATE));
> +
> +	stm32_ddr_pmu_event_configure(event);
> +
> +	/* Clear all counters to synchronize them, then start */
> +	writel_relaxed(CCR_CLEAR_ALL, stm32_ddr_pmu->membase + DDRPERFM_CCR);
> +	writel_relaxed(CTL_START, stm32_ddr_pmu->membase + DDRPERFM_CTL);
> +	local64_set(&hw->prev_count, 0);
> +	hw->state = 0;
> +}
> +
> +static void stm32_ddr_pmu_event_stop(struct perf_event *event, int flags)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	unsigned long config_base = event->hw.config_base;
> +	struct hw_perf_event *hw = &event->hw;
> +	u32 val, bit;
> +
> +	if (WARN_ON_ONCE(hw->state & PERF_HES_STOPPED))
> +		return;
> +
> +	writel_relaxed(CTL_STOP, stm32_ddr_pmu->membase + DDRPERFM_CTL);
> +	if (config_base == TIME_CNT)
> +		bit = 1 << 31;
> +	else
> +		bit = 1 << config_base;
> +	writel_relaxed(bit, stm32_ddr_pmu->membase + DDRPERFM_CCR);
> +	if (config_base < TIME_CNT) {
> +		val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_CFG);
> +		val &= ~bit;
> +		writel_relaxed(val, stm32_ddr_pmu->membase + DDRPERFM_CFG);
> +	}
> +
> +	hw->state |= PERF_HES_STOPPED;
> +
> +	if (flags & PERF_EF_UPDATE) {
> +		stm32_ddr_pmu_event_read(event);
> +		hw->state |= PERF_HES_UPTODATE;
> +	}
> +}
> +
> +static int stm32_ddr_pmu_event_add(struct perf_event *event, int flags)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	unsigned long config_base = event->hw.config_base;
> +	struct hw_perf_event *hw = &event->hw;
> +
> +	stm32_ddr_pmu->events_cnt[config_base] = 0;
> +	stm32_ddr_pmu->events[config_base] = event;
> +
> +	clk_enable(stm32_ddr_pmu->clk);
> +	/*
> +	 * Pin the timer, so that the overflows are handled by the chosen
> +	 * event->cpu (this is the same one as presented in "cpumask"
> +	 * attribute).
> +	 */
> +	hrtimer_start(&stm32_ddr_pmu->hrtimer, stm32_ddr_pmu->poll_period,
> +		      HRTIMER_MODE_REL_PINNED);
> +
> +	stm32_ddr_pmu_event_configure(event);
> +
> +	hw->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
> +
> +	if (flags & PERF_EF_START)
> +		stm32_ddr_pmu_event_start(event, 0);
> +
> +	return 0;
> +}
> +
> +static void stm32_ddr_pmu_event_del(struct perf_event *event, int flags)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	unsigned long config_base = event->hw.config_base;
> +	bool stop = true;
> +	int i;
> +
> +	stm32_ddr_pmu_event_stop(event, PERF_EF_UPDATE);
> +
> +	stm32_ddr_pmu->events_cnt[config_base] += local64_read(&event->count);
> +	stm32_ddr_pmu->events[config_base] = NULL;
> +
> +	for (i = 0; i < PMU_NR_COUNTERS; i++)
> +		if (stm32_ddr_pmu->events[i])
> +			stop = false;
> +	if (stop)

This is just i == PMU_NR_COUNTERS if you add a break in the if clause.
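
i.e. something like (untested):

	for (i = 0; i < PMU_NR_COUNTERS; i++)
		if (stm32_ddr_pmu->events[i])
			break;

	if (i == PMU_NR_COUNTERS)
		hrtimer_cancel(&stm32_ddr_pmu->hrtimer);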

> +		hrtimer_cancel(&stm32_ddr_pmu->hrtimer);
> +
> +	clk_disable(stm32_ddr_pmu->clk);
> +}
> +
> +static int stm32_ddr_pmu_event_init(struct perf_event *event)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = pmu_to_stm32_ddr_pmu(event->pmu);
> +	struct hw_perf_event *hw = &event->hw;
> +
> +	if (event->attr.type != event->pmu->type)
> +		return -ENOENT;
> +
> +	if (is_sampling_event(event))
> +		return -EINVAL;
> +
> +	if (event->attach_state & PERF_ATTACH_TASK)
> +		return -EINVAL;
> +
> +	if (event->attr.exclude_user   ||
> +	    event->attr.exclude_kernel ||
> +	    event->attr.exclude_hv     ||
> +	    event->attr.exclude_idle   ||
> +	    event->attr.exclude_host   ||
> +	    event->attr.exclude_guest)
> +		return -EINVAL;
> +
> +	if (event->cpu < 0)
> +		return -EINVAL;
> +
> +	hw->config_base = event->attr.config;
> +	event->cpu = cpumask_first(&stm32_ddr_pmu->pmu_cpu);
> +
> +	return 0;
> +}
> +
> +static enum hrtimer_restart stm32_ddr_pmu_poll(struct hrtimer *hrtimer)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu = hrtimer_to_stm32_ddr_pmu(hrtimer);
> +	int i;
> +
> +	for (i = 0; i < PMU_NR_COUNTERS; i++)
> +		if (stm32_ddr_pmu->events[i])
> +			stm32_ddr_pmu_event_read(stm32_ddr_pmu->events[i]);
> +
> +	hrtimer_forward_now(hrtimer, stm32_ddr_pmu->poll_period);
> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static ssize_t stm32_ddr_pmu_sysfs_show(struct device *dev,
> +					struct device_attribute *attr,
> +					char *buf)
> +{
> +	struct dev_ext_attribute *eattr;
> +
> +	eattr = container_of(attr, struct dev_ext_attribute, attr);
> +
> +	return sprintf(buf, "config=0x%lx\n", (unsigned long)eattr->var);
> +}

Will you ever want to use other bits in the config to configure the PMU?
If so, perhaps it's worth carving out a smaller event field, a bit like
fsl_imx8_ddr_perf.c does.
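
For example, a sketch of the fsl_imx8_ddr_perf.c approach using the generic
PMU_FORMAT_ATTR() helper -- note the 0-4 bit range below is only
illustrative, not taken from the DDRPERFM documentation:

	PMU_FORMAT_ATTR(event, "config:0-4");

	static struct attribute *stm32_ddr_pmu_format_attrs[] = {
		&format_attr_event.attr,
		NULL,
	};

	static struct attribute_group stm32_ddr_pmu_format_attrs_group = {
		.name = "format",
		.attrs = stm32_ddr_pmu_format_attrs,
	};

with the new group added to stm32_ddr_pmu_attr_groups[], so that any config
bits you don't expose now stay available later.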

> +
> +#define STM32_DDR_PMU_ATTR(_name, _func, _config)			\
> +	(&((struct dev_ext_attribute[]) {				\
> +		{ __ATTR(_name, 0444, _func, NULL), (void *)_config }   \
> +	})[0].attr.attr)
> +
> +#define STM32_DDR_PMU_EVENT_ATTR(_name, _config)		\
> +	STM32_DDR_PMU_ATTR(_name, stm32_ddr_pmu_sysfs_show,	\
> +			   (unsigned long)_config)
> +
> +static struct attribute *stm32_ddr_pmu_event_attrs[] = {
> +	STM32_DDR_PMU_EVENT_ATTR(read_cnt, READ_CNT),
> +	STM32_DDR_PMU_EVENT_ATTR(write_cnt, WRITE_CNT),
> +	STM32_DDR_PMU_EVENT_ATTR(activate_cnt, ACTIVATE_CNT),
> +	STM32_DDR_PMU_EVENT_ATTR(idle_cnt, IDLE_CNT),
> +	STM32_DDR_PMU_EVENT_ATTR(time_cnt, TIME_CNT),
> +	NULL
> +};
> +
> +static struct attribute_group stm32_ddr_pmu_event_attrs_group = {
> +	.name = "events",
> +	.attrs = stm32_ddr_pmu_event_attrs,
> +};
> +
> +static const struct attribute_group *stm32_ddr_pmu_attr_groups[] = {
> +	&stm32_ddr_pmu_event_attrs_group,
> +	NULL,
> +};
> +
> +static int stm32_ddr_pmu_device_probe(struct platform_device *pdev)
> +{
> +	struct stm32_ddr_pmu *stm32_ddr_pmu;
> +	struct reset_control *rst;
> +	struct resource *res;
> +	int i, ret;
> +	u32 val;
> +
> +	stm32_ddr_pmu = devm_kzalloc(&pdev->dev, sizeof(struct stm32_ddr_pmu),
> +				     GFP_KERNEL);
> +	if (!stm32_ddr_pmu)
> +		return -ENOMEM;
> +	platform_set_drvdata(pdev, stm32_ddr_pmu);
> +
> +	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> +	stm32_ddr_pmu->membase = devm_ioremap_resource(&pdev->dev, res);
> +	if (IS_ERR(stm32_ddr_pmu->membase)) {
> +		pr_warn("Unable to get STM32 DDR PMU membase\n");
> +		return PTR_ERR(stm32_ddr_pmu->membase);
> +	}
> +
> +	stm32_ddr_pmu->clk = devm_clk_get(&pdev->dev, NULL);
> +	if (IS_ERR(stm32_ddr_pmu->clk)) {
> +		pr_warn("Unable to get STM32 DDR PMU clock\n");
> +		return PTR_ERR(stm32_ddr_pmu->clk);
> +	}
> +
> +	ret = clk_prepare_enable(stm32_ddr_pmu->clk);
> +	if (ret) {
> +		pr_warn("Unable to prepare STM32 DDR PMU clock\n");
> +		return ret;
> +	}
> +
> +	stm32_ddr_pmu->poll_period = ms_to_ktime(POLL_MS);
> +	hrtimer_init(&stm32_ddr_pmu->hrtimer, CLOCK_MONOTONIC,
> +		     HRTIMER_MODE_REL);

I would /much/ prefer for the timer to be handled by the perf core
automatically when a PMU is registered with PERF_PMU_CAP_NO_INTERRUPT. That
way, other drivers can benefit from this without tonnes of code duplication.
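
With that in place, the driver side would reduce to something like the
sketch below (note that, as far as I can tell, PERF_PMU_CAP_NO_INTERRUPT
today only makes the core reject sampling events, so the automatic polling
itself would be new core work):

	stm32_ddr_pmu->pmu = (struct pmu) {
		.task_ctx_nr	= perf_invalid_context,
		.capabilities	= PERF_PMU_CAP_NO_INTERRUPT,
		.start		= stm32_ddr_pmu_event_start,
		.stop		= stm32_ddr_pmu_event_stop,
		.add		= stm32_ddr_pmu_event_add,
		.del		= stm32_ddr_pmu_event_del,
		.event_init	= stm32_ddr_pmu_event_init,
		.attr_groups	= stm32_ddr_pmu_attr_groups,
	};

and the hrtimer/poll_period plumbing could then move out of the driver.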

> +	stm32_ddr_pmu->hrtimer.function = stm32_ddr_pmu_poll;
> +
> +	/*
> +	 * The PMU is assigned to the cpu0 and there is no need to manage cpu
> +	 * hot plug migration because cpu0 is always the first/last active cpu
> +	 * during low power transitions.
> +	 */
> +	cpumask_set_cpu(0, &stm32_ddr_pmu->pmu_cpu);
> +
> +	for (i = 0; i < PMU_NR_COUNTERS; i++) {
> +		stm32_ddr_pmu->events[i] = NULL;
> +		stm32_ddr_pmu->events_cnt[i] = 0;
> +	}
> +
> +	val = readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_SID);
> +	if (val != SID_MAGIC_ID)
> +		return -EINVAL;
> +
> +	stm32_ddr_pmu->pmu = (struct pmu) {
> +		.task_ctx_nr = perf_invalid_context,
> +		.start = stm32_ddr_pmu_event_start,
> +		.stop = stm32_ddr_pmu_event_stop,
> +		.add = stm32_ddr_pmu_event_add,
> +		.del = stm32_ddr_pmu_event_del,
> +		.event_init = stm32_ddr_pmu_event_init,
> +		.attr_groups = stm32_ddr_pmu_attr_groups,
> +	};
> +	ret = perf_pmu_register(&stm32_ddr_pmu->pmu, "stm32_ddr_pmu", -1);

You might want an index on the end of this name in case you ever want to
support more than one in a given SoC.
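
e.g. something like (sketch; where the instance id comes from -- a DT
alias, an IDA, whatever -- is up to you):

	char *name;

	/* instance_id is a hypothetical per-SoC index for this DDRPERFM */
	name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "stm32_ddr_pmu_%d",
			      instance_id);
	if (!name)
		return -ENOMEM;

	ret = perf_pmu_register(&stm32_ddr_pmu->pmu, name, -1);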

> +	if (ret) {
> +		pr_warn("Unable to register STM32 DDR PMU\n");
> +		return ret;
> +	}
> +
> +	rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
> +	if (!IS_ERR(rst)) {
> +		reset_control_assert(rst);
> +		udelay(2);
> +		reset_control_deassert(rst);
> +	}
> +
> +	pr_info("stm32-ddr-pmu: probed (DDRPERFM ID=0x%08x VER=0x%08x)\n",
> +		readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_ID),
> +		readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_VER));

dev_info(). Similarly for many of your other pr_*() calls.
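
e.g. for the final probe message:

	dev_info(&pdev->dev, "probed (DDRPERFM ID=0x%08x VER=0x%08x)\n",
		 readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_ID),
		 readl_relaxed(stm32_ddr_pmu->membase + DDRPERFM_VER));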

Will


