The programmable AXI performance monitors (APM) collect real-time
transaction metrics at multiple points on the AXI interconnect to help
system software profile real-time activity. In our platform we have it
in PL and also some of the hardened instances in PS.

Add Xilinx APM driver support.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xxxxxxx>
---
 drivers/perf/Kconfig      |  11 +
 drivers/perf/Makefile     |   1 +
 drivers/perf/xilinx_apm.c | 516 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 528 insertions(+)
 create mode 100644 drivers/perf/xilinx_apm.c

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 1e2d69453771..9be2c5d1f37e 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -183,6 +183,17 @@ config APPLE_M1_CPU_PMU
 	  Provides support for the non-architectural CPU PMUs present on
 	  the Apple M1 SoCs and derivatives.
 
+config XILINX_APM_PMU
+	tristate "Enable PMU support for the Xilinx APM controller"
+	help
+	  Enables perf support for the Xilinx AXI Performance Monitor
+	  controller. The AXI Performance Monitor IP core measures major
+	  performance metrics for the AMBA AXI system. The Performance
+	  Monitor measures bus latency of a specific master/slave
+	  (AXI4/AXI3/AXI4-Stream/AXI4-Lite) in a system and the amount of
+	  memory traffic for specific durations. This core can also be
+	  used for real-time profiling for software applications.
+ + source "drivers/perf/hisilicon/Kconfig" config MARVELL_CN10K_DDR_PMU diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 57a279c61df5..44f4505920b1 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o +obj-$(CONFIG_XILINX_APM_PMU) += xilinx_apm.o diff --git a/drivers/perf/xilinx_apm.c b/drivers/perf/xilinx_apm.c new file mode 100644 index 000000000000..4106805a2a48 --- /dev/null +++ b/drivers/perf/xilinx_apm.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Driver for Xilinx APM Performance Reporting + * + * Copyright 2022 AMD, Inc. + * + */ + +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/of.h> +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> + +#define XAPM_MODE_ADVANCED 1 +#define XAPM_MODE_PROFILE 2 +#define XAPM_MODE_TRACE 3 + +#define XAPM_MSR_OFFSET 0x44 +#define XAPM_MSR_COUNTER 8 +#define XAPM_SLOTID_SHIFT 0x5 +#define XAPM_CTRL_OFFSET 0x300 +#define XAPM_MCR_OFFSET 0x100 +#define XAPM_MCR_COUNTER 16 +#define XAPM_RR 0x108 + +#define XAPM_METRICS_CNT_EN BIT(0) +#define XAPM_GLOBAL_CLK_CNT_EN BIT(16) +#define XAPM_COUNTER_MASK 0xF +#define XAPM_EVENT_MASK GENMASK_ULL(11, 0) +#define XAPM_EVENT_SHIFT 0 +#define XAPM_EVTYPE_MASK GENMASK_ULL(15, 12) +#define XAPM_EVTYPE_SHIFT 12 +#define XAPM_EVTYPE_APM 1 + +#define get_event(_config) FIELD_GET(XAPM_EVENT_MASK, _config) +#define get_evtype(_config) FIELD_GET(XAPM_EVTYPE_MASK, _config) + +#define to_xapm_perf_priv(_pmu) container_of(_pmu, struct xapm_perf_priv, pmu) + +#define XAPM_EVENT_CONFIG(_event, _type) \ + ((void *)((((_event) << XAPM_EVENT_SHIFT) & XAPM_EVENT_MASK) | \ + (((_type) << XAPM_EVTYPE_SHIFT) & XAPM_EVTYPE_MASK))) + +PMU_FORMAT_ATTR(event, 
"config:0-11"); + +/* + * Performance Counter Registers for APM. + * + */ +enum metric { + WRITE_TRANSACTION = 0, + READ_TRANSACTION = 1, + WRITE_BYTE = 2, + READ_BYTE = 3, + WRITE_BEAT = 4, + READ_LATENCY = 5, + WRITE_LATENCY = 6, + SLV_WR_IDLE = 7, + MST_RD_IDLE = 8, + BVALID = 9, + WLASTS = 9, + RLASTS = 10, + MIN_WR_LATENCY = 11, + MAX_WR_LATENCY = 12, + MIN_RD_LATENCY = 13, + MAX_RD_LATENCY = 14, + MAX_METRIC = 15 +}; + +/** + * struct xapm_perf_priv - priv data structure for xapm perf driver + * + * @dev: parent device. + * @ioaddr: mapped base address of DDR region. + * @pmu: pmu data structure for xapm perf counters. + * @cpu: active CPU to which the PMU is bound for accesses. + * @mode: Mode of APM eg advanced/profile/trace . + * @counter: current counter. + * @slot: current slot to be read. + * @range: Range to be allowed. + * @node: handle to the xapm node. + * @maxslots: Maximum number of slots. + * @cpuhp_state: state for CPU hotplug notification. + * @clk: clock handle. + */ +struct xapm_perf_priv { + struct device *dev; + void __iomem *ioaddr; + struct pmu pmu; + unsigned int cpu; + u32 mode; + u32 counter; + u32 slot; + u32 maxslots; + u32 range; + struct hlist_node node; + enum cpuhp_state cpuhp_state; + struct clk *clk; +}; + +static ssize_t cpumask_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct xapm_perf_priv *priv; + + priv = to_xapm_perf_priv(pmu); + + return cpumap_print_to_pagebuf(true, buf, cpumask_of(priv->cpu)); +} +static DEVICE_ATTR_RO(cpumask); + +static ssize_t slot_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + struct pmu *pmu = dev_get_drvdata(dev); + struct xapm_perf_priv *priv; + + priv = to_xapm_perf_priv(pmu); + ret = kstrtou32(buf, 0, &priv->slot); + if (ret < 0) + return ret; + + if (priv->slot > priv->maxslots) + return -EINVAL; + + return size; +} +static DEVICE_ATTR_WO(slot); + +static 
ssize_t range_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + struct pmu *pmu = dev_get_drvdata(dev); + struct xapm_perf_priv *priv; + + priv = to_xapm_perf_priv(pmu); + ret = kstrtou32(buf, 0, &priv->range); + if (ret < 0) + return ret; + + writel(priv->range, priv->ioaddr + XAPM_RR); + + return size; +} +static DEVICE_ATTR_WO(range); + +static struct attribute *xapm_perf_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + &dev_attr_slot.attr, + &dev_attr_range.attr, + NULL, +}; + +static struct attribute_group xapm_perf_cpumask_group = { + .attrs = xapm_perf_cpumask_attrs, +}; + +static struct attribute *xapm_perf_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group xapm_perf_format_group = { + .name = "format", + .attrs = xapm_perf_format_attrs, +}; + +static ssize_t xapm_perf_event_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *eattr; + unsigned long config; + char *ptr = buf; + + eattr = container_of(attr, struct dev_ext_attribute, attr); + config = (unsigned long)eattr->var; + + ptr += sprintf(ptr, "event=0x%02x\n", (unsigned int)get_event(config)); + + return (ssize_t)(ptr - buf); +} + +#define XAPM_EVENT_ATTR(_name) \ + __ATTR(_name, 0444, xapm_perf_event_show, NULL) + +#define XAPM_EVENT(_name, _event) \ +static struct dev_ext_attribute xapm_perf_event_##_name = { \ + .attr = XAPM_EVENT_ATTR(xapm_##_name), \ + .var = XAPM_EVENT_CONFIG(_event, XAPM_EVTYPE_APM), \ +} + +XAPM_EVENT(write_cnt_0, WRITE_TRANSACTION | (0 << 4)); +XAPM_EVENT(read_cnt_0, READ_TRANSACTION | (0 << 4)); +XAPM_EVENT(write_cnt_1, WRITE_TRANSACTION | (1 << 4)); +XAPM_EVENT(read_cnt_1, READ_TRANSACTION | (1 << 4)); +XAPM_EVENT(write_cnt_2, WRITE_TRANSACTION | (2 << 4)); +XAPM_EVENT(read_cnt_2, READ_TRANSACTION | (2 << 4)); +XAPM_EVENT(write_cnt_3, WRITE_TRANSACTION | (3 << 4)); +XAPM_EVENT(read_cnt_3, READ_TRANSACTION | (3 << 4)); 
+XAPM_EVENT(write_cnt_4, WRITE_TRANSACTION | (4 << 4)); +XAPM_EVENT(read_cnt_4, READ_TRANSACTION | (4 << 4)); +XAPM_EVENT(write_cnt_5, WRITE_TRANSACTION | (5 << 4)); +XAPM_EVENT(read_cnt_5, READ_TRANSACTION | (5 << 4)); +XAPM_EVENT(write_cnt_6, WRITE_TRANSACTION | (6 << 4)); +XAPM_EVENT(read_cnt_6, READ_TRANSACTION | (6 << 4)); +XAPM_EVENT(write_cnt_7, WRITE_TRANSACTION | (7 << 4)); +XAPM_EVENT(read_cnt_7, READ_TRANSACTION | (7 << 4)); + +static struct attribute *xapm_perf_events_attrs_all[17] = { + &xapm_perf_event_read_cnt_0.attr.attr, + &xapm_perf_event_write_cnt_0.attr.attr, + &xapm_perf_event_read_cnt_1.attr.attr, + &xapm_perf_event_write_cnt_1.attr.attr, + &xapm_perf_event_read_cnt_2.attr.attr, + &xapm_perf_event_write_cnt_2.attr.attr, + &xapm_perf_event_read_cnt_3.attr.attr, + &xapm_perf_event_write_cnt_3.attr.attr, + &xapm_perf_event_read_cnt_4.attr.attr, + &xapm_perf_event_write_cnt_4.attr.attr, + &xapm_perf_event_read_cnt_5.attr.attr, + &xapm_perf_event_write_cnt_5.attr.attr, + &xapm_perf_event_read_cnt_6.attr.attr, + &xapm_perf_event_write_cnt_6.attr.attr, + &xapm_perf_event_read_cnt_7.attr.attr, + &xapm_perf_event_write_cnt_7.attr.attr, + NULL, +}; + +static u64 xapm_read_event_counter(struct xapm_perf_priv *priv, u32 event) +{ + void __iomem *base = priv->ioaddr; + u64 counter; + u64 reg; + + priv->counter = event >> 4; + + reg = readl(base + XAPM_MSR_OFFSET); + reg = reg & ~(XAPM_COUNTER_MASK << priv->counter * XAPM_MSR_COUNTER); + reg = reg | (event) << priv->counter * 8; + reg = reg | (priv->slot) << (priv->counter * 8 + XAPM_SLOTID_SHIFT); + writel(reg, base + XAPM_MSR_OFFSET); + + counter = readl(base + XAPM_MCR_OFFSET + (priv->counter * XAPM_MCR_COUNTER)); + + return counter; +} + +static int xapm_perf_event_init(struct perf_event *event) +{ + struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu); + struct hw_perf_event *hwc = &event->hw; + void __iomem *base = priv->ioaddr; + u32 reg; + + hwc->event_base = get_evtype(event->attr.config); + 
hwc->idx = (int)get_event(event->attr.config);
+
+	/*
+	 * The counters free-run; init just makes sure the global clock
+	 * counter and the metric counters are enabled.
+	 */
+	reg = readl(base + XAPM_CTRL_OFFSET);
+	reg |= XAPM_GLOBAL_CLK_CNT_EN;
+	reg |= XAPM_METRICS_CNT_EN;
+	writel(reg, base + XAPM_CTRL_OFFSET);
+
+	return 0;
+}
+
+/*
+ * Accumulate the change in the hardware counter since the last read
+ * into the perf event count.
+ */
+static void xapm_perf_event_update(struct perf_event *event)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 now, prev, delta;
+
+	/*
+	 * Advance prev_count to the current reading so that repeated
+	 * updates (read, stop, del) do not add the same delta twice.
+	 */
+	do {
+		prev = local64_read(&hwc->prev_count);
+		now = xapm_read_event_counter(priv, (u32)hwc->idx);
+	} while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+	/* readl() gives 32-bit counts; mask so a wrap stays a small delta. */
+	delta = (now - prev) & GENMASK_ULL(31, 0);
+
+	local64_add(delta, &event->count);
+}
+
+/* Record the current hardware count as the baseline for deltas. */
+static void xapm_perf_event_start(struct perf_event *event, int flags)
+{
+	struct xapm_perf_priv *priv = to_xapm_perf_priv(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 count;
+
+	count = xapm_read_event_counter(priv, (u32)hwc->idx);
+	local64_set(&hwc->prev_count, count);
+}
+
+static void xapm_perf_event_stop(struct perf_event *event, int flags)
+{
+	xapm_perf_event_update(event);
+}
+
+static int xapm_perf_event_add(struct perf_event *event, int flags)
+{
+	if (flags & PERF_EF_START)
+		xapm_perf_event_start(event, flags);
+
+	return 0;
+}
+
+static void xapm_perf_event_del(struct perf_event *event, int flags)
+{
+	xapm_perf_event_stop(event, PERF_EF_UPDATE);
+}
+
+static void xapm_perf_event_read(struct perf_event *event)
+{
+	xapm_perf_event_update(event);
+}
+
+/* CPU hotplug callback: rehome the PMU when its bound CPU goes away. */
+static int xapm_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
+{
+	struct xapm_perf_priv *priv;
+	int target;
+
+	priv = hlist_entry_safe(node, struct xapm_perf_priv, node);
+
+	if (cpu != priv->cpu)
+		return 0;
+
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+
+	/* Move the active events so counting continues on the new CPU. */
+	perf_pmu_migrate_context(&priv->pmu, cpu, target);
+	priv->cpu = target;
+	return 0;
+}
+
+static int xapm_perf_probe(struct platform_device *pdev)
+{
+	/*
+	 * NOTE(review): function-local statics are shared by every bound
+	 * device, so a second APM instance would clobber the first one's
+	 * attribute table. Consider moving these into xapm_perf_priv.
+	 */
+	static struct attribute_group xapm_perf_events_group;
+	static struct attribute *xapm_perf_events_attrs[17];
+	struct xapm_perf_priv *priv;
+	void __iomem *baseaddr;
+	struct resource *res;
+	u32 numcounters;
+	struct pmu *pmu;
+	u32 mode = 0;
+	char *name;
+	int ret, i;
+	static const struct attribute_group *xapm_perf_groups[] = {
+		&xapm_perf_format_group,
+		&xapm_perf_cpumask_group,
+		&xapm_perf_events_group,
+		NULL,
+	};
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, priv);
+	baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(baseaddr))
+		return PTR_ERR(baseaddr);
+
+	priv->dev = &pdev->dev;
+	priv->ioaddr = baseaddr;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,num-of-counters",
+				   &numcounters);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "no property xlnx,num-of-counters\n");
+		return ret;
+	}
+
+	/*
+	 * Two event attributes (read/write) per counter plus the NULL
+	 * terminator must fit in the 17-entry attribute table.
+	 */
+	if (numcounters == 0 || numcounters > 8) {
+		dev_err(&pdev->dev, "invalid xlnx,num-of-counters %u\n",
+			numcounters);
+		return -EINVAL;
+	}
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(priv->clk),
+				     "axi clock error\n");
+
+	/* Initial home CPU for counter accesses; rehomed on hotplug. */
+	priv->cpu = raw_smp_processor_id();
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "perf/xapm/xapm:online",
+				      NULL, xapm_perf_offline_cpu);
+	if (ret < 0)
+		return ret;
+
+	priv->cpuhp_state = ret;
+	priv->mode = XAPM_MODE_ADVANCED;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,enable-profile", &mode);
+	if (ret < 0)
+		dev_info(&pdev->dev, "no property xlnx,enable-profile\n");
+	else if (mode)
+		priv->mode = XAPM_MODE_PROFILE;
+
+	ret = of_property_read_u32(pdev->dev.of_node, "xlnx,enable-trace", &mode);
+	if (ret < 0)
+		dev_info(&pdev->dev, "no property xlnx,enable-trace\n");
+	else if (mode)
+		priv->mode = XAPM_MODE_TRACE;
+
+	ret = clk_prepare_enable(priv->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		goto cpuhp_instance_err;
+	}
+	/* Register the pmu instance for cpu hotplug */
+	ret = cpuhp_state_add_instance_nocalls(priv->cpuhp_state, &priv->node);
+	if (ret) {
+		/* The shared error label does not drop the clock. */
+		clk_disable_unprepare(priv->clk);
+		goto cpuhp_instance_err;
+	}
+
+	for (i = 0; i < numcounters * 2; i++)
+		xapm_perf_events_attrs[i] = xapm_perf_events_attrs_all[i];
+	xapm_perf_events_attrs[i] = 
NULL; + + xapm_perf_events_group.name = "events"; + xapm_perf_events_group.attrs = xapm_perf_events_attrs; + + pmu = &priv->pmu; + + pmu->task_ctx_nr = perf_invalid_context; + pmu->attr_groups = xapm_perf_groups; + pmu->event_init = xapm_perf_event_init; + pmu->add = xapm_perf_event_add; + pmu->del = xapm_perf_event_del; + pmu->start = xapm_perf_event_start; + pmu->stop = xapm_perf_event_stop; + pmu->read = xapm_perf_event_read; + pmu->capabilities = PERF_PMU_CAP_NO_INTERRUPT | + PERF_PMU_CAP_NO_EXCLUDE; + + ret = of_property_read_u32(pdev->dev.of_node, "xlnx,num-monitor-slots", + &priv->maxslots); + if (ret < 0) { + dev_err(&pdev->dev, "no property xlnx,num-monitor-slots"); + return ret; + } + + name = devm_kasprintf(priv->dev, GFP_KERNEL, "xapm%llx_counter", + res->start); + ret = perf_pmu_register(pmu, name, -1); + if (ret) + goto pmu_register_err; + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + return 0; +pmu_register_err: + cpuhp_state_remove_instance_nocalls(priv->cpuhp_state, &priv->node); +cpuhp_instance_err: + cpuhp_remove_multi_state(priv->cpuhp_state); + return ret; +} + +static int xapm_perf_remove(struct platform_device *pdev) +{ + struct xapm_perf_priv *priv = platform_get_drvdata(pdev); + + perf_pmu_unregister(&priv->pmu); + cpuhp_state_remove_instance_nocalls(priv->cpuhp_state, &priv->node); + clk_disable_unprepare(priv->clk); + pm_runtime_disable(&pdev->dev); + pm_runtime_set_suspended(&pdev->dev); + return 0; +} + +static int __maybe_unused xapm_runtime_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct xapm_perf_priv *priv = platform_get_drvdata(pdev); + + clk_disable_unprepare(priv->clk); + return 0; +}; + +static int __maybe_unused xapm_runtime_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct xapm_perf_priv *priv = platform_get_drvdata(pdev); + int ret; + + ret = 
clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "Unable to enable clock.\n"); + return ret; + } + return 0; +}; + +static const struct dev_pm_ops xapm_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(xapm_runtime_suspend, xapm_runtime_resume) + SET_RUNTIME_PM_OPS(xapm_runtime_suspend, + xapm_runtime_resume, NULL) +}; + +static const struct of_device_id xapm_perf_match[] = { + { .compatible = "xlnx,axi-perf-monitor", }, + {}, +}; + +static struct platform_driver xlnx_apm_driver = { + .driver = { + .name = "xlnx_apm_perf", + .of_match_table = xapm_perf_match, + .suppress_bind_attrs = true, + .pm = &xapm_dev_pm_ops, + }, + .probe = xapm_perf_probe, + .remove = xapm_perf_remove, +}; +builtin_platform_driver(xlnx_apm_driver); -- 2.17.1