Hi, Baolin, I droped your Revivewed-by tag due to that I made significant changes to this patch previously, please explicty give me Revivewed-by tag again if you are happy with the changes. Thank you. Best Regards, Shuai On 2023/10/20 21:42, Shuai Xue wrote: > This commit adds the PCIe Performance Monitoring Unit (PMU) driver support > for T-Head Yitian SoC chip. Yitian is based on the Synopsys PCI Express > Core controller IP which provides statistics feature. The PMU is a PCIe > configuration space register block provided by each PCIe Root Port in a > Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error > injection, and Statistics). > > To facilitate collection of statistics the controller provides the > following two features for each Root Port: > > - one 64-bit counter for Time Based Analysis (RX/TX data throughput and > time spent in each low-power LTSSM state) and > - one 32-bit counter for Event Counting (error and non-error events for > a specified lane) > > Note: There is no interrupt for counter overflow. > > This driver adds PMU devices for each PCIe Root Port. And the PMU device is > named based the BDF of Root Port. For example, > > 30:03.0 PCI bridge: Device 1ded:8000 (rev 01) > > the PMU device name for this Root Port is dwc_rootport_3018. > > Example usage of counting PCIe RX TLP data payload (Units of bytes):: > > $# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/ > > average RX bandwidth can be calculated like this: > > PCIe TX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window > > Signed-off-by: Shuai Xue <xueshuai@xxxxxxxxxxxxxxxxx> > --- > drivers/perf/Kconfig | 7 + > drivers/perf/Makefile | 1 + > drivers/perf/dwc_pcie_pmu.c | 770 ++++++++++++++++++++++++++++++++++++ > 3 files changed, 778 insertions(+) > create mode 100644 drivers/perf/dwc_pcie_pmu.c > > diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig > index 273d67ecf6d2..ec6e0d9194a1 100644 > --- a/drivers/perf/Kconfig > +++ b/drivers/perf/Kconfig > @@ -217,6 +217,13 @@ config MARVELL_CN10K_DDR_PMU > Enable perf support for Marvell DDR Performance monitoring > event on CN10K platform. > > +config DWC_PCIE_PMU > + tristate "Synopsys DesignWare PCIe PMU" > + depends on PCI > + help > + Enable perf support for Synopsys DesignWare PCIe PMU Performance > + monitoring event on platform including the Alibaba Yitian 710. > + > source "drivers/perf/arm_cspmu/Kconfig" > > source "drivers/perf/amlogic/Kconfig" > diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile > index 16b3ec4db916..a06338e3401c 100644 > --- a/drivers/perf/Makefile > +++ b/drivers/perf/Makefile > @@ -23,6 +23,7 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o > obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o > obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o > obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o > +obj-$(CONFIG_DWC_PCIE_PMU) += dwc_pcie_pmu.o > obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/ > obj-$(CONFIG_MESON_DDR_PMU) += amlogic/ > obj-$(CONFIG_CXL_PMU) += cxl_pmu.o > diff --git a/drivers/perf/dwc_pcie_pmu.c b/drivers/perf/dwc_pcie_pmu.c > new file mode 100644 > index 000000000000..ddb06d763b0c > --- /dev/null > +++ b/drivers/perf/dwc_pcie_pmu.c > @@ -0,0 +1,770 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Synopsys DesignWare PCIe PMU driver > + * > + * Copyright (C) 2021-2023 Alibaba Inc. > + */ > + > +#include <linux/bitfield.h> > +#include <linux/bitops.h> > +#include <linux/cpuhotplug.h> > +#include <linux/cpumask.h> > +#include <linux/device.h> > +#include <linux/errno.h> > +#include <linux/kernel.h> > +#include <linux/list.h> > +#include <linux/perf_event.h> > +#include <linux/pci.h> > +#include <linux/platform_device.h> > +#include <linux/smp.h> > +#include <linux/sysfs.h> > +#include <linux/types.h> > + > +#define DWC_PCIE_VSEC_RAS_DES_ID 0x02 > +#define DWC_PCIE_EVENT_CNT_CTL 0x8 > + > +/* > + * Event Counter Data Select includes two parts: > + * - 27-24: Group number(4-bit: 0..0x7) > + * - 23-16: Event number(8-bit: 0..0x13) within the Group > + * > + * Put them together as in TRM. > + */ > +#define DWC_PCIE_CNT_EVENT_SEL GENMASK(27, 16) > +#define DWC_PCIE_CNT_LANE_SEL GENMASK(11, 8) > +#define DWC_PCIE_CNT_STATUS BIT(7) > +#define DWC_PCIE_CNT_ENABLE GENMASK(4, 2) > +#define DWC_PCIE_PER_EVENT_OFF 0x1 > +#define DWC_PCIE_PER_EVENT_ON 0x3 > +#define DWC_PCIE_EVENT_CLEAR GENMASK(1, 0) > +#define DWC_PCIE_EVENT_PER_CLEAR 0x1 > + > +#define DWC_PCIE_EVENT_CNT_DATA 0xC > + > +#define DWC_PCIE_TIME_BASED_ANAL_CTL 0x10 > +#define DWC_PCIE_TIME_BASED_REPORT_SEL GENMASK(31, 24) > +#define DWC_PCIE_TIME_BASED_DURATION_SEL GENMASK(15, 8) > +#define DWC_PCIE_DURATION_MANUAL_CTL 0x0 > +#define DWC_PCIE_DURATION_1MS 0x1 > +#define DWC_PCIE_DURATION_10MS 0x2 > +#define DWC_PCIE_DURATION_100MS 0x3 > +#define DWC_PCIE_DURATION_1S 0x4 > +#define DWC_PCIE_DURATION_2S 0x5 > +#define DWC_PCIE_DURATION_4S 0x6 > +#define DWC_PCIE_DURATION_4US 0xFF > +#define DWC_PCIE_TIME_BASED_TIMER_START BIT(0) > +#define DWC_PCIE_TIME_BASED_CNT_ENABLE 0x1 > + > +#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW 0x14 > +#define DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH 0x18 > + > +/* Event attributes */ > +#define DWC_PCIE_CONFIG_EVENTID GENMASK(15, 0) > +#define DWC_PCIE_CONFIG_TYPE GENMASK(19, 16) > +#define DWC_PCIE_CONFIG_LANE GENMASK(27, 20) > + > +#define DWC_PCIE_EVENT_ID(event) FIELD_GET(DWC_PCIE_CONFIG_EVENTID, (event)->attr.config) > +#define DWC_PCIE_EVENT_TYPE(event) FIELD_GET(DWC_PCIE_CONFIG_TYPE, (event)->attr.config) > +#define DWC_PCIE_EVENT_LANE(event) FIELD_GET(DWC_PCIE_CONFIG_LANE, (event)->attr.config) > + > +enum dwc_pcie_event_type { > + DWC_PCIE_TIME_BASE_EVENT, > + DWC_PCIE_LANE_EVENT, > + DWC_PCIE_EVENT_TYPE_MAX, > +}; > + > +#define DWC_PCIE_LANE_EVENT_MAX_PERIOD GENMASK_ULL(31, 0) > +#define DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD GENMASK_ULL(63, 0) > + > +struct dwc_pcie_pmu { > + struct pmu pmu; > + struct pci_dev *pdev; /* Root Port device */ > + u16 ras_des_offset; > + u32 nr_lanes; > + > + struct list_head pmu_node; > + struct hlist_node cpuhp_node; > + struct perf_event *event[DWC_PCIE_EVENT_TYPE_MAX]; > + int on_cpu; > + bool registered; > +}; > + > +#define to_dwc_pcie_pmu(p) (container_of(p, struct dwc_pcie_pmu, pmu)) > + > +static struct platform_device *dwc_pcie_pmu_dev; > +static int dwc_pcie_pmu_hp_state; > +static struct list_head dwc_pcie_pmu_head = LIST_HEAD_INIT(dwc_pcie_pmu_head); > + > +static ssize_t cpumask_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(dev_get_drvdata(dev)); > + > + return cpumap_print_to_pagebuf(true, buf, cpumask_of(pcie_pmu->on_cpu)); > +} > +static DEVICE_ATTR_RO(cpumask); > + > +static struct attribute *dwc_pcie_pmu_cpumask_attrs[] = { > + &dev_attr_cpumask.attr, > + NULL > +}; > + > +static struct attribute_group dwc_pcie_cpumask_attr_group = { > + .attrs = dwc_pcie_pmu_cpumask_attrs, > +}; > + > +struct dwc_pcie_format_attr { > + struct device_attribute attr; > + u64 field; > + int config; > +}; > + > +static ssize_t dwc_pcie_pmu_format_show(struct device *dev, > + struct device_attribute *attr, > + char *buf) > +{ > + struct dwc_pcie_format_attr *fmt = container_of(attr, typeof(*fmt), attr); > + int lo = __ffs(fmt->field), hi = __fls(fmt->field); > + > + return sysfs_emit(buf, "config:%d-%d\n", lo, hi); > +} > + > +#define _dwc_pcie_format_attr(_name, _cfg, _fld) \ > + (&((struct dwc_pcie_format_attr[]) {{ \ > + .attr = __ATTR(_name, 0444, dwc_pcie_pmu_format_show, NULL),\ > + .config = _cfg, \ > + .field = _fld, \ > + }})[0].attr.attr) > + > +#define dwc_pcie_format_attr(_name, _fld) _dwc_pcie_format_attr(_name, 0, _fld) > + > +static struct attribute *dwc_pcie_format_attrs[] = { > + dwc_pcie_format_attr(type, DWC_PCIE_CONFIG_TYPE), > + dwc_pcie_format_attr(eventid, DWC_PCIE_CONFIG_EVENTID), > + dwc_pcie_format_attr(lane, DWC_PCIE_CONFIG_LANE), > + NULL, > +}; > + > +static struct attribute_group dwc_pcie_format_attrs_group = { > + .name = "format", > + .attrs = dwc_pcie_format_attrs, > +}; > + > +struct dwc_pcie_event_attr { > + struct device_attribute attr; > + enum dwc_pcie_event_type type; > + u16 eventid; > + u8 lane; > +}; > + > +static ssize_t dwc_pcie_event_show(struct device *dev, > + struct device_attribute *attr, char *buf) > +{ > + struct dwc_pcie_event_attr *eattr; > + > + eattr = container_of(attr, typeof(*eattr), attr); > + > + if (eattr->type == DWC_PCIE_LANE_EVENT) > + return sysfs_emit(buf, "eventid=0x%x,type=0x%x,lane=?\n", > + eattr->eventid, eattr->type); > + else if (eattr->type == DWC_PCIE_TIME_BASE_EVENT) > + return sysfs_emit(buf, "eventid=0x%x,type=0x%x\n", > + eattr->eventid, eattr->type); > + > + return 0; > +} > + > +#define DWC_PCIE_EVENT_ATTR(_name, _type, _eventid, _lane) \ > + (&((struct dwc_pcie_event_attr[]) {{ \ > + .attr = __ATTR(_name, 0444, dwc_pcie_event_show, NULL), \ > + .type = _type, \ > + .eventid = _eventid, \ > + .lane = _lane, \ > + }})[0].attr.attr) > + > +#define DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(_name, _eventid) \ > + DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_TIME_BASE_EVENT, _eventid, 0) > +#define DWC_PCIE_PMU_LANE_EVENT_ATTR(_name, _eventid) \ > + DWC_PCIE_EVENT_ATTR(_name, DWC_PCIE_LANE_EVENT, _eventid, 0) > + > +static struct attribute *dwc_pcie_pmu_time_event_attrs[] = { > + /* Group #0 */ > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(one_cycle, 0x00), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_L0S, 0x01), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(RX_L0S, 0x02), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L0, 0x03), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1, 0x04), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_1, 0x05), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_2, 0x06), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(CFG_RCVRY, 0x07), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(TX_RX_L0S, 0x08), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(L1_AUX, 0x09), > + > + /* Group #1 */ > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_PCIe_TLP_Data_Payload, 0x20), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_PCIe_TLP_Data_Payload, 0x21), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Tx_CCIX_TLP_Data_Payload, 0x22), > + DWC_PCIE_PMU_TIME_BASE_EVENT_ATTR(Rx_CCIX_TLP_Data_Payload, 0x23), > + > + /* > + * Leave it to the user to specify the lane ID to avoid generating > + * a list of hundreds of events. > + */ > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ack_dllp, 0x600), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_update_fc_dllp, 0x601), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ack_dllp, 0x602), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_update_fc_dllp, 0x603), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_nulified_tlp, 0x604), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_nulified_tlp, 0x605), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_duplicate_tl, 0x606), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_write, 0x700), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_memory_read, 0x701), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_write, 0x702), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_configuration_read, 0x703), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_write, 0x704), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_io_read, 0x705), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_without_data, 0x706), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_completion_with_data, 0x707), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_message_tlp, 0x708), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_atomic, 0x709), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_tlp_with_prefix, 0x70A), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_write, 0x70B), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_memory_read, 0x70C), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_write, 0x70F), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_io_read, 0x710), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_without_data, 0x711), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_completion_with_data, 0x712), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_message_tlp, 0x713), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_atomic, 0x714), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_tlp_with_prefix, 0x715), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(tx_ccix_tlp, 0x716), > + DWC_PCIE_PMU_LANE_EVENT_ATTR(rx_ccix_tlp, 0x717), > + NULL > +}; > + > +static const struct attribute_group dwc_pcie_event_attrs_group = { > + .name = "events", > + .attrs = dwc_pcie_pmu_time_event_attrs, > +}; > + > +static const struct attribute_group *dwc_pcie_attr_groups[] = { > + &dwc_pcie_event_attrs_group, > + &dwc_pcie_format_attrs_group, > + &dwc_pcie_cpumask_attr_group, > + NULL > +}; > + > +static void dwc_pcie_pmu_lane_event_enable(struct dwc_pcie_pmu *pcie_pmu, > + bool enable) > +{ > + struct pci_dev *pdev = pcie_pmu->pdev; > + u16 ras_des_offset = pcie_pmu->ras_des_offset; > + u32 val; > + > + pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, &val); > + > + /* Clear DWC_PCIE_CNT_ENABLE field first */ > + val &= ~DWC_PCIE_CNT_ENABLE; > + if (enable) > + val |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_ON); > + else > + val |= FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF); > + > + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, val); > +} > + > +static void dwc_pcie_pmu_time_based_event_enable(struct dwc_pcie_pmu *pcie_pmu, > + bool enable) > +{ > + struct pci_dev *pdev = pcie_pmu->pdev; > + u16 ras_des_offset = pcie_pmu->ras_des_offset; > + u32 val; > + > + pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, > + &val); > + > + if (enable) > + val |= DWC_PCIE_TIME_BASED_CNT_ENABLE; > + else > + val &= ~DWC_PCIE_TIME_BASED_CNT_ENABLE; > + > + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, > + val); > +} > + > +static u64 dwc_pcie_pmu_read_lane_event_counter(struct perf_event *event) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + struct pci_dev *pdev = pcie_pmu->pdev; > + u16 ras_des_offset = pcie_pmu->ras_des_offset; > + u32 val; > + > + pci_read_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_DATA, &val); > + > + return val; > +} > + > +static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + struct pci_dev *pdev = pcie_pmu->pdev; > + int event_id = DWC_PCIE_EVENT_ID(event); > + u16 ras_des_offset = pcie_pmu->ras_des_offset; > + u32 lo, hi, ss; > + > + /* > + * The 64-bit value of the data counter is spread across two > + * registers that are not synchronized. In order to read them > + * atomically, ensure that the high 32 bits match before and after > + * reading the low 32 bits. > + */ > + pci_read_config_dword(pdev, ras_des_offset + > + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &hi); > + do { > + /* snapshot the high 32 bits */ > + ss = hi; > + > + pci_read_config_dword( > + pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW, > + &lo); > + pci_read_config_dword( > + pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, > + &hi); > + } while (hi != ss); > + > + /* > + * The Group#1 event measures the amount of data processed in 16-byte > + * units. Simplify the end-user interface by multiplying the counter > + * at the point of read. > + */ > + if (event_id >= 0x20 && event_id <= 0x23) > + return (((u64)hi << 32) | lo) << 4; > + else > + return (((u64)hi << 32) | lo); > +} > + > +static void dwc_pcie_pmu_event_update(struct perf_event *event) > +{ > + struct hw_perf_event *hwc = &event->hw; > + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); > + u64 delta, prev, now; > + > + do { > + prev = local64_read(&hwc->prev_count); > + > + if (type == DWC_PCIE_LANE_EVENT) > + now = dwc_pcie_pmu_read_lane_event_counter(event); > + else if (type == DWC_PCIE_TIME_BASE_EVENT) > + now = dwc_pcie_pmu_read_time_based_counter(event); > + > + } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev); > + > + if (type == DWC_PCIE_LANE_EVENT) > + delta = (now - prev) & DWC_PCIE_LANE_EVENT_MAX_PERIOD; > + else if (type == DWC_PCIE_TIME_BASE_EVENT) > + delta = (now - prev) & DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD; > + > + local64_add(delta, &event->count); > +} > + > +static int dwc_pcie_pmu_event_init(struct perf_event *event) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); > + struct perf_event *sibling; > + u32 lane; > + > + if (event->attr.type != event->pmu->type) > + return -ENOENT; > + > + /* We don't support sampling */ > + if (is_sampling_event(event)) > + return -EINVAL; > + > + /* We cannot support task bound events */ > + if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) > + return -EINVAL; > + > + if (event->group_leader != event && > + !is_software_event(event->group_leader)) > + return -EINVAL; > + > + for_each_sibling_event(sibling, event->group_leader) { > + if (sibling->pmu != event->pmu && !is_software_event(sibling)) > + return -EINVAL; > + } > + > + if (type == DWC_PCIE_LANE_EVENT) { > + lane = DWC_PCIE_EVENT_LANE(event); > + if (lane < 0 || lane >= pcie_pmu->nr_lanes) > + return -EINVAL; > + } > + > + event->cpu = pcie_pmu->on_cpu; > + > + return 0; > +} > + > +static void dwc_pcie_pmu_set_period(struct hw_perf_event *hwc) > +{ > + local64_set(&hwc->prev_count, 0); > +} > + > +static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags) > +{ > + struct hw_perf_event *hwc = &event->hw; > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); > + > + hwc->state = 0; > + dwc_pcie_pmu_set_period(hwc); > + > + if (type == DWC_PCIE_LANE_EVENT) > + dwc_pcie_pmu_lane_event_enable(pcie_pmu, true); > + else if (type == DWC_PCIE_TIME_BASE_EVENT) > + dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true); > +} > + > +static void dwc_pcie_pmu_event_stop(struct perf_event *event, int flags) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); > + struct hw_perf_event *hwc = &event->hw; > + > + if (event->hw.state & PERF_HES_STOPPED) > + return; > + > + if (type == DWC_PCIE_LANE_EVENT) > + dwc_pcie_pmu_lane_event_enable(pcie_pmu, false); > + else if (type == DWC_PCIE_TIME_BASE_EVENT) > + dwc_pcie_pmu_time_based_event_enable(pcie_pmu, false); > + > + dwc_pcie_pmu_event_update(event); > + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; > +} > + > +static int dwc_pcie_pmu_event_add(struct perf_event *event, int flags) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + struct pci_dev *pdev = pcie_pmu->pdev; > + struct hw_perf_event *hwc = &event->hw; > + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); > + int event_id = DWC_PCIE_EVENT_ID(event); > + int lane = DWC_PCIE_EVENT_LANE(event); > + u16 ras_des_offset = pcie_pmu->ras_des_offset; > + u32 ctrl; > + > + /* one counter for each type and it is in use */ > + if (pcie_pmu->event[type]) > + return -ENOSPC; > + > + pcie_pmu->event[type] = event; > + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; > + > + if (type == DWC_PCIE_LANE_EVENT) { > + /* EVENT_COUNTER_DATA_REG needs clear manually */ > + ctrl = FIELD_PREP(DWC_PCIE_CNT_EVENT_SEL, event_id) | > + FIELD_PREP(DWC_PCIE_CNT_LANE_SEL, lane) | > + FIELD_PREP(DWC_PCIE_CNT_ENABLE, DWC_PCIE_PER_EVENT_OFF) | > + FIELD_PREP(DWC_PCIE_EVENT_CLEAR, DWC_PCIE_EVENT_PER_CLEAR); > + pci_write_config_dword(pdev, ras_des_offset + DWC_PCIE_EVENT_CNT_CTL, > + ctrl); > + } else if (type == DWC_PCIE_TIME_BASE_EVENT) { > + /* > + * TIME_BASED_ANAL_DATA_REG is a 64 bit register, we can safely > + * use it with any manually controlled duration. And it is > + * cleared when next measurement starts. > + */ > + ctrl = FIELD_PREP(DWC_PCIE_TIME_BASED_REPORT_SEL, event_id) | > + FIELD_PREP(DWC_PCIE_TIME_BASED_DURATION_SEL, > + DWC_PCIE_DURATION_MANUAL_CTL) | > + DWC_PCIE_TIME_BASED_CNT_ENABLE; > + pci_write_config_dword( > + pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_CTL, ctrl); > + } > + > + if (flags & PERF_EF_START) > + dwc_pcie_pmu_event_start(event, PERF_EF_RELOAD); > + > + perf_event_update_userpage(event); > + > + return 0; > +} > + > +static void dwc_pcie_pmu_event_del(struct perf_event *event, int flags) > +{ > + struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu); > + enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event); > + > + dwc_pcie_pmu_event_stop(event, flags | PERF_EF_UPDATE); > + perf_event_update_userpage(event); > + pcie_pmu->event[type] = NULL; > +} > + > +static void dwc_pcie_pmu_remove_cpuhp_instance(void *hotplug_node) > +{ > + cpuhp_state_remove_instance_nocalls(dwc_pcie_pmu_hp_state, hotplug_node); > +} > + > +/* > + * Find the PMU of a PCI device. > + * @pdev: The PCI device. > + */ > +static struct dwc_pcie_pmu *dwc_pcie_find_dev_pmu(struct pci_dev *pdev) > +{ > + struct dwc_pcie_pmu *pcie_pmu; > + > + list_for_each_entry(pcie_pmu, &dwc_pcie_pmu_head, pmu_node) > + if (pcie_pmu->pdev == pdev) > + return pcie_pmu; > + > + return NULL; > +} > + > +static void dwc_pcie_pmu_unregister_pmu(void *data) > +{ > + struct dwc_pcie_pmu *pcie_pmu = data; > + > + if (!pcie_pmu->registered) > + return; > + > + pcie_pmu->registered = false; > + list_del(&pcie_pmu->pmu_node); > + perf_pmu_unregister(&pcie_pmu->pmu); > +} > + > +static int dwc_pcie_pmu_notifier(struct notifier_block *nb, > + unsigned long action, void *data) > +{ > + struct device *dev = data; > + struct pci_dev *pdev = to_pci_dev(dev); > + struct dwc_pcie_pmu *pcie_pmu; > + > + /* Unregister the PMU when the device is going to be deleted. */ > + if (action != BUS_NOTIFY_DEL_DEVICE) > + return NOTIFY_DONE; > + > + pcie_pmu = dwc_pcie_find_dev_pmu(pdev); > + if (!pcie_pmu) > + return NOTIFY_DONE; > + > + dwc_pcie_pmu_unregister_pmu(pcie_pmu); > + > + return NOTIFY_OK; > +} > + > +static struct notifier_block dwc_pcie_pmu_nb = { > + .notifier_call = dwc_pcie_pmu_notifier, > +}; > + > +static void dwc_pcie_pmu_unregister_nb(void *data) > +{ > + bus_unregister_notifier(&pci_bus_type, &dwc_pcie_pmu_nb); > +} > + > +static int dwc_pcie_pmu_probe(struct platform_device *plat_dev) > +{ > + struct pci_dev *pdev = NULL; > + struct dwc_pcie_pmu *pcie_pmu; > + bool notify = false; > + char *name; > + u32 bdf; > + int ret; > + > + /* Match the rootport with VSEC_RAS_DES_ID, and register a PMU for it */ > + for_each_pci_dev(pdev) { > + u16 vsec; > + u32 val; > + > + if (!(pci_is_pcie(pdev) && > + pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT)) > + continue; > + > + vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_ALIBABA, > + DWC_PCIE_VSEC_RAS_DES_ID); > + if (!vsec) > + continue; > + > + pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val); > + if (PCI_VNDR_HEADER_REV(val) != 0x04) > + continue; > + pci_dbg(pdev, > + "Detected PCIe Vendor-Specific Extended Capability RAS DES\n"); > + > + bdf = PCI_DEVID(pdev->bus->number, pdev->devfn); > + name = devm_kasprintf(&plat_dev->dev, GFP_KERNEL, "dwc_rootport_%x", > + bdf); > + if (!name) { > + ret = -ENOMEM; > + goto out; > + } > + > + /* All checks passed, go go go */ > + pcie_pmu = devm_kzalloc(&plat_dev->dev, sizeof(*pcie_pmu), GFP_KERNEL); > + if (!pcie_pmu) { > + ret = -ENOMEM; > + goto out; > + } > + > + pcie_pmu->pdev = pdev; > + pcie_pmu->ras_des_offset = vsec; > + pcie_pmu->nr_lanes = pcie_get_width_cap(pdev); > + pcie_pmu->on_cpu = -1; > + pcie_pmu->pmu = (struct pmu){ > + .module = THIS_MODULE, > + .attr_groups = dwc_pcie_attr_groups, > + .capabilities = PERF_PMU_CAP_NO_EXCLUDE, > + .task_ctx_nr = perf_invalid_context, > + .event_init = dwc_pcie_pmu_event_init, > + .add = dwc_pcie_pmu_event_add, > + .del = dwc_pcie_pmu_event_del, > + .start = dwc_pcie_pmu_event_start, > + .stop = dwc_pcie_pmu_event_stop, > + .read = dwc_pcie_pmu_event_update, > + }; > + > + /* Add this instance to the list used by the offline callback */ > + ret = cpuhp_state_add_instance(dwc_pcie_pmu_hp_state, > + &pcie_pmu->cpuhp_node); > + if (ret) { > + pci_err(pdev, > + "Error %d registering hotplug @%x\n", ret, bdf); > + goto out; > + } > + > + /* Unwind when platform driver removes */ > + ret = devm_add_action_or_reset( > + &plat_dev->dev, dwc_pcie_pmu_remove_cpuhp_instance, > + &pcie_pmu->cpuhp_node); > + if (ret) > + goto out; > + > + ret = perf_pmu_register(&pcie_pmu->pmu, name, -1); > + if (ret) { > + pci_err(pdev, > + "Error %d registering PMU @%x\n", ret, bdf); > + goto out; > + } > + > + /* Cache PMU to handle pci device hotplug */ > + list_add(&pcie_pmu->pmu_node, &dwc_pcie_pmu_head); > + pcie_pmu->registered = true; > + notify = true; > + > + ret = devm_add_action_or_reset( > + &plat_dev->dev, dwc_pcie_pmu_unregister_pmu, pcie_pmu); > + if (ret) > + goto out; > + } > + > + if (notify && !bus_register_notifier(&pci_bus_type, &dwc_pcie_pmu_nb)) > + return devm_add_action_or_reset( > + &plat_dev->dev, dwc_pcie_pmu_unregister_nb, NULL); > + > + return 0; > + > +out: > + pci_dev_put(pdev); > + > + return ret; > +} > + > +static int dwc_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) > +{ > + struct dwc_pcie_pmu *pcie_pmu; > + > + pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node); > + if (pcie_pmu->on_cpu == -1) > + pcie_pmu->on_cpu = cpumask_local_spread( > + 0, dev_to_node(&pcie_pmu->pdev->dev)); > + > + return 0; > +} > + > +static int dwc_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) > +{ > + struct dwc_pcie_pmu *pcie_pmu; > + struct pci_dev *pdev; > + int node; > + cpumask_t mask; > + unsigned int target; > + > + pcie_pmu = hlist_entry_safe(cpuhp_node, struct dwc_pcie_pmu, cpuhp_node); > + /* Nothing to do if this CPU doesn't own the PMU */ > + if (cpu != pcie_pmu->on_cpu) > + return 0; > + > + pcie_pmu->on_cpu = -1; > + pdev = pcie_pmu->pdev; > + node = dev_to_node(&pdev->dev); > + if (cpumask_and(&mask, cpumask_of_node(node), cpu_online_mask) && > + cpumask_andnot(&mask, &mask, cpumask_of(cpu))) > + target = cpumask_any(&mask); > + else > + target = cpumask_any_but(cpu_online_mask, cpu); > + > + if (target >= nr_cpu_ids) { > + pci_err(pdev, "There is no CPU to set\n"); > + return 0; > + } > + > + /* This PMU does NOT support interrupt, just migrate context. */ > + perf_pmu_migrate_context(&pcie_pmu->pmu, cpu, target); > + pcie_pmu->on_cpu = target; > + > + return 0; > +} > + > +static struct platform_driver dwc_pcie_pmu_driver = { > + .probe = dwc_pcie_pmu_probe, > + .driver = {.name = "dwc_pcie_pmu",}, > +}; > + > +static int __init dwc_pcie_pmu_init(void) > +{ > + int ret; > + > + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, > + "perf/dwc_pcie_pmu:online", > + dwc_pcie_pmu_online_cpu, > + dwc_pcie_pmu_offline_cpu); > + if (ret < 0) > + return ret; > + > + dwc_pcie_pmu_hp_state = ret; > + > + ret = platform_driver_register(&dwc_pcie_pmu_driver); > + if (ret) > + goto platform_driver_register_err; > + > + dwc_pcie_pmu_dev = platform_device_register_simple( > + "dwc_pcie_pmu", PLATFORM_DEVID_NONE, NULL, 0); > + if (IS_ERR(dwc_pcie_pmu_dev)) { > + ret = PTR_ERR(dwc_pcie_pmu_dev); > + goto platform_device_register_error; > + } > + > + return 0; > + > +platform_device_register_error: > + platform_driver_unregister(&dwc_pcie_pmu_driver); > +platform_driver_register_err: > + cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state); > + > + return ret; > +} > + > +static void __exit dwc_pcie_pmu_exit(void) > +{ > + platform_device_unregister(dwc_pcie_pmu_dev); > + platform_driver_unregister(&dwc_pcie_pmu_driver); > + cpuhp_remove_multi_state(dwc_pcie_pmu_hp_state); > +} > + > +module_init(dwc_pcie_pmu_init); > +module_exit(dwc_pcie_pmu_exit); > + > +MODULE_DESCRIPTION("PMU driver for DesignWare Cores PCI Express Controller"); > +MODULE_AUTHOR("Shuai Xue <xueshuai@xxxxxxxxxxxxxxxxx>"); > +MODULE_LICENSE("GPL v2");