CCPI2 is a low-latency high-bandwidth serial interface for connecting ThunderX2 processors. This patch adds support to capture CCPI2 perf events. Signed-off-by: Ganapatrao Kulkarni <gkulkarni@xxxxxxxxxxx> --- drivers/perf/thunderx2_pmu.c | 179 ++++++++++++++++++++++++++++++----- 1 file changed, 157 insertions(+), 22 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 43d76c85da56..3791ac9b897d 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -16,7 +16,9 @@ * they need to be sampled before overflow(i.e, at every 2 seconds). */ -#define TX2_PMU_MAX_COUNTERS 4 +#define TX2_PMU_DMC_L3C_MAX_COUNTERS 4 +#define TX2_PMU_CCPI2_MAX_COUNTERS 8 + #define TX2_PMU_DMC_CHANNELS 8 #define TX2_PMU_L3_TILES 16 @@ -28,11 +30,22 @@ */ #define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1)) +#define GET_EVENTID_CCPI2(ev) ((ev->hw.config) & 0x1ff) +/* bits[3:0] to select counters, starts from 8, bit[3] set always. */ +#define GET_COUNTERID_CCPI2(ev) ((ev->hw.idx) & 0x7) +#define CCPI2_COUNTER_OFFSET 8 + #define L3C_COUNTER_CTL 0xA8 #define L3C_COUNTER_DATA 0xAC #define DMC_COUNTER_CTL 0x234 #define DMC_COUNTER_DATA 0x240 +#define CCPI2_PERF_CTL 0x108 +#define CCPI2_COUNTER_CTL 0x10C +#define CCPI2_COUNTER_SEL 0x12c +#define CCPI2_COUNTER_DATA_L 0x130 +#define CCPI2_COUNTER_DATA_H 0x134 + /* L3C event IDs */ #define L3_EVENT_READ_REQ 0xD #define L3_EVENT_WRITEBACK_REQ 0xE @@ -51,9 +64,16 @@ #define DMC_EVENT_READ_TXNS 0xF #define DMC_EVENT_MAX 0x10 +#define CCPI2_EVENT_REQ_PKT_SENT 0x3D +#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65 +#define CCPI2_EVENT_DATA_PKT_SENT 0x105 +#define CCPI2_EVENT_GIC_PKT_SENT 0x12D +#define CCPI2_EVENT_MAX 0x200 + enum tx2_uncore_type { PMU_TYPE_L3C, PMU_TYPE_DMC, + PMU_TYPE_CCPI2, PMU_TYPE_INVALID, }; @@ -73,8 +93,8 @@ struct tx2_uncore_pmu { u32 max_events; u64 hrtimer_interval; void __iomem *base; - DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS); - struct perf_event *events[TX2_PMU_MAX_COUNTERS]; + DECLARE_BITMAP(active_counters, TX2_PMU_CCPI2_MAX_COUNTERS); + struct perf_event *events[TX2_PMU_DMC_L3C_MAX_COUNTERS]; struct device *dev; struct hrtimer hrtimer; const struct attribute_group **attr_groups; @@ -92,7 +112,21 @@ static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu) return container_of(pmu, struct tx2_uncore_pmu, pmu); } -PMU_FORMAT_ATTR(event, "config:0-4"); +#define TX2_PMU_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t \ +__tx2_pmu_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ + \ +static struct device_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL) + +TX2_PMU_FORMAT_ATTR(event, event, "config:0-4"); +TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9"); static struct attribute *l3c_pmu_format_attrs[] = { &format_attr_event.attr, @@ -104,6 +138,11 @@ static struct attribute *dmc_pmu_format_attrs[] = { NULL, }; +static struct attribute *ccpi2_pmu_format_attrs[] = { + &format_attr_event_ccpi2.attr, + NULL, +}; + static const struct attribute_group l3c_pmu_format_attr_group = { .name = "format", .attrs = l3c_pmu_format_attrs, @@ -114,6 +153,11 @@ static const struct attribute_group dmc_pmu_format_attr_group = { .attrs = dmc_pmu_format_attrs, }; +static const struct attribute_group ccpi2_pmu_format_attr_group = { + .name = "format", + .attrs = ccpi2_pmu_format_attrs, +}; + /* * sysfs event attributes */ @@ -164,6 +208,19 @@ static struct attribute *dmc_pmu_events_attrs[] = { NULL, }; +TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT); +TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT); +TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT); +TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT); + +static struct attribute *ccpi2_pmu_events_attrs[] = { + &tx2_pmu_event_attr_req_pktsent.attr.attr, + &tx2_pmu_event_attr_snoop_pktsent.attr.attr, + &tx2_pmu_event_attr_data_pktsent.attr.attr, + &tx2_pmu_event_attr_gic_pktsent.attr.attr, + NULL, +}; + static const struct attribute_group l3c_pmu_events_attr_group = { .name = "events", .attrs = l3c_pmu_events_attrs, @@ -174,6 +231,11 @@ static const struct attribute_group dmc_pmu_events_attr_group = { .attrs = dmc_pmu_events_attrs, }; +static const struct attribute_group ccpi2_pmu_events_attr_group = { + .name = "events", + .attrs = ccpi2_pmu_events_attrs, +}; + /* * sysfs cpumask attributes */ @@ -213,6 +275,13 @@ static const struct attribute_group *dmc_pmu_attr_groups[] = { NULL }; +static const struct attribute_group *ccpi2_pmu_attr_groups[] = { + &ccpi2_pmu_format_attr_group, + &pmu_cpumask_attr_group, + &ccpi2_pmu_events_attr_group, + NULL +}; + static inline u32 reg_readl(unsigned long addr) { return readl((void __iomem *)addr); @@ -265,6 +334,17 @@ static void init_cntr_base_dmc(struct perf_event *event, + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event)); } +static void init_cntr_base_ccpi2(struct perf_event *event, + struct tx2_uncore_pmu *tx2_pmu) +{ + + struct hw_perf_event *hwc = &event->hw; + + hwc->config_base = (unsigned long)tx2_pmu->base + + CCPI2_COUNTER_CTL + (4 * GET_COUNTERID_CCPI2(event)); + hwc->event_base = (unsigned long)tx2_pmu->base; +} + static void uncore_start_event_l3c(struct perf_event *event, int flags) { u32 val; @@ -312,6 +392,29 @@ static void uncore_stop_event_dmc(struct perf_event *event) reg_writel(val, hwc->config_base); } +static void uncore_start_event_ccpi2(struct perf_event *event, int flags) +{ + u32 val; + struct hw_perf_event *hwc = &event->hw; + + /* Bit [09:00] to set event id, set level and type to 1 */ + val = reg_readl(hwc->config_base); + reg_writel((val & ~0xFFF) | (3 << 10) | + GET_EVENTID_CCPI2(event), hwc->config_base); + /* reset[4], enable[0] and start[1] counters */ + reg_writel(0x13, hwc->event_base + CCPI2_PERF_CTL); + local64_set(&event->hw.prev_count, 0ULL); +} + +static void uncore_stop_event_ccpi2(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* disable and stop counter */ + reg_writel(0, hwc->event_base + CCPI2_PERF_CTL); + reg_writel(0, hwc->config_base); +} + static void tx2_uncore_event_update(struct perf_event *event) { s64 prev, delta, new = 0; @@ -323,12 +426,20 @@ static void tx2_uncore_event_update(struct perf_event *event) tx2_pmu = pmu_to_tx2_pmu(event->pmu); type = tx2_pmu->type; prorate_factor = tx2_pmu->prorate_factor; - - new = reg_readl(hwc->event_base); - prev = local64_xchg(&hwc->prev_count, new); - - /* handles rollover of 32 bit counter */ - delta = (u32)(((1UL << 32) - prev) + new); + if (type == PMU_TYPE_CCPI2) { + reg_writel(CCPI2_COUNTER_OFFSET + GET_COUNTERID_CCPI2(event), + hwc->event_base + CCPI2_COUNTER_SEL); + new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L); + new |= (u64)reg_readl(hwc->event_base + + CCPI2_COUNTER_DATA_H) << 32; + prev = local64_xchg(&hwc->prev_count, new); + delta = new - prev; + } else { + new = reg_readl(hwc->event_base); + prev = local64_xchg(&hwc->prev_count, new); + /* handles rollover of 32 bit counter */ + delta = (u32)(((1UL << 32) - prev) + new); + } /* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */ if (type == PMU_TYPE_DMC && @@ -351,6 +462,7 @@ static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev) } devices[] = { {"CAV901D", PMU_TYPE_L3C}, {"CAV901F", PMU_TYPE_DMC}, + {"CAV901E", PMU_TYPE_CCPI2}, {"", PMU_TYPE_INVALID} }; @@ -380,7 +492,8 @@ static bool tx2_uncore_validate_event(struct pmu *pmu, * Make sure the group of events can be scheduled at once * on the PMU. */ -static bool tx2_uncore_validate_event_group(struct perf_event *event) +static bool tx2_uncore_validate_event_group(struct perf_event *event, + int max_counters) { struct perf_event *sibling, *leader = event->group_leader; int counters = 0; @@ -403,7 +516,7 @@ static bool tx2_uncore_validate_event_group(struct perf_event *event) * If the group requires more counters than the HW has, * it cannot ever be scheduled. */ - return counters <= TX2_PMU_MAX_COUNTERS; + return counters <= max_counters; } @@ -439,7 +552,7 @@ static int tx2_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config; /* Validate the group */ - if (!tx2_uncore_validate_event_group(event)) + if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters)) return -EINVAL; return 0; @@ -457,7 +570,8 @@ static void tx2_uncore_event_start(struct perf_event *event, int flags) perf_event_update_userpage(event); /* Start timer for first event */ - if (bitmap_weight(tx2_pmu->active_counters, + if (tx2_pmu->type != PMU_TYPE_CCPI2 && + bitmap_weight(tx2_pmu->active_counters, tx2_pmu->max_counters) == 1) { hrtimer_start(&tx2_pmu->hrtimer, ns_to_ktime(tx2_pmu->hrtimer_interval), @@ -495,7 +609,8 @@ static int tx2_uncore_event_add(struct perf_event *event, int flags) if (hwc->idx < 0) return -EAGAIN; - tx2_pmu->events[hwc->idx] = event; + if (tx2_pmu->type != PMU_TYPE_CCPI2) + tx2_pmu->events[hwc->idx] = event; /* set counter control and data registers base address */ tx2_pmu->init_cntr_base(event, tx2_pmu); @@ -514,10 +629,14 @@ static void tx2_uncore_event_del(struct perf_event *event, int flags) tx2_uncore_event_stop(event, PERF_EF_UPDATE); /* clear the assigned counter */ - free_counter(tx2_pmu, GET_COUNTERID(event)); + if (tx2_pmu->type == PMU_TYPE_CCPI2) + free_counter(tx2_pmu, GET_COUNTERID_CCPI2(event)); + else + free_counter(tx2_pmu, GET_COUNTERID(event)); perf_event_update_userpage(event); - tx2_pmu->events[hwc->idx] = NULL; + if (tx2_pmu->type != PMU_TYPE_CCPI2) + tx2_pmu->events[hwc->idx] = NULL; hwc->idx = -1; } @@ -580,8 +699,12 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu) cpu_online_mask); tx2_pmu->cpu = cpu; - hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - tx2_pmu->hrtimer.function = tx2_hrtimer_callback; + /* CCPI2 counters are 64 bit counters. */ + if (tx2_pmu->type != PMU_TYPE_CCPI2) { + hrtimer_init(&tx2_pmu->hrtimer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + tx2_pmu->hrtimer.function = tx2_hrtimer_callback; + } ret = tx2_uncore_pmu_register(tx2_pmu); if (ret) { @@ -653,7 +776,7 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, switch (tx2_pmu->type) { case PMU_TYPE_L3C: - tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; tx2_pmu->prorate_factor = TX2_PMU_L3_TILES; tx2_pmu->max_events = L3_EVENT_MAX; tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; @@ -665,7 +788,7 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, tx2_pmu->stop_event = uncore_stop_event_l3c; break; case PMU_TYPE_DMC: - tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS; tx2_pmu->max_events = DMC_EVENT_MAX; tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; @@ -676,6 +799,17 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, tx2_pmu->start_event = uncore_start_event_dmc; tx2_pmu->stop_event = uncore_stop_event_dmc; break; + case PMU_TYPE_CCPI2: + tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS; + tx2_pmu->prorate_factor = 1; + tx2_pmu->max_events = CCPI2_EVENT_MAX; + tx2_pmu->attr_groups = ccpi2_pmu_attr_groups; + tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, + "uncore_ccpi2_%d", tx2_pmu->node); + tx2_pmu->init_cntr_base = init_cntr_base_ccpi2; + tx2_pmu->start_event = uncore_start_event_ccpi2; + tx2_pmu->stop_event = uncore_stop_event_ccpi2; + break; case PMU_TYPE_INVALID: devm_kfree(dev, tx2_pmu); return NULL; @@ -744,7 +878,8 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, if (cpu != tx2_pmu->cpu) return 0; - hrtimer_cancel(&tx2_pmu->hrtimer); + if (tx2_pmu->type != PMU_TYPE_CCPI2) + hrtimer_cancel(&tx2_pmu->hrtimer); cpumask_copy(&cpu_online_mask_temp, cpu_online_mask); cpumask_clear_cpu(cpu, &cpu_online_mask_temp); new_cpu = cpumask_any_and( -- 2.17.1