* split off pmu info into singleton and per-cpu bits * setup PMU on all cores Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Alexey Brodkin <abrodkin@xxxxxxxxxxxx> --- Compared to v1: [1] Rebase on top of previous patches hence changes in patch itself [2] Cosmetics arch/arc/kernel/perf_event.c | 71 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 3203141..008fa58 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -21,13 +21,25 @@ struct arc_pmu { struct pmu pmu; + unsigned int irq; int n_counters; int n_events; - unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)]; u64 max_period; int ev_hw_idx[PERF_COUNT_ARC_HW_MAX]; + u64 raw_events[ARC_PERF_MAX_EVENTS]; +}; + +struct arc_pmu_cpu { + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + unsigned long used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)]; + + /* + * The events that are active on the PMU for the given index. + */ struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS]; - u64 raw_events[ARC_PERF_MAX_EVENTS]; }; struct arc_callchain_trace { @@ -69,6 +81,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) } static struct arc_pmu *arc_pmu; +static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu); /* read counter #idx; note that counter# != event# on ARC! 
*/ static uint64_t arc_pmu_read_counter(int idx) @@ -323,10 +336,12 @@ static void arc_pmu_stop(struct perf_event *event, int flags) static void arc_pmu_del(struct perf_event *event, int flags) { + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + arc_pmu_stop(event, PERF_EF_UPDATE); - __clear_bit(event->hw.idx, arc_pmu->used_mask); + __clear_bit(event->hw.idx, pmu_cpu->used_mask); - arc_pmu->act_counter[event->hw.idx] = 0; + pmu_cpu->act_counter[event->hw.idx] = 0; perf_event_update_userpage(event); } @@ -334,22 +349,23 @@ static void arc_pmu_del(struct perf_event *event, int flags) /* allocate hardware counter and optionally start counting */ static int arc_pmu_add(struct perf_event *event, int flags) { + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - if (__test_and_set_bit(idx, arc_pmu->used_mask)) { - idx = find_first_zero_bit(arc_pmu->used_mask, + if (__test_and_set_bit(idx, pmu_cpu->used_mask)) { + idx = find_first_zero_bit(pmu_cpu->used_mask, arc_pmu->n_counters); if (idx == arc_pmu->n_counters) return -EAGAIN; - __set_bit(idx, arc_pmu->used_mask); + __set_bit(idx, pmu_cpu->used_mask); hwc->idx = idx; } write_aux_reg(ARC_REG_PCT_INDEX, idx); - arc_pmu->act_counter[idx] = event; + pmu_cpu->act_counter[idx] = event; if (is_sampling_event(event)) { /* Mimic full counter overflow as other arches do */ @@ -380,7 +396,7 @@ static int arc_pmu_add(struct perf_event *event, int flags) static irqreturn_t arc_pmu_intr(int irq, void *dev) { struct perf_sample_data data; - struct arc_pmu *arc_pmu = (struct arc_pmu *)dev; + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); struct pt_regs *regs; int active_ints; int idx; @@ -392,7 +408,7 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) regs = get_irq_regs(); for (idx = 0; idx < arc_pmu->n_counters; idx++) { - struct perf_event *event = arc_pmu->act_counter[idx]; + struct perf_event *event = pmu_cpu->act_counter[idx]; struct 
hw_perf_event *hwc; if (!(active_ints & (1 << idx))) @@ -435,6 +451,17 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev) #endif /* CONFIG_ISA_ARCV2 */ +void arc_cpu_pmu_irq_init(void) +{ + struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu); + + arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr, + "ARC perf counters", pmu_cpu); + + /* Clear all pending interrupt flags */ + write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); +} + static int arc_pmu_device_probe(struct platform_device *pdev) { struct arc_reg_pct_build pct_bcr; @@ -543,18 +570,30 @@ static int arc_pmu_device_probe(struct platform_device *pdev) if (has_interrupts) { int irq = platform_get_irq(pdev, 0); + unsigned long flags; if (irq < 0) { pr_err("Cannot get IRQ number for the platform\n"); return -ENODEV; } - ret = devm_request_irq(&pdev->dev, irq, arc_pmu_intr, 0, - "arc-pmu", arc_pmu); - if (ret) { - pr_err("could not allocate PMU IRQ\n"); - return ret; - } + arc_pmu->irq = irq; + + /* + * arc_cpu_pmu_irq_init() needs to be called on all cores for + * their respective local PMU. + * However we use opencoded on_each_cpu() to ensure it is called + * on core0 first, so that arc_request_percpu_irq() sets up + * AUTOEN etc. Otherwise enable_percpu_irq() fails to enable + * perf IRQ on non master cores. + * see arc_request_percpu_irq() + */ + preempt_disable(); + local_irq_save(flags); + arc_cpu_pmu_irq_init(); + local_irq_restore(flags); + smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1); + preempt_enable(); /* Clean all pending interrupt flags */ write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff); -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-arch" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html