This patch is based on Will Deacon's work for ARM. The rationale and
design ideas are described here:

http://lists.infradead.org/pipermail/linux-arm-kernel/2010-April/013210.html

Sharing one PMU backend lets bug fixes benefit all PMU users/clients
(for now, Oprofile and perf events), lets them coexist in the system
without locking issues, and makes their results comparable. This patch
therefore moves Oprofile on top of perf events by replacing its
original low-level interfaces with new ones that call into perf events.
Oprofile uses raw events, so perf events (mipsxx in this patch) is
extended to support more mipsxx CPUs.

Signed-off-by: Deng-Cheng Zhu <dengcheng.zhu@xxxxxxxxx>
---
 arch/mips/kernel/perf_event.c        |    7 +-
 arch/mips/kernel/perf_event_mipsxx.c |  125 ++++++++++++------
 arch/mips/oprofile/common.c          |  237 +++++++++++++++++++++++++---
 3 files changed, 266 insertions(+), 103 deletions(-)
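(Reference sketch, below the cut so git-am drops it: the new Oprofile
backend in arch/mips/oprofile/common.c reduces to the pattern shown
here, one pinned raw perf event per CPU counter whose overflow handler
feeds oprofile_add_sample(). The helper names in the sketch (overflow,
start_one_counter) are illustrative only and do not appear in the
patch; error paths and the oprofilefs plumbing are omitted.)

/*
 * Illustrative sketch only: one pinned raw event on one CPU, with the
 * overflow handler forwarding samples to oprofile. Mirrors the calls
 * used by the patch; not additional kernel code.
 */
#include <linux/err.h>
#include <linux/oprofile.h>
#include <linux/perf_event.h>

static struct perf_event *pevent;       /* one counter on one CPU */

static void overflow(struct perf_event *event, int unused,
                struct perf_sample_data *data, struct pt_regs *regs)
{
        oprofile_add_sample(regs, 0);   /* counter 0 overflowed */
}

static int start_one_counter(int cpu, unsigned long event, unsigned long count)
{
        struct perf_event_attr attr = {
                .type           = PERF_TYPE_RAW,
                .size           = sizeof(attr),
                .config         = event,        /* raw event number from oprofilefs */
                .sample_period  = count,
                .pinned         = 1,            /* keep it scheduled on the PMU */
        };

        pevent = perf_event_create_kernel_counter(&attr, cpu, -1, overflow);
        return IS_ERR(pevent) ? PTR_ERR(pevent) : 0;
}

(The patch itself does this per (cpu, counter) pair from op_perf_start()
and releases the events again in op_perf_stop() and oprofile_arch_exit().)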
diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index dc3a553..f3bb2f9 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -390,6 +390,9 @@ mipspmu_map_general_event(int idx)
 {
         const struct mips_perf_event *pev;
 
+        if (!mipspmu->general_event_map)
+                return ERR_PTR(-EOPNOTSUPP);
+
         pev = ((*mipspmu->general_event_map)[idx].event_id ==
                 UNSUPPORTED_PERF_EVENT_ID ?
                 ERR_PTR(-EOPNOTSUPP) : &(*mipspmu->general_event_map)[idx]);
@@ -415,6 +418,9 @@ mipspmu_map_cache_event(u64 config)
         if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                 return ERR_PTR(-EINVAL);
 
+        if (!mipspmu->cache_event_map)
+                return ERR_PTR(-EOPNOTSUPP);
+
         pev = &((*mipspmu->cache_event_map)
                                         [cache_type]
                                         [cache_op]
@@ -424,7 +430,6 @@ mipspmu_map_cache_event(u64 config)
                 return ERR_PTR(-EOPNOTSUPP);
 
         return pev;
-
 }
 
 static int validate_event(struct cpu_hw_events *cpuc,
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 4e37a3a..aa8f5f9 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -904,39 +904,36 @@ mipsxx_pmu_map_raw_event(u64 config)
                 raw_event.range = T;
 #endif
                 break;
+        case CPU_20KC:
+        case CPU_25KF:
+        case CPU_5KC:
+        case CPU_R10000:
+        case CPU_R12000:
+        case CPU_R14000:
+        case CPU_SB1:
+        case CPU_SB1A:
+                raw_event.event_id = base_id;
+                raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
+#ifdef CONFIG_MIPS_MT_SMP
+                raw_event.range = P;
+#endif
+                break;
         }
 
         return &raw_event;
 }
 
 static struct mips_pmu mipsxxcore_pmu = {
-        .handle_irq = mipsxx_pmu_handle_irq,
-        .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-        .start = mipsxx_pmu_start,
-        .stop = mipsxx_pmu_stop,
-        .alloc_counter = mipsxx_pmu_alloc_counter,
-        .read_counter = mipsxx_pmu_read_counter,
-        .write_counter = mipsxx_pmu_write_counter,
-        .enable_event = mipsxx_pmu_enable_event,
-        .disable_event = mipsxx_pmu_disable_event,
-        .map_raw_event = mipsxx_pmu_map_raw_event,
-        .general_event_map = &mipsxxcore_event_map,
-        .cache_event_map = &mipsxxcore_cache_map,
-};
-
-static struct mips_pmu mipsxx74Kcore_pmu = {
-        .handle_irq = mipsxx_pmu_handle_irq,
-        .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-        .start = mipsxx_pmu_start,
-        .stop = mipsxx_pmu_stop,
-        .alloc_counter = mipsxx_pmu_alloc_counter,
-        .read_counter = mipsxx_pmu_read_counter,
-        .write_counter = mipsxx_pmu_write_counter,
-        .enable_event = mipsxx_pmu_enable_event,
-        .disable_event = mipsxx_pmu_disable_event,
-        .map_raw_event = mipsxx_pmu_map_raw_event,
-        .general_event_map = &mipsxx74Kcore_event_map,
-        .cache_event_map = &mipsxx74Kcore_cache_map,
+        .handle_irq = mipsxx_pmu_handle_irq,
+        .handle_shared_irq = mipsxx_pmu_handle_shared_irq,
+        .start = mipsxx_pmu_start,
+        .stop = mipsxx_pmu_stop,
+        .alloc_counter = mipsxx_pmu_alloc_counter,
+        .read_counter = mipsxx_pmu_read_counter,
+        .write_counter = mipsxx_pmu_write_counter,
+        .enable_event = mipsxx_pmu_enable_event,
+        .disable_event = mipsxx_pmu_disable_event,
+        .map_raw_event = mipsxx_pmu_map_raw_event,
 };
 
 static int __init
@@ -963,35 +960,77 @@ init_hw_perf_events(void)
         switch (current_cpu_type()) {
         case CPU_24K:
                 mipsxxcore_pmu.id = MIPS_PMU_ID_24K;
-                mipsxxcore_pmu.num_counters = counters;
-                mipspmu = &mipsxxcore_pmu;
+                mipsxxcore_pmu.general_event_map = &mipsxxcore_event_map;
+                mipsxxcore_pmu.cache_event_map = &mipsxxcore_cache_map;
                 break;
         case CPU_34K:
                 mipsxxcore_pmu.id = MIPS_PMU_ID_34K;
-                mipsxxcore_pmu.num_counters = counters;
-                mipspmu = &mipsxxcore_pmu;
+                mipsxxcore_pmu.general_event_map = &mipsxxcore_event_map;
+                mipsxxcore_pmu.cache_event_map = &mipsxxcore_cache_map;
                 break;
         case CPU_74K:
-                mipsxx74Kcore_pmu.id = MIPS_PMU_ID_74K;
-                mipsxx74Kcore_pmu.num_counters = counters;
-                mipspmu = &mipsxx74Kcore_pmu;
+                mipsxxcore_pmu.id = MIPS_PMU_ID_74K;
+                mipsxxcore_pmu.general_event_map = &mipsxx74Kcore_event_map;
+                mipsxxcore_pmu.cache_event_map = &mipsxx74Kcore_cache_map;
                 break;
         case CPU_1004K:
                 mipsxxcore_pmu.id = MIPS_PMU_ID_1004K;
-                mipsxxcore_pmu.num_counters = counters;
-                mipspmu = &mipsxxcore_pmu;
+                mipsxxcore_pmu.general_event_map = &mipsxxcore_event_map;
+                mipsxxcore_pmu.cache_event_map = &mipsxxcore_cache_map;
+                break;
+        /*
+         * To make perf events fully supported for the following cores,
+         * we need to fill out the general event map and the cache event
+         * map. Before that, raw events are supported on these cores.
+         * Note that the raw events for these cores do not go through the
+         * accurate check in mipsxx_pmu_map_raw_event(), but they can make
+         * the perf events the backend of perf clients such as Oprofile.
+         */
+        case CPU_20KC:
+                mipsxxcore_pmu.id = MIPS_PMU_ID_20K;
+                mipsxxcore_pmu.general_event_map = NULL;
+                mipsxxcore_pmu.cache_event_map = NULL;
+                break;
+        case CPU_25KF:
+                mipsxxcore_pmu.id = MIPS_PMU_ID_25K;
+                mipsxxcore_pmu.general_event_map = NULL;
+                mipsxxcore_pmu.cache_event_map = NULL;
+                break;
+        case CPU_5KC:
+                mipsxxcore_pmu.id = MIPS_PMU_ID_5K;
+                mipsxxcore_pmu.general_event_map = NULL;
+                mipsxxcore_pmu.cache_event_map = NULL;
+                break;
+        case CPU_R10000:
+                if ((current_cpu_data.processor_id & 0xff) == 0x20)
+                        mipsxxcore_pmu.id = MIPS_PMU_ID_R10000V2;
+                else
+                        mipsxxcore_pmu.id = MIPS_PMU_ID_R10000;
+
+                mipsxxcore_pmu.general_event_map = NULL;
+                mipsxxcore_pmu.cache_event_map = NULL;
+                break;
+        case CPU_R12000:
+        case CPU_R14000:
+                mipsxxcore_pmu.id = MIPS_PMU_ID_R12000;
+                mipsxxcore_pmu.general_event_map = NULL;
+                mipsxxcore_pmu.cache_event_map = NULL;
+                break;
+        case CPU_SB1:
+        case CPU_SB1A:
+                mipsxxcore_pmu.id = MIPS_PMU_ID_SB1;
+                mipsxxcore_pmu.general_event_map = NULL;
+                mipsxxcore_pmu.cache_event_map = NULL;
                 break;
         default:
-                pr_cont("Either hardware does not support performance "
-                        "counters, or not yet implemented.\n");
+                pr_cont("Perf events unsupported for this CPU.\n");
                 return -ENODEV;
         }
+        mipsxxcore_pmu.num_counters = counters;
+        mipspmu = &mipsxxcore_pmu;
 
-        if (mipspmu)
-                pr_cont("%s PMU enabled, %d counters available to each "
-                        "CPU\n",
-                        mips_pmu_names[mipspmu->id],
-                        mipspmu->num_counters);
+        pr_cont("%s PMU enabled, %d counters available to each "
+                "CPU\n", mips_pmu_names[mipspmu->id], mipspmu->num_counters);
 
         return 0;
 }
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index f9eb1ab..673745d 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -5,40 +5,160 @@
  *
  * Copyright (C) 2004, 2005 Ralf Baechle
  * Copyright (C) 2005 MIPS Technologies, Inc.
+ * Copyright (C) 2010 MIPS Technologies, Inc. Deng-Cheng Zhu (Using perf
+ * events as the backend of Oprofile. This is mainly based on the idea and
+ * the code for ARM.)
  */
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/oprofile.h>
 #include <linux/smp.h>
+#include <linux/cpumask.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
 #include <asm/cpu-info.h>
+#include <asm/pmu.h>
+
+#ifdef CONFIG_HW_PERF_EVENTS
+/* Per-counter configuration as set via oprofilefs. */
+struct op_counter_config {
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long count;
+        /* Dummies because I am too lazy to hack the userspace tools. */
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long exl;
+        unsigned long unit_mask;
+        struct perf_event_attr attr;
+};
+static struct op_counter_config ctr[20];
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int perf_num_counters;
 
-#include "op_impl.h"
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+                        struct perf_sample_data *data, struct pt_regs *regs)
+{
+        int id;
+        u32 cpu = smp_processor_id();
+
+        for (id = 0; id < perf_num_counters; ++id)
+                if (perf_events[cpu][id] == event)
+                        break;
+
+        if (id != perf_num_counters)
+                oprofile_add_sample(regs, id);
+        else
+                pr_warning("oprofile: ignoring spurious overflow "
+                                "on cpu %u\n", cpu);
+}
 
-extern struct op_mips_model op_model_mipsxx_ops __weak;
-extern struct op_mips_model op_model_rm9000_ops __weak;
-extern struct op_mips_model op_model_loongson2_ops __weak;
+/*
+ * Attributes are created as "pinned" events and so are permanently
+ * scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+        int i;
+        u32 size = sizeof(struct perf_event_attr);
+        struct perf_event_attr *attr;
+
+        for (i = 0; i < perf_num_counters; ++i) {
+                attr = &ctr[i].attr;
+                memset(attr, 0, size);
+                attr->type = PERF_TYPE_RAW;
+                attr->size = size;
+                attr->config = ctr[i].event + (i & 0x1 ? 128 : 0);
+                attr->sample_period = ctr[i].count;
+                attr->pinned = 1;
+                /*
+                 * Only exclude_user/exclude_kernel/exclude_hv are defined
+                 * in perf_event_attr, maybe we can use exclude_hv for exl.
+                 * But user space perf/oprofile tools need to get agreement.
+                 */
+                if (!ctr[i].user)
+                        attr->exclude_user = 1;
+                if (!ctr[i].kernel && !ctr[i].exl)
+                        attr->exclude_kernel = 1;
+        }
+}
 
-static struct op_mips_model *model;
+static int op_create_counter(int cpu, int event)
+{
+        int ret = 0;
+        struct perf_event *pevent;
+
+        if (!ctr[event].enabled || (perf_events[cpu][event] != NULL))
+                return ret;
+
+        pevent = perf_event_create_kernel_counter(&ctr[event].attr,
+                                                  cpu, -1,
+                                                  op_overflow_handler);
+
+        if (IS_ERR(pevent)) {
+                ret = PTR_ERR(pevent);
+        } else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+                pr_warning("oprofile: failed to enable event %d "
+                        "on CPU %d (state %d)\n", event, cpu, pevent->state);
+                ret = -EBUSY;
+        } else {
+                perf_events[cpu][event] = pevent;
+        }
 
-static struct op_counter_config ctr[20];
+        return ret;
+}
 
-static int op_mips_setup(void)
+static void op_destroy_counter(int cpu, int event)
+{
+        struct perf_event *pevent = perf_events[cpu][event];
+
+        if (pevent) {
+                perf_event_release_kernel(pevent);
+                perf_events[cpu][event] = NULL;
+        }
+}
+
+static int op_perf_start(void)
 {
-        /* Pre-compute the values to stuff in the hardware registers.  */
-        model->reg_setup(ctr);
+        int cpu, event, ret = 0;
+
+        for_each_online_cpu(cpu) {
+                for (event = 0; event < perf_num_counters; ++event) {
+                        ret = op_create_counter(cpu, event);
+                        if (ret)
+                                goto out;
+                }
+        }
+
+out:
+        return ret;
+}
 
-        /* Configure the registers on all cpus.  */
-        on_each_cpu(model->cpu_setup, NULL, 1);
+static void op_perf_stop(void)
+{
+        int cpu, event;
 
-        return 0;
+        for_each_online_cpu(cpu)
+                for (event = 0; event < perf_num_counters; ++event)
+                        op_destroy_counter(cpu, event);
+}
+
+static int op_mips_setup(void)
+{
+        op_perf_setup();
+
+        return 0;
 }
 
 static int op_mips_create_files(struct super_block *sb, struct dentry *root)
 {
         int i;
 
-        for (i = 0; i < model->num_counters; ++i) {
+        for (i = 0; i < perf_num_counters; ++i) {
                 struct dentry *dir;
                 char buf[4];
 
@@ -60,70 +180,69 @@ static int op_mips_create_files(struct super_block *sb, struct dentry *root)
 
 static int op_mips_start(void)
 {
-        on_each_cpu(model->cpu_start, NULL, 1);
-
-        return 0;
+        return op_perf_start();
 }
 
 static void op_mips_stop(void)
 {
-        /* Disable performance monitoring for all counters.  */
-        on_each_cpu(model->cpu_stop, NULL, 1);
+        op_perf_stop();
 }
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-        struct op_mips_model *lmodel = NULL;
-        int res;
-
-        switch (current_cpu_type()) {
-        case CPU_5KC:
-        case CPU_20KC:
-        case CPU_24K:
-        case CPU_25KF:
-        case CPU_34K:
-        case CPU_1004K:
-        case CPU_74K:
-        case CPU_SB1:
-        case CPU_SB1A:
-        case CPU_R10000:
-        case CPU_R12000:
-        case CPU_R14000:
-                lmodel = &op_model_mipsxx_ops;
-                break;
-
-        case CPU_RM9000:
-                lmodel = &op_model_rm9000_ops;
-                break;
-        case CPU_LOONGSON2:
-                lmodel = &op_model_loongson2_ops;
-                break;
-        };
-
-        if (!lmodel)
-                return -ENODEV;
-
-        res = lmodel->init();
-        if (res)
-                return res;
-
-        model = lmodel;
+        int cpu;
+
+        perf_num_counters = mipspmu_get_max_events();
+
+        for_each_possible_cpu(cpu) {
+                perf_events[cpu] = kcalloc(perf_num_counters,
+                                sizeof(struct perf_event *), GFP_KERNEL);
+                if (!perf_events[cpu]) {
+                        pr_info("oprofile: failed to allocate %d perf events "
+                                        "for cpu %d\n", perf_num_counters, cpu);
+                        while (--cpu >= 0)
+                                kfree(perf_events[cpu]);
+                        return -ENOMEM;
+                }
+        }
 
         ops->create_files = op_mips_create_files;
         ops->setup = op_mips_setup;
-        //ops->shutdown = op_mips_shutdown;
+        ops->shutdown = op_mips_stop;
         ops->start = op_mips_start;
         ops->stop = op_mips_stop;
-        ops->cpu_type = lmodel->cpu_type;
+        ops->cpu_type = (char *)mips_pmu_names[mipspmu_get_pmu_id()];
 
-        printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
-               lmodel->cpu_type);
+        if (!ops->cpu_type)
+                return -ENODEV;
+        else
+                pr_info("oprofile: using %s performance monitoring.\n",
+                                ops->cpu_type);
 
         return 0;
 }
 
 void oprofile_arch_exit(void)
 {
-        if (model)
-                model->exit();
+        int cpu, id;
+        struct perf_event *event;
+
+        if (*perf_events) {
+                for_each_possible_cpu(cpu) {
+                        for (id = 0; id < perf_num_counters; ++id) {
+                                event = perf_events[cpu][id];
+                                if (event)
+                                        perf_event_release_kernel(event);
+                        }
+                        kfree(perf_events[cpu]);
+                }
+        }
+}
+#else
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+        pr_info("oprofile: hardware counters not available\n");
+        return -ENODEV;
 }
+void oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */
-- 
1.6.3.3