[PATCH v5 11/12] MIPS/Oprofile: use Perf-events framework as backend

This patch is based on Will Deacon's work for ARM. The rationale and
design ideas are well described here:
http://lists.infradead.org/pipermail/linux-arm-kernel/2010-April/013210.html

This lets different PMU users/clients (for now, Oprofile and Perf-events)
share bug fixes, coexist in the system without locking conflicts, and
produce comparable results.

This patch therefore moves Oprofile on top of Perf-events by replacing
its original low-level interfaces with new ones that call into
Perf-events.
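
For reference, the core pattern (as implemented in the common.c changes
below) is one pinned kernel counter per CPU whose overflows feed
oprofile_add_sample(); in this sketch, raw_event_id and count stand in
for the values configured through oprofilefs:

	struct perf_event_attr attr = {
		.type		= PERF_TYPE_RAW,
		.size		= sizeof(attr),
		.config		= raw_event_id,
		.sample_period	= count,
		.pinned		= 1,	/* keep it scheduled on the PMU */
	};
	struct perf_event *ev;

	/* One counter per CPU; overflows are routed to Oprofile. */
	ev = perf_event_create_kernel_counter(&attr, cpu, -1,
					      op_overflow_handler);
	if (IS_ERR(ev))
		return PTR_ERR(ev);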

Oprofile uses raw events, so the Perf-events backend (mipsxx in this
patch) is extended to support raw events on more mipsxx CPUs.
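
For the cores newly covered here, a raw config is decoded roughly as
follows (mirroring mipsxx_pmu_map_raw_event() in the diff below; the
masking matches the existing mipsxx code):

	unsigned int raw_id = config & 0xff;
	unsigned int base_id = raw_id & 0x7f;

	raw_event.event_id = base_id;
	/* Raw ids above 127 select the odd-numbered counters. */
	raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;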

Signed-off-by: Deng-Cheng Zhu <dengcheng.zhu@xxxxxxxxx>
---
 arch/mips/kernel/perf_event.c        |    7 +-
 arch/mips/kernel/perf_event_mipsxx.c |  125 ++++++++++++------
 arch/mips/oprofile/common.c          |  237 +++++++++++++++++++++++++---------
 3 files changed, 266 insertions(+), 103 deletions(-)

diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c
index dc3a553..f3bb2f9 100644
--- a/arch/mips/kernel/perf_event.c
+++ b/arch/mips/kernel/perf_event.c
@@ -390,6 +390,9 @@ mipspmu_map_general_event(int idx)
 {
 	const struct mips_perf_event *pev;
 
+	if (!mipspmu->general_event_map)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	pev = ((*mipspmu->general_event_map)[idx].event_id ==
 		UNSUPPORTED_PERF_EVENT_ID ? ERR_PTR(-EOPNOTSUPP) :
 		&(*mipspmu->general_event_map)[idx]);
@@ -415,6 +418,9 @@ mipspmu_map_cache_event(u64 config)
 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return ERR_PTR(-EINVAL);
 
+	if (!mipspmu->cache_event_map)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	pev = &((*mipspmu->cache_event_map)
 					[cache_type]
 					[cache_op]
@@ -424,7 +430,6 @@ mipspmu_map_cache_event(u64 config)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	return pev;
-
 }
 
 static int validate_event(struct cpu_hw_events *cpuc,
diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c
index 4e37a3a..aa8f5f9 100644
--- a/arch/mips/kernel/perf_event_mipsxx.c
+++ b/arch/mips/kernel/perf_event_mipsxx.c
@@ -904,39 +904,36 @@ mipsxx_pmu_map_raw_event(u64 config)
 			raw_event.range = T;
 #endif
 		break;
+	case CPU_20KC:
+	case CPU_25KF:
+	case CPU_5KC:
+	case CPU_R10000:
+	case CPU_R12000:
+	case CPU_R14000:
+	case CPU_SB1:
+	case CPU_SB1A:
+		raw_event.event_id = base_id;
+		raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN;
+#ifdef CONFIG_MIPS_MT_SMP
+		raw_event.range = P;
+#endif
+		break;
 	}
 
 	return &raw_event;
 }
 
 static struct mips_pmu mipsxxcore_pmu = {
-	.handle_irq = mipsxx_pmu_handle_irq,
-	.handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-	.start = mipsxx_pmu_start,
-	.stop = mipsxx_pmu_stop,
-	.alloc_counter = mipsxx_pmu_alloc_counter,
-	.read_counter = mipsxx_pmu_read_counter,
-	.write_counter = mipsxx_pmu_write_counter,
-	.enable_event = mipsxx_pmu_enable_event,
-	.disable_event = mipsxx_pmu_disable_event,
-	.map_raw_event = mipsxx_pmu_map_raw_event,
-	.general_event_map = &mipsxxcore_event_map,
-	.cache_event_map = &mipsxxcore_cache_map,
-};
-
-static struct mips_pmu mipsxx74Kcore_pmu = {
-	.handle_irq = mipsxx_pmu_handle_irq,
-	.handle_shared_irq = mipsxx_pmu_handle_shared_irq,
-	.start = mipsxx_pmu_start,
-	.stop = mipsxx_pmu_stop,
-	.alloc_counter = mipsxx_pmu_alloc_counter,
-	.read_counter = mipsxx_pmu_read_counter,
-	.write_counter = mipsxx_pmu_write_counter,
-	.enable_event = mipsxx_pmu_enable_event,
-	.disable_event = mipsxx_pmu_disable_event,
-	.map_raw_event = mipsxx_pmu_map_raw_event,
-	.general_event_map = &mipsxx74Kcore_event_map,
-	.cache_event_map = &mipsxx74Kcore_cache_map,
+	.handle_irq		= mipsxx_pmu_handle_irq,
+	.handle_shared_irq	= mipsxx_pmu_handle_shared_irq,
+	.start			= mipsxx_pmu_start,
+	.stop			= mipsxx_pmu_stop,
+	.alloc_counter		= mipsxx_pmu_alloc_counter,
+	.read_counter		= mipsxx_pmu_read_counter,
+	.write_counter		= mipsxx_pmu_write_counter,
+	.enable_event		= mipsxx_pmu_enable_event,
+	.disable_event		= mipsxx_pmu_disable_event,
+	.map_raw_event		= mipsxx_pmu_map_raw_event,
 };
 
 static int __init
@@ -963,35 +960,77 @@ init_hw_perf_events(void)
 	switch (current_cpu_type()) {
 	case CPU_24K:
 		mipsxxcore_pmu.id = MIPS_PMU_ID_24K;
-		mipsxxcore_pmu.num_counters = counters;
-		mipspmu = &mipsxxcore_pmu;
+		mipsxxcore_pmu.general_event_map = &mipsxxcore_event_map;
+		mipsxxcore_pmu.cache_event_map = &mipsxxcore_cache_map;
 		break;
 	case CPU_34K:
 		mipsxxcore_pmu.id = MIPS_PMU_ID_34K;
-		mipsxxcore_pmu.num_counters = counters;
-		mipspmu = &mipsxxcore_pmu;
+		mipsxxcore_pmu.general_event_map = &mipsxxcore_event_map;
+		mipsxxcore_pmu.cache_event_map = &mipsxxcore_cache_map;
 		break;
 	case CPU_74K:
-		mipsxx74Kcore_pmu.id = MIPS_PMU_ID_74K;
-		mipsxx74Kcore_pmu.num_counters = counters;
-		mipspmu = &mipsxx74Kcore_pmu;
+		mipsxxcore_pmu.id = MIPS_PMU_ID_74K;
+		mipsxxcore_pmu.general_event_map = &mipsxx74Kcore_event_map;
+		mipsxxcore_pmu.cache_event_map = &mipsxx74Kcore_cache_map;
 		break;
 	case CPU_1004K:
 		mipsxxcore_pmu.id = MIPS_PMU_ID_1004K;
-		mipsxxcore_pmu.num_counters = counters;
-		mipspmu = &mipsxxcore_pmu;
+		mipsxxcore_pmu.general_event_map = &mipsxxcore_event_map;
+		mipsxxcore_pmu.cache_event_map = &mipsxxcore_cache_map;
+		break;
+	/*
+	 * Full perf-events support for the following cores requires
+	 * filling out their general and cache event maps. Until then,
+	 * only raw events are supported on them. Note that raw events on
+	 * these cores bypass the detailed checking in
+	 * mipsxx_pmu_map_raw_event(), but they are enough to let perf
+	 * events serve as the backend for clients such as Oprofile.
+	 */
+	case CPU_20KC:
+		mipsxxcore_pmu.id = MIPS_PMU_ID_20K;
+		mipsxxcore_pmu.general_event_map = NULL;
+		mipsxxcore_pmu.cache_event_map = NULL;
+		break;
+	case CPU_25KF:
+		mipsxxcore_pmu.id = MIPS_PMU_ID_25K;
+		mipsxxcore_pmu.general_event_map = NULL;
+		mipsxxcore_pmu.cache_event_map = NULL;
+		break;
+	case CPU_5KC:
+		mipsxxcore_pmu.id = MIPS_PMU_ID_5K;
+		mipsxxcore_pmu.general_event_map = NULL;
+		mipsxxcore_pmu.cache_event_map = NULL;
+		break;
+	case CPU_R10000:
+		if ((current_cpu_data.processor_id & 0xff) == 0x20)
+			mipsxxcore_pmu.id = MIPS_PMU_ID_R10000V2;
+		else
+			mipsxxcore_pmu.id = MIPS_PMU_ID_R10000;
+
+		mipsxxcore_pmu.general_event_map = NULL;
+		mipsxxcore_pmu.cache_event_map = NULL;
+		break;
+	case CPU_R12000:
+	case CPU_R14000:
+		mipsxxcore_pmu.id = MIPS_PMU_ID_R12000;
+		mipsxxcore_pmu.general_event_map = NULL;
+		mipsxxcore_pmu.cache_event_map = NULL;
+		break;
+	case CPU_SB1:
+	case CPU_SB1A:
+		mipsxxcore_pmu.id = MIPS_PMU_ID_SB1;
+		mipsxxcore_pmu.general_event_map = NULL;
+		mipsxxcore_pmu.cache_event_map = NULL;
 		break;
 	default:
-		pr_cont("Either hardware does not support performance "
-			"counters, or not yet implemented.\n");
+		pr_cont("Perf events unsupported for this CPU.\n");
 		return -ENODEV;
 	}
+	mipsxxcore_pmu.num_counters = counters;
+	mipspmu = &mipsxxcore_pmu;
 
-	if (mipspmu)
-		pr_cont("%s PMU enabled, %d counters available to each "
-			"CPU\n",
-			mips_pmu_names[mipspmu->id],
-			mipspmu->num_counters);
+	pr_cont("%s PMU enabled, %d counters available to each "
+		"CPU\n", mips_pmu_names[mipspmu->id], mipspmu->num_counters);
 
 	return 0;
 }
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index f9eb1ab..673745d 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -5,40 +5,160 @@
  *
  * Copyright (C) 2004, 2005 Ralf Baechle
  * Copyright (C) 2005 MIPS Technologies, Inc.
+ * Copyright (C) 2010 MIPS Technologies, Inc. Deng-Cheng Zhu (Using perf
+ * events as the backend of Oprofile. This is mainly based on the idea and
+ * the code for ARM.)
  */
 #include <linux/compiler.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/oprofile.h>
 #include <linux/smp.h>
+#include <linux/cpumask.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
 #include <asm/cpu-info.h>
+#include <asm/pmu.h>
+
+#ifdef CONFIG_HW_PERF_EVENTS
+/* Per-counter configuration as set via oprofilefs.  */
+struct op_counter_config {
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long count;
+	/* Dummies because I am too lazy to hack the userspace tools.  */
+	unsigned long kernel;
+	unsigned long user;
+	unsigned long exl;
+	unsigned long unit_mask;
+	struct perf_event_attr attr;
+};
+static struct op_counter_config ctr[20];
+static struct perf_event **perf_events[nr_cpumask_bits];
+static int perf_num_counters;
 
-#include "op_impl.h"
+/*
+ * Overflow callback for oprofile.
+ */
+static void op_overflow_handler(struct perf_event *event, int unused,
+		struct perf_sample_data *data, struct pt_regs *regs)
+{
+	int id;
+	u32 cpu = smp_processor_id();
+
+	for (id = 0; id < perf_num_counters; ++id)
+		if (perf_events[cpu][id] == event)
+			break;
+
+	if (id != perf_num_counters)
+		oprofile_add_sample(regs, id);
+	else
+		pr_warning("oprofile: ignoring spurious overflow "
+			"on cpu %u\n", cpu);
+}
 
-extern struct op_mips_model op_model_mipsxx_ops __weak;
-extern struct op_mips_model op_model_rm9000_ops __weak;
-extern struct op_mips_model op_model_loongson2_ops __weak;
+/*
+ * Attributes are created as "pinned" events and so are permanently
+ * scheduled on the PMU.
+ */
+static void op_perf_setup(void)
+{
+	int i;
+	u32 size = sizeof(struct perf_event_attr);
+	struct perf_event_attr *attr;
+
+	for (i = 0; i < perf_num_counters; ++i) {
+		attr = &ctr[i].attr;
+		memset(attr, 0, size);
+		attr->type		= PERF_TYPE_RAW;
+		attr->size		= size;
+		attr->config		= ctr[i].event + (i & 0x1 ? 128 : 0);
+		attr->sample_period	= ctr[i].count;
+		attr->pinned		= 1;
+		/*
+		 * Only exclude_user/exclude_kernel/exclude_hv are defined
+		 * in perf_event_attr, maybe we can use exclude_hv for exl.
+		 * But user space perf/oprofile tools need to get agreement.
+		 */
+		if (!ctr[i].user)
+			attr->exclude_user = 1;
+		if (!ctr[i].kernel && !ctr[i].exl)
+			attr->exclude_kernel = 1;
+	}
+}
 
-static struct op_mips_model *model;
+static int op_create_counter(int cpu, int event)
+{
+	int ret = 0;
+	struct perf_event *pevent;
+
+	if (!ctr[event].enabled || (perf_events[cpu][event] != NULL))
+		return ret;
+
+	pevent = perf_event_create_kernel_counter(&ctr[event].attr,
+						  cpu, -1,
+						  op_overflow_handler);
+
+	if (IS_ERR(pevent)) {
+		ret = PTR_ERR(pevent);
+	} else if (pevent->state != PERF_EVENT_STATE_ACTIVE) {
+		pr_warning("oprofile: failed to enable event %d "
+			"on CPU %d (state %d)\n", event, cpu, pevent->state);
+		ret = -EBUSY;
+	} else {
+		perf_events[cpu][event] = pevent;
+	}
 
-static struct op_counter_config ctr[20];
+	return ret;
+}
 
-static int op_mips_setup(void)
+static void op_destroy_counter(int cpu, int event)
+{
+	struct perf_event *pevent = perf_events[cpu][event];
+
+	if (pevent) {
+		perf_event_release_kernel(pevent);
+		perf_events[cpu][event] = NULL;
+	}
+}
+
+static int op_perf_start(void)
 {
-	/* Pre-compute the values to stuff in the hardware registers.  */
-	model->reg_setup(ctr);
+	int cpu, event, ret = 0;
+
+	for_each_online_cpu(cpu) {
+		for (event = 0; event < perf_num_counters; ++event) {
+			ret = op_create_counter(cpu, event);
+			if (ret)
+				goto out;
+		}
+	}
+
+out:
+	return ret;
+}
 
-	/* Configure the registers on all cpus.  */
-	on_each_cpu(model->cpu_setup, NULL, 1);
+static void op_perf_stop(void)
+{
+	int cpu, event;
 
-        return 0;
+	for_each_online_cpu(cpu)
+		for (event = 0; event < perf_num_counters; ++event)
+			op_destroy_counter(cpu, event);
+}
+
+static int op_mips_setup(void)
+{
+	op_perf_setup();
+
+	return 0;
 }
 
 static int op_mips_create_files(struct super_block *sb, struct dentry *root)
 {
 	int i;
 
-	for (i = 0; i < model->num_counters; ++i) {
+	for (i = 0; i < perf_num_counters; ++i) {
 		struct dentry *dir;
 		char buf[4];
 
@@ -60,70 +180,69 @@ static int op_mips_create_files(struct super_block *sb, struct dentry *root)
 
 static int op_mips_start(void)
 {
-	on_each_cpu(model->cpu_start, NULL, 1);
-
-	return 0;
+	return op_perf_start();
 }
 
 static void op_mips_stop(void)
 {
-	/* Disable performance monitoring for all counters.  */
-	on_each_cpu(model->cpu_stop, NULL, 1);
+	op_perf_stop();
 }
 
 int __init oprofile_arch_init(struct oprofile_operations *ops)
 {
-	struct op_mips_model *lmodel = NULL;
-	int res;
-
-	switch (current_cpu_type()) {
-	case CPU_5KC:
-	case CPU_20KC:
-	case CPU_24K:
-	case CPU_25KF:
-	case CPU_34K:
-	case CPU_1004K:
-	case CPU_74K:
-	case CPU_SB1:
-	case CPU_SB1A:
-	case CPU_R10000:
-	case CPU_R12000:
-	case CPU_R14000:
-		lmodel = &op_model_mipsxx_ops;
-		break;
-
-	case CPU_RM9000:
-		lmodel = &op_model_rm9000_ops;
-		break;
-	case CPU_LOONGSON2:
-		lmodel = &op_model_loongson2_ops;
-		break;
-	};
-
-	if (!lmodel)
-		return -ENODEV;
-
-	res = lmodel->init();
-	if (res)
-		return res;
-
-	model = lmodel;
+	int cpu;
+
+	perf_num_counters = mipspmu_get_max_events();
+
+	for_each_possible_cpu(cpu) {
+		perf_events[cpu] = kcalloc(perf_num_counters,
+				sizeof(struct perf_event *), GFP_KERNEL);
+		if (!perf_events[cpu]) {
+			pr_info("oprofile: failed to allocate %d perf events "
+				"for cpu %d\n", perf_num_counters, cpu);
+			while (--cpu >= 0)
+				kfree(perf_events[cpu]);
+			return -ENOMEM;
+		}
+	}
 
 	ops->create_files	= op_mips_create_files;
 	ops->setup		= op_mips_setup;
-	//ops->shutdown         = op_mips_shutdown;
+	ops->shutdown		= op_mips_stop;
 	ops->start		= op_mips_start;
 	ops->stop		= op_mips_stop;
-	ops->cpu_type		= lmodel->cpu_type;
+	ops->cpu_type		= (char *)mips_pmu_names[mipspmu_get_pmu_id()];
 
-	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
-	       lmodel->cpu_type);
+	if (!ops->cpu_type)
+		return -ENODEV;
+	else
+		pr_info("oprofile: using %s performance monitoring.\n",
+			ops->cpu_type);
 
 	return 0;
 }
 
 void oprofile_arch_exit(void)
 {
-	if (model)
-		model->exit();
+	int cpu, id;
+	struct perf_event *event;
+
+	if (*perf_events) {
+		for_each_possible_cpu(cpu) {
+			for (id = 0; id < perf_num_counters; ++id) {
+				event = perf_events[cpu][id];
+				if (event)
+					perf_event_release_kernel(event);
+			}
+			kfree(perf_events[cpu]);
+		}
+	}
+}
+#else
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	pr_info("oprofile: hardware counters not available\n");
+	return -ENODEV;
 }
+void oprofile_arch_exit(void) {}
+#endif /* CONFIG_HW_PERF_EVENTS */
-- 
1.6.3.3