[PATCH 1/3] acpi-cpufreq: Move average performance functions into separate file and Kconfig

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



exports:
EXPORT_SYMBOL_GPL(get_average_perf)
unsigned int get_average_perf(struct cpufreq_policy *policy, unsigned int cpu,
			      u64 *saved_aperf, u64 *saved_mperf)

This function will read the
APERF (0x000000E8) MSR:
Incremented by hardware at the actual number of core clock cycles

and the
MPERF (0x000000E7) MSR:
Incremented by hardware at the P0 (maximum advertised) frequency

and will calculate the average frequency the CPU core has run, based on the
saved_aperf/saved_mperf values from a previous call.

Example (untested, evaluates the average freq of the last ms on core 0):
===============
#ifdef CONFIG_X86_AVERAGE_FREQUENCY /* also make ... (comment truncated in the original mail) */
u64 saved_aperf = 0, saved_mperf = 0; /* zero marks the first call */
struct cpufreq_policy policy;
unsigned int cpu = 0;
struct cpuinfo_x86 *c = &cpu_data(cpu);
unsigned int average_freq;

if (!cpu_has(c, X86_FEATURE_APERF_MPERF)) /* Introduced in next patch */
    return;
cpufreq_get_policy(&policy, cpu);
get_average_perf(&policy, cpu, &saved_aperf, &saved_mperf);
msleep(1);
average_freq = get_average_perf(&policy, cpu, &saved_aperf, &saved_mperf);
#endif
===============

One could now easily add a debug monitor of the average frequency over a
process's life cycle. There are probably other use cases; I could imagine
sched_mc developers finding this interface convenient for debugging/optimizing.

Additional modification:
Use smp_call_function_single instead of work_on_cpu.
The latter was broken anyway: 0 was always returned, because the called
function read_measured_perf_ctrs always returns zero and work_on_cpu's return
value was wrongly checked like this:
if (!work_on_cpu(cpu, read_measured_perf_ctrs, &readin))
		return 0;

Signed-off-by: Thomas Renninger <trenn@xxxxxxx>
Cc: <linux-acpi@xxxxxxxxxxxxxxx>
Cc: Pallipadi Venkatesh <venkatesh.pallipadi@xxxxxxxxx>
Cc: <cpufreq@xxxxxxxxxxxxxxx>
Cc: <svaidy@xxxxxxxxxxxxxxxxxx>
Cc: <suresh.b.siddha@xxxxxxxxx>
---
 arch/x86/kernel/cpu/cpufreq/Kconfig             |   15 +++
 arch/x86/kernel/cpu/cpufreq/Makefile            |    1 +
 arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c      |   94 +--------------
 arch/x86/kernel/cpu/cpufreq/average_frequency.c |  146 +++++++++++++++++++++++
 include/linux/cpufreq.h                         |   13 ++
 5 files changed, 178 insertions(+), 91 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/cpufreq/average_frequency.c

diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 52c8398..3ca1602 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -26,6 +26,21 @@ config X86_ACPI_CPUFREQ
 
 	  If in doubt, say N.
 
+config X86_AVERAGE_FREQUENCY
+	bool "Calculate and consider average frequency over a time period"
+	depends on CPU_FREQ_TABLE
+	help
+	  Latest X86 Intel processors can overclock a single core
+	  behind the kernel's back (ida cpuinfo flag) if specific requirements
+	  are met.
+	  With this option, the kernel can evaluate the real frequency a core
+	  was running on over a time period and kernel parts, for example
+	  the cpufreq core and governor or later the scheduler can consider and
+	  optimize for the "boost" frequency on such processors.
+	  Currently the only driver which serves such processors is acpi-cpufreq.
+	  This option should be enabled for this driver at least
+	  on processors which show the "ida" flag in /proc/cpuinfo
+
 config ELAN_CPUFREQ
 	tristate "AMD Elan SC400 and SC410"
 	select CPU_FREQ_TABLE
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile
index 509296d..3eb5a64 100644
--- a/arch/x86/kernel/cpu/cpufreq/Makefile
+++ b/arch/x86/kernel/cpu/cpufreq/Makefile
@@ -2,6 +2,7 @@
 # K8 systems. ACPI is preferred to all other hardware-specific drivers.
 # speedstep-* is preferred over p4-clockmod.
 
+obj-$(CONFIG_X86_AVERAGE_FREQUENCY)     += average_frequency.o
 obj-$(CONFIG_X86_POWERNOW_K8)		+= powernow-k8.o
 obj-$(CONFIG_X86_ACPI_CPUFREQ)		+= acpi-cpufreq.o
 obj-$(CONFIG_X86_POWERNOW_K6)		+= powernow-k6.o
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 208ecf6..bf9b6b1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -248,100 +248,12 @@ static u32 get_cur_val(const struct cpumask *mask)
 	return cmd.val;
 }
 
-struct perf_pair {
-	union {
-		struct {
-			u32 lo;
-			u32 hi;
-		} split;
-		u64 whole;
-	} aperf, mperf;
-};
-
-/* Called via smp_call_function_single(), on the target CPU */
-static void read_measured_perf_ctrs(void *_cur)
-{
-	struct perf_pair *cur = _cur;
-
-	rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
-	rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
-}
-
-/*
- * Return the measured active (C0) frequency on this CPU since last call
- * to this function.
- * Input: cpu number
- * Return: Average CPU frequency in terms of max frequency (zero on error)
- *
- * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
- * over a period of time, while CPU is in C0 state.
- * IA32_MPERF counts at the rate of max advertised frequency
- * IA32_APERF counts at the rate of actual CPU frequency
- * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
- * no meaning should be associated with absolute values of these MSRs.
- */
 static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 				      unsigned int cpu)
 {
-	struct perf_pair readin, cur;
-	unsigned int perf_percent;
-	unsigned int retval;
-
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
-		return 0;
-
-	cur.aperf.whole = readin.aperf.whole -
-				per_cpu(msr_data, cpu).saved_aperf;
-	cur.mperf.whole = readin.mperf.whole -
-				per_cpu(msr_data, cpu).saved_mperf;
-	per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
-	per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
-
-#ifdef __i386__
-	/*
-	 * We dont want to do 64 bit divide with 32 bit kernel
-	 * Get an approximate value. Return failure in case we cannot get
-	 * an approximate value.
-	 */
-	if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
-		int shift_count;
-		u32 h;
-
-		h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
-		shift_count = fls(h);
-
-		cur.aperf.whole >>= shift_count;
-		cur.mperf.whole >>= shift_count;
-	}
-
-	if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
-		int shift_count = 7;
-		cur.aperf.split.lo >>= shift_count;
-		cur.mperf.split.lo >>= shift_count;
-	}
-
-	if (cur.aperf.split.lo && cur.mperf.split.lo)
-		perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
-	else
-		perf_percent = 0;
-
-#else
-	if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
-		int shift_count = 7;
-		cur.aperf.whole >>= shift_count;
-		cur.mperf.whole >>= shift_count;
-	}
-
-	if (cur.aperf.whole && cur.mperf.whole)
-		perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
-	else
-		perf_percent = 0;
-
-#endif
-
-	retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
-
-	return retval;
+	return get_average_perf(policy, cpu,
+				&per_cpu(msr_data, cpu).saved_aperf,
+				&per_cpu(msr_data, cpu).saved_mperf);
 }
 
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
diff --git a/arch/x86/kernel/cpu/cpufreq/average_frequency.c b/arch/x86/kernel/cpu/cpufreq/average_frequency.c
new file mode 100644
index 0000000..2b18a75
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpufreq/average_frequency.c
@@ -0,0 +1,146 @@
+/*
+ *  average_frequency.c
+ *
+ *  Copyright (C) 2009       Thomas Renninger <trenn@xxxxxxx> (Novell)
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * Code taken from acpi-cpufreq which initially came from
+ * Mike Travis <travis@xxxxxxx> and
+ * Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpufreq.h>
+#include <linux/workqueue.h>
+
+#include <asm/msr.h>
+
+struct perf_pair {
+	union {
+		struct {
+			u32 lo;
+			u32 hi;
+		} split;
+		s64 whole;
+	} aperf, mperf;
+};
+
+static void read_measured_perf_ctrs(void *_cur)
+{
+	struct perf_pair *cur = _cur;
+
+	rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
+	rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
+}
+
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ *        cpufreq policy -> must at least have cpuinfo.max_freq be set
+ *        saved_mperf    -> register value of last call, will get updated
+ *        saved_aperf    -> register value of last call, will get updated
+ *
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *         since the function has been called the last time with saved
+ *         aperf/mperf values.
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ *
+ * Callers must make sure that the X86_FEATURE_IDA bit is set.
+ */
+unsigned int get_average_perf(struct cpufreq_policy *policy, unsigned int cpu,
+			      u64 *saved_aperf, u64 *saved_mperf)
+{
+	struct perf_pair readin, cur;
+	unsigned int perf_percent;
+	unsigned int retval;
+
+	smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1);
+
+	/* Called the first time */
+	if ((*saved_aperf == 0) && (*saved_mperf == 0)) {
+		*saved_aperf = readin.aperf.whole;
+		*saved_mperf = readin.mperf.whole;
+		return 0;
+	}
+
+	cur.aperf.whole = readin.aperf.whole - *saved_aperf;
+	cur.mperf.whole = readin.mperf.whole - *saved_mperf;
+
+	/* Handle overflow gracefully */
+	if (unlikely(*saved_aperf > readin.aperf.whole))
+		cur.aperf.whole = 0ULL - readin.aperf.whole;
+	if (unlikely(*saved_mperf > readin.mperf.whole))
+		cur.mperf.whole = 0ULL - readin.mperf.whole;
+
+	*saved_aperf = readin.aperf.whole;
+	*saved_mperf = readin.mperf.whole;
+
+#ifdef __i386__
+	/*
+	 * We dont want to do 64 bit divide with 32 bit kernel
+	 * Get an approximate value. Return failure in case we cannot get
+	 * an approximate value.
+	 */
+	if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
+		int shift_count;
+		u32 h;
+
+		h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
+		shift_count = fls(h);
+
+		cur.aperf.whole >>= shift_count;
+		cur.mperf.whole >>= shift_count;
+	}
+
+	if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
+		int shift_count = 7;
+		cur.aperf.split.lo >>= shift_count;
+		cur.mperf.split.lo >>= shift_count;
+	}
+
+	if (cur.aperf.split.lo && cur.mperf.split.lo)
+		perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
+	else
+		perf_percent = 0;
+
+#else
+	if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
+		int shift_count = 7;
+		cur.aperf.whole >>= shift_count;
+		cur.mperf.whole >>= shift_count;
+	}
+
+	if (cur.aperf.whole && cur.mperf.whole)
+		perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
+	else
+		perf_percent = 0;
+
+#endif
+	retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
+	return retval;
+}
+EXPORT_SYMBOL_GPL(get_average_perf);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 1610427..0ab8bf7 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -362,6 +362,19 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table,
 
 void cpufreq_frequency_table_put_attr(unsigned int cpu);
 
+/*
+ * Get the average frequency since the last call of this function if the
+ * needed MSRs are supported by the CPU
+*/
+#ifdef CONFIG_X86_AVERAGE_FREQUENCY
+unsigned int get_average_perf(struct cpufreq_policy *policy, unsigned int cpu,
+			      u64 *saved_aperf, u64 *saved_mperf);
+#else
+unsigned int get_average_perf(struct cpufreq_policy *policy, unsigned int cpu,
+			      u64 *saved_aperf, u64 *saved_mperf)
+{ return 0; }
+#endif
+
 
 /*********************************************************************
  *                     UNIFIED DEBUG HELPERS                         *
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe cpufreq" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Devel]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Forum]     [Linux SCSI]

  Powered by Linux