Setting ignore_idle to 1 ignores idle time from time_in_state accounting. Currently cpufreq stats accounts for idle time time_in_state for each cpu speed. For cpu's that have a low power idle state this improperly accounts for time spent at each speed. The most relevant case is when the system is idle yet cpu time is still accounted for at the lowest speed. This results in heavily skewed statistics (towards the lowest speed) which makes these statistics useless when tuning cpufreq scaling with cpuidle. Signed-off-by: Mike Chan <mike@xxxxxxxxxxx> --- drivers/cpufreq/cpufreq_stats.c | 42 +++++++++++++++++++++++++++++++++++--- include/linux/cpufreq.h | 5 ++++ kernel/sched.c | 2 + 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index 5a62d67..8add3ac 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -23,10 +23,11 @@ static spinlock_t cpufreq_stats_lock; -#define CPUFREQ_STATDEVICE_ATTR(_name, _mode, _show) \ +#define CPUFREQ_STATDEVICE_ATTR(_name, _mode, _show, _store) \ static struct freq_attr _attr_##_name = {\ .attr = {.name = __stringify(_name), .mode = _mode, }, \ .show = _show,\ + .store = _store,\ }; struct cpufreq_stats { @@ -43,6 +44,8 @@ struct cpufreq_stats { #endif }; +static int ignore_idle; + static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); struct cpufreq_stats_attribute { @@ -136,11 +139,27 @@ static ssize_t show_trans_table(struct cpufreq_policy *policy, char *buf) return PAGE_SIZE; return len; } -CPUFREQ_STATDEVICE_ATTR(trans_table, 0444, show_trans_table); +CPUFREQ_STATDEVICE_ATTR(trans_table, 0444, show_trans_table, NULL); #endif -CPUFREQ_STATDEVICE_ATTR(total_trans, 0444, show_total_trans); -CPUFREQ_STATDEVICE_ATTR(time_in_state, 0444, show_time_in_state); +static ssize_t store_ignore_idle(struct cpufreq_policy *policy, char *buf) +{ + int input; + if (sscanf(buf, "%d", &input) != 1) + return -EINVAL; + + ignore_idle = input; + return 1; +} + +static ssize_t show_ignore_idle(struct cpufreq_policy *policy, char *buf) +{ + return sprintf(buf, "%d\n", ignore_idle); +} + +CPUFREQ_STATDEVICE_ATTR(total_trans, 0444, show_total_trans, NULL); +CPUFREQ_STATDEVICE_ATTR(time_in_state, 0444, show_time_in_state, NULL); +CPUFREQ_STATDEVICE_ATTR(ignore_idle, 0664, show_ignore_idle, store_ignore_idle); static struct attribute *default_attrs[] = { &_attr_total_trans.attr, @@ -304,6 +323,21 @@ static int cpufreq_stat_notifier_trans(struct notifier_block *nb, return 0; } +void cpufreq_exit_idle(int cpu, unsigned long ticks) +{ + struct cpufreq_stats *stat; + stat = per_cpu(cpufreq_stats_table, cpu); + + /* Wait until cpu stats is initalized */ + if (!ignore_idle || !stat || !stat->time_in_state) + return; + + spin_lock(&cpufreq_stats_lock); + stat->time_in_state[stat->last_index] = + cputime_sub(stat->time_in_state[stat->last_index], ticks); + spin_unlock(&cpufreq_stats_lock); +} + static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 4de02b1..e3f1363 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -256,6 +256,11 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); +#ifdef CONFIG_CPU_FREQ_STAT +extern void cpufreq_exit_idle(int cpu, unsigned long ticks); +#else +#define cpufreq_exit_idle(int cpu, unsigned long ticks) do {} while (0) +#endif static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned int min, unsigned int max) { diff --git a/kernel/sched.c b/kernel/sched.c index c535cc4..feef94c 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -54,6 +54,7 @@ #include <linux/rcupdate.h> #include <linux/cpu.h> #include <linux/cpuset.h> +#include <linux/cpufreq.h> #include <linux/percpu.h> #include <linux/kthread.h> #include <linux/proc_fs.h> @@ -5187,6 +5188,7 @@ void account_steal_ticks(unsigned long ticks) */ void account_idle_ticks(unsigned long ticks) { + cpufreq_exit_idle(smp_processor_id(), ticks); account_idle_time(jiffies_to_cputime(ticks)); } -- 1.6.6 -- To unsubscribe from this list: send the line "unsubscribe cpufreq" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html