New file: cpuacct.cpufreq when CONFIG_CPU_FREQ_STATS is enabled. cpuacct.cpufreq accounts for cpu time per-cpu frequency, time is exported in nano-seconds We do not know the cpufreq table size at compile time. So a new config option CONFIG_CPUACCT_CPUFREQ_TABLE_MAX is intruduced, to determine the cpufreq table per-cpu in the cpuacct struct. Signed-off-by: Mike Chan <mike@xxxxxxxxxxx> --- init/Kconfig | 5 +++ kernel/sched.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 0 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index eb77e8c..e1e86df 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -534,6 +534,11 @@ config CGROUP_CPUACCT Provides a simple Resource Controller for monitoring the total CPU consumed by the tasks in a cgroup. +config CPUACCT_CPUFREQ_TABLE_MAX + int "Max CPUFREQ table size" + depends on CGROUP_CPUACCT && CPU_FREQ_TABLE + default 32 + config RESOURCE_COUNTERS bool "Resource counters" help diff --git a/kernel/sched.c b/kernel/sched.c index 528a105..a0b56b5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -71,6 +71,7 @@ #include <linux/debugfs.h> #include <linux/ctype.h> #include <linux/ftrace.h> +#include <linux/cpufreq.h> #include <asm/tlb.h> #include <asm/irq_regs.h> @@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = { * (balbir@xxxxxxxxxx). */ +#ifdef CONFIG_CPU_FREQ_STAT +/* The alloc_percpu macro uses typeof so we must define a type here. */ +typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t; +#endif + /* track cpu usage of a group of tasks and its child groups */ struct cpuacct { struct cgroup_subsys_state css; @@ -8824,6 +8830,10 @@ struct cpuacct { u64 __percpu *cpuusage; struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; struct cpuacct *parent; +#ifdef CONFIG_CPU_FREQ_STAT + /* cpufreq_usage is a pointer to an array of u64-types on every cpu */ + cpufreq_usage_t *cpufreq_usage; +#endif }; struct cgroup_subsys cpuacct_subsys; @@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create( if (!ca->cpuusage) goto out_free_ca; +#ifdef CONFIG_CPU_FREQ_STAT + ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t); +#endif + for (i = 0; i < CPUACCT_STAT_NSTATS; i++) if (percpu_counter_init(&ca->cpustat[i], 0)) goto out_free_counters; @@ -8888,6 +8902,74 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) kfree(ca); } +#ifdef CONFIG_CPU_FREQ_STAT +static int cpufreq_index; +static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val, + void *data) +{ + int ret; + struct cpufreq_policy *policy; + struct cpufreq_freqs *freq = data; + struct cpufreq_frequency_table *table; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + /* Update cpufreq_index with current speed */ + table = cpufreq_frequency_get_table(freq->cpu); + policy = cpufreq_cpu_get(freq->cpu); + ret = cpufreq_frequency_table_target(policy, table, + cpufreq_quick_get(freq->cpu), + CPUFREQ_RELATION_L, &cpufreq_index); + cpufreq_cpu_put(policy); + return ret; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpuacct_cpufreq_notify, +}; + +static __init int cpuacct_init(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} + +module_init(cpuacct_init); + +static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft, + struct cgroup_map_cb *cb) +{ + int i; + unsigned int cpu; + char buf[32]; + struct cpuacct *ca = cgroup_ca(cgrp); + struct cpufreq_frequency_table *table = + cpufreq_frequency_get_table(smp_processor_id()); + + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + u64 total = 0; + + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) + continue; + + for_each_present_cpu(cpu) { + cpufreq_usage_t *cpufreq_usage; + + cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu); + table = cpufreq_frequency_get_table(cpu); + + total += cpufreq_usage->usage[i]; + } + + snprintf(buf, sizeof(buf), "%d", table[i].frequency); + cb->fill(cb, buf, total); + } + + return 0; +} +#endif + static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) { u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); @@ -9003,6 +9085,12 @@ static struct cftype files[] = { .name = "stat", .read_map = cpuacct_stats_show, }, +#ifdef CONFIG_CPU_FREQ_STAT + { + .name = "cpufreq", + .read_map = cpuacct_cpufreq_show, + }, +#endif }; static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) @@ -9031,6 +9119,17 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) for (; ca; ca = ca->parent) { u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); +#ifdef CONFIG_CPU_FREQ_STAT + cpufreq_usage_t *cpufreq_usage = + per_cpu_ptr(ca->cpufreq_usage, cpu); + + if (cpufreq_index > CONFIG_CPUACCT_CPUFREQ_TABLE_MAX) + printk_once(KERN_WARNING "cpuacct_charge: " + "cpufreq_index: %d exceeds max table " + "size\n", cpufreq_index); + else + cpufreq_usage->usage[cpufreq_index] += cputime; +#endif *cpuusage += cputime; } -- 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe cpufreq" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html