[RFC][PATCH] sched: cpuacct: Track cpuusage per cpu frequency

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



New file: cpuacct.cpufreq, present when CONFIG_CPU_FREQ_STAT is enabled.

cpuacct.cpufreq accounts for cpu time at each cpu frequency; time is exported
in nanoseconds.

We do not know the cpufreq table size at compile time,
so a new config option, CONFIG_CPUACCT_CPUFREQ_TABLE_MAX, is introduced
to set the size of the per-cpu cpufreq table in the cpuacct struct.

Signed-off-by: Mike Chan <mike@xxxxxxxxxxx>
---
 init/Kconfig   |    5 +++
 kernel/sched.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 0 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index eb77e8c..e1e86df 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -534,6 +534,16 @@ config CGROUP_CPUACCT
 	  Provides a simple Resource Controller for monitoring the
 	  total CPU consumed by the tasks in a cgroup.
 
+config CPUACCT_CPUFREQ_TABLE_MAX
+	int "Max CPUFREQ table size"
+	depends on CGROUP_CPUACCT && CPU_FREQ_STAT
+	default 32
+	help
+	  Number of per-cpu slots cpuacct reserves for accounting CPU time
+	  per cpufreq table index.  It should be at least as large as the
+	  frequency table of the cpufreq driver in use; time spent at an
+	  index beyond this limit is not accounted.
+
 config RESOURCE_COUNTERS
 	bool "Resource counters"
 	help
diff --git a/kernel/sched.c b/kernel/sched.c
index 528a105..a0b56b5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
+#include <linux/cpufreq.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -8817,6 +8818,11 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@xxxxxxxxxx).
  */
 
+#ifdef CONFIG_CPU_FREQ_STAT
+/* One u64 slot per cpufreq table index; alloc_percpu() needs a named type. */
+typedef struct { u64 usage[CONFIG_CPUACCT_CPUFREQ_TABLE_MAX]; } cpufreq_usage_t;
+#endif
+
 /* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
@@ -8824,6 +8830,10 @@ struct cpuacct {
 	u64 __percpu *cpuusage;
 	struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
 	struct cpuacct *parent;
+#ifdef CONFIG_CPU_FREQ_STAT
+	/* per-cpu table of time charged at each cpufreq table index */
+	cpufreq_usage_t __percpu *cpufreq_usage;
+#endif
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -8856,6 +8866,10 @@ static struct cgroup_subsys_state *cpuacct_create(
 	if (!ca->cpuusage)
 		goto out_free_ca;
 
+#ifdef CONFIG_CPU_FREQ_STAT
+	ca->cpufreq_usage = alloc_percpu(cpufreq_usage_t);
+#endif
+
 	for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
 		if (percpu_counter_init(&ca->cpustat[i], 0))
 			goto out_free_counters;
@@ -8888,6 +8902,108 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 	kfree(ca);
 }
 
+#ifdef CONFIG_CPU_FREQ_STAT
+/*
+ * Frequency-table index of the speed most recently reported by cpufreq.
+ * A single index serves the whole system, so time lands in the right bucket
+ * only while every CPU sits at the same table entry.  cpuacct_charge() reads
+ * it without locking.
+ */
+static int cpufreq_index;
+
+/*
+ * cpufreq transition notifier.  Once a frequency change has completed, look
+ * up the table entry matching the CPU's current speed and cache its index in
+ * cpufreq_index.  Returns a NOTIFY_* code: a negative errno here would carry
+ * NOTIFY_STOP_MASK and cut the notifier chain short.
+ */
+static int cpuacct_cpufreq_notify(struct notifier_block *nb, unsigned long val,
+					void *data)
+{
+	/* cpufreq_frequency_table_target() stores through an unsigned int * */
+	unsigned int index;
+	struct cpufreq_policy *policy;
+	struct cpufreq_freqs *freq = data;
+	struct cpufreq_frequency_table *table;
+
+	if (val != CPUFREQ_POSTCHANGE)
+		return NOTIFY_DONE;
+
+	table = cpufreq_frequency_get_table(freq->cpu);
+	policy = cpufreq_cpu_get(freq->cpu);
+	if (!policy)
+		return NOTIFY_DONE;
+
+	/* Keep the previous index if the table is missing or the lookup fails */
+	if (table && !cpufreq_frequency_table_target(policy, table,
+			cpufreq_quick_get(freq->cpu),
+			CPUFREQ_RELATION_L, &index))
+		cpufreq_index = index;
+
+	cpufreq_cpu_put(policy);
+	return NOTIFY_OK;
+}
+
+static struct notifier_block cpufreq_notifier = {
+	.notifier_call = cpuacct_cpufreq_notify,
+};
+
+/* Register the transition notifier; invoked once through module_init(). */
+static __init int cpuacct_init(void)
+{
+	return cpufreq_register_notifier(&cpufreq_notifier,
+					CPUFREQ_TRANSITION_NOTIFIER);
+}
+
+module_init(cpuacct_init);
+
+/*
+ * read_map handler for the "cpufreq" file.  Emits one entry per valid
+ * frequency-table slot: the key is the frequency as stored in the table and
+ * the value is the time charged at that slot, summed over all present CPUs.
+ */
+static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft,
+		struct cgroup_map_cb *cb)
+{
+	int i;
+	unsigned int cpu;
+	char buf[32];
+	struct cpuacct *ca = cgroup_ca(cgrp);
+	struct cpufreq_frequency_table *table;
+
+	/*
+	 * One table labels every row.  NOTE(review): this assumes all CPUs use
+	 * the same table layout, as the single global cpufreq_index already
+	 * does.  The raw id is used because this handler appears to run
+	 * preemptible and any CPU's table serves equally well - confirm.
+	 */
+	table = cpufreq_frequency_get_table(raw_smp_processor_id());
+	if (!table || !ca->cpufreq_usage)
+		return 0;
+
+	/* usage[] only has CONFIG_CPUACCT_CPUFREQ_TABLE_MAX slots per CPU */
+	for (i = 0; i < CONFIG_CPUACCT_CPUFREQ_TABLE_MAX &&
+			table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		u64 total = 0;
+
+		if (table[i].frequency == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		for_each_present_cpu(cpu) {
+			cpufreq_usage_t *cpufreq_usage;
+
+			cpufreq_usage = per_cpu_ptr(ca->cpufreq_usage, cpu);
+			total += cpufreq_usage->usage[i];
+		}
+
+		snprintf(buf, sizeof(buf), "%u", table[i].frequency);
+		cb->fill(cb, buf, total);
+	}
+
+	return 0;
+}
+#endif
+
 static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
 {
 	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
@@ -9003,6 +9085,12 @@ static struct cftype files[] = {
 		.name = "stat",
 		.read_map = cpuacct_stats_show,
 	},
+#ifdef CONFIG_CPU_FREQ_STAT
+	{
+		.name = "cpufreq",
+		.read_map = cpuacct_cpufreq_show,
+	},
+#endif
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9031,6 +9119,21 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 
 	for (; ca; ca = ca->parent) {
 		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
+#ifdef CONFIG_CPU_FREQ_STAT
+		cpufreq_usage_t *cpufreq_usage =
+			per_cpu_ptr(ca->cpufreq_usage, cpu);
+
+		/*
+		 * usage[] holds CONFIG_CPUACCT_CPUFREQ_TABLE_MAX entries, so an
+		 * index equal to that limit is already out of bounds.
+		 */
+		if (cpufreq_index >= CONFIG_CPUACCT_CPUFREQ_TABLE_MAX)
+			printk_once(KERN_WARNING "cpuacct_charge: "
+					"cpufreq_index: %d exceeds max table "
+					"size\n", cpufreq_index);
+		else
+			cpufreq_usage->usage[cpufreq_index] += cputime;
+#endif
 		*cpuusage += cputime;
 	}
 
-- 
1.7.0.1

--
To unsubscribe from this list: send the line "unsubscribe cpufreq" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Devel]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Forum]     [Linux SCSI]

  Powered by Linux