RFC: cgroups aware proc

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Happy new year guys.

I need /proc to be cgroups-aware, because I want LXC containers to see only the resources that are given to them.

In order to do that I had to patch the kernel. I decided to start with cpuinfo, stat and interrupts and then continue with meminfo and loadavg.

I managed to patch the kernel (Linux 3.12.0) and make /proc/cpuinfo, /proc/stat and /proc/interrupts cgroups-aware.

Attached are the patches that make the necessary changes.

The change for /proc/cpuinfo and /proc/interrupts is currently done only for x86 arch, but I will patch the rest of the architectures if the style of the patches is acceptable.

Tomorrow I will check if the patches apply and build with the latest kernel.

Best regards,
Marian


>From 94891538f4a6a6b57aab0a2b917589ba73adfad9 Mon Sep 17 00:00:00 2001
From: Marian Marinov <mm@xxxxxxxx>
Date: Sat, 4 Jan 2014 05:45:42 +0200
Subject: [PATCH 1/2] arch/x86/kernel/cpu/proc.c: Make /proc/cpuinfo display
 cpu information relative only to the current cgroup - added linux/cgroup.h
 include because it is needed for the cpumask_test_cpu() - added a
 task_struct to c_start() - and added a loop that will skip all CPUs that are
 not part of the current cgroup by using the cpus_allowed mask from the
 task_struct

Signed-off-by: Marian Marinov <mm@xxxxxxxx>
---
 arch/x86/kernel/cpu/proc.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index aee6317..d9e9fb6 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -3,6 +3,7 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+#include <linux/cgroup.h>
 
 /*
  *	Get CPU information for use by the procfs.
@@ -133,9 +134,19 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
+	struct task_struct *tsk;
 	*pos = cpumask_next(*pos - 1, cpu_online_mask);
-	if ((*pos) < nr_cpu_ids)
+	tsk = current_thread_info()->task;
+	if ((*pos) < nr_cpu_ids) {
+		if (tsk != NULL) {
+			while (cpumask_test_cpu((*pos), &tsk->cpus_allowed) == 0) {
+				(*pos)++;
+				if ((*pos) >= nr_cpu_ids)
+					return NULL;
+			}
+		}
 		return &cpu_data(*pos);
+	}
 	return NULL;
 }
 
-- 
1.8.4

>From ff68f073cb90316baa78936ff219a155788e29c2 Mon Sep 17 00:00:00 2001
From: Marian Marinov <mm@xxxxxxxx>
Date: Sat, 4 Jan 2014 06:10:24 +0200
Subject: [PATCH 1/1] arch/x86/kernel/irq.c: Make /proc/interrupts
 cgroups aware - print only the CPUs that are part of the current cgroup -
 Added code to handle Kconfig options - Added code to skip all CPUs that are
 not part of the current cgroup using the task_struct's cpus_allowed mask

Signed-off-by: Marian Marinov <mm@xxxxxxxx>
---
 arch/x86/kernel/irq.c | 73 +++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 22d0687..b0a17c0 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -54,75 +54,120 @@ void ack_bad_irq(unsigned int irq)
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
 	int j;
+#ifdef CONFIG_CPUSETS
+	struct task_struct *tsk;
+	tsk = current_thread_info()->task;
+#endif
 
 	seq_printf(p, "%*s: ", prec, "NMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
 	seq_printf(p, "  Non-maskable interrupts\n");
 #ifdef CONFIG_X86_LOCAL_APIC
 	seq_printf(p, "%*s: ", prec, "LOC");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
 	seq_printf(p, "  Local timer interrupts\n");
 
 	seq_printf(p, "%*s: ", prec, "SPU");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
 	seq_printf(p, "%*s: ", prec, "PMI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
 	seq_printf(p, "  Performance monitoring interrupts\n");
 	seq_printf(p, "%*s: ", prec, "IWI");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
 	seq_printf(p, "  IRQ work interrupts\n");
 	seq_printf(p, "%*s: ", prec, "RTR");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
 	seq_printf(p, "  APIC ICR read retries\n");
 #endif
 	if (x86_platform_ipi_callback) {
 		seq_printf(p, "%*s: ", prec, "PLT");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
+#ifdef CONFIG_CPUSETS
+			if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+				seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
 		seq_printf(p, "  Platform interrupts\n");
 	}
 #ifdef CONFIG_SMP
 	seq_printf(p, "%*s: ", prec, "RES");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
 	seq_printf(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
 					irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  TLB shootdowns\n");
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
 	seq_printf(p, "%*s: ", prec, "TRM");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
 #endif
 #ifdef CONFIG_X86_MCE_THRESHOLD
 	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
 #endif
 #ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
 	seq_printf(p, "  Machine check exceptions\n");
 	seq_printf(p, "%*s: ", prec, "MCP");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(j, &tsk->cpus_allowed))
+#endif
+			seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
 	seq_printf(p, "  Machine check polls\n");
 #endif
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
-- 
1.8.4

>From 00af9f7b5eeef770d0da240a6bf2064a2ba11e47 Mon Sep 17 00:00:00 2001
From: Marian Marinov <mm@xxxxxxxx>
Date: Sat, 4 Jan 2014 06:03:11 +0200
Subject: [PATCH 1/1] fs/proc/stat.c & kernel/sched/stats.c: List only the CPUs
 that are in the current cpuset

- Added a check to allow the display of CPU information only if the CPU is part of the current cpuset, using the task_struct's cpus_allowed mask

Signed-off-by: Marian Marinov <mm@xxxxxxxx>
---
 fs/proc/stat.c       | 14 ++++++++++++++
 kernel/sched/stats.c |  9 +++++++++
 2 files changed, 23 insertions(+)

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 1cf86c0..e5ca3ef 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -87,6 +87,11 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
 	struct timespec boottime;
+#ifdef CONFIG_CPUSETS
+	struct task_struct *tsk;
+
+	tsk = current_thread_info()->task;
+#endif
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = 0;
@@ -94,7 +99,12 @@ static int show_stat(struct seq_file *p, void *v)
 	getboottime(&boottime);
 	jif = boottime.tv_sec;
 
+
 	for_each_possible_cpu(i) {
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(i, &tsk->cpus_allowed) == 0)
+			continue;
+#endif
 		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
 		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
 		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
@@ -142,6 +152,10 @@ static int show_stat(struct seq_file *p, void *v)
 		steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
 		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
 		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(i, &tsk->cpus_allowed) == 0)
+			continue;
+#endif
 		seq_printf(p, "cpu%d", i);
 		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
 		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index da98af3..5897358 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -17,6 +17,10 @@ static int show_schedstat(struct seq_file *seq, void *v)
 	int cpu;
 	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
 	char *mask_str = kmalloc(mask_len, GFP_KERNEL);
+#ifdef CONFIG_CPUSETS
+	struct task_struct *tsk;
+	tsk = current_thread_info()->task;
+#endif
 
 	if (mask_str == NULL)
 		return -ENOMEM;
@@ -33,6 +37,11 @@ static int show_schedstat(struct seq_file *seq, void *v)
 		cpu = (unsigned long)(v - 2);
 		rq = cpu_rq(cpu);
 
+#ifdef CONFIG_CPUSETS
+		if (tsk != NULL && cpumask_test_cpu(cpu, &tsk->cpus_allowed) == 0)
+			return 0;
+#endif
+
 		/* runqueue-specific stats */
 		seq_printf(seq,
 		    "cpu%d %u 0 %u %u %u %u %llu %llu %lu",
-- 
1.8.4

>From dec97e6141f92109c0cd02883cff20e3f1429564 Mon Sep 17 00:00:00 2001
From: Marian Marinov <mm@xxxxxxxx>
Date: Sat, 4 Jan 2014 05:50:03 +0200
Subject: [PATCH 2/2] arch/x86/kernel/cpu/proc.c: Added Kconfig option handling

Signed-off-by: Marian Marinov <mm@xxxxxxxx>
---
 arch/x86/kernel/cpu/proc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index d9e9fb6..114fd95 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -3,7 +3,10 @@
 #include <linux/string.h>
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
+
+#ifdef CONFIG_CPUSETS
 #include <linux/cgroup.h>
+#endif
 
 /*
  *	Get CPU information for use by the procfs.
@@ -134,10 +137,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
+#ifdef CONFIG_CPUSETS
 	struct task_struct *tsk;
+#endif
 	*pos = cpumask_next(*pos - 1, cpu_online_mask);
-	tsk = current_thread_info()->task;
 	if ((*pos) < nr_cpu_ids) {
+#ifdef CONFIG_CPUSETS
+		tsk = current_thread_info()->task;
 		if (tsk != NULL) {
 			while (cpumask_test_cpu((*pos), &tsk->cpus_allowed) == 0) {
 				(*pos)++;
@@ -145,6 +151,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
 					return NULL;
 			}
 		}
+#endif
 		return &cpu_data(*pos);
 	}
 	return NULL;
-- 
1.8.4


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]     [Monitors]

  Powered by Linux