Re: [PATCH 08/10] memcg: Add CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 02/27/2012 07:58 PM, Suleiman Souhlal wrote:
This config option dictates whether or not kernel memory in the
root cgroup should be accounted.

This may be useful in an environment where everything is supposed to be
in a cgroup and accounted for. Large amounts of kernel memory in the
root cgroup would indicate problems with memory isolation or accounting.

I don't like accounting this stuff to the root memory cgroup. This causes overhead for everybody, including people who couldn't care less about memcg.

If it were up to me, we would simply not account it, and end of story.

However, if this is terribly important for you, I think you need to at
least make it possible to enable it at runtime, and default it to disabled.


Signed-off-by: Suleiman Souhlal<suleiman@xxxxxxxxxx>
---
  init/Kconfig    |    8 ++++++++
  mm/memcontrol.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
  2 files changed, 52 insertions(+), 0 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 3f42cd6..a119270 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -714,6 +714,14 @@ config CGROUP_MEM_RES_CTLR_KMEM
  	  Memory Controller, which are page-based, and can be swapped. Users of
  	  the kmem extension can use it to guarantee that no group of processes
  	  will ever exhaust kernel resources alone.
+config CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+	bool "Root Cgroup Kernel Memory Accounting (EXPERIMENTAL)"
+	depends on CGROUP_MEM_RES_CTLR_KMEM
+	default n
+	help
+	  Account for kernel memory used by the root cgroup. This may be useful
+	  to know how much kernel memory isn't currently accounted to any
+	  cgroup.

  config CGROUP_PERF
  	bool "Enable perf_event per-cpu per-container group (cgroup) monitoring"
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6a475ed..d4cdb8e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,6 +61,10 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly;
  #define MEM_CGROUP_RECLAIM_RETRIES	5
  struct mem_cgroup *root_mem_cgroup __read_mostly;

+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+atomic64_t pre_memcg_kmem_bytes;	/* kmem usage before memcg is enabled */
+#endif
+
  #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
  /* Turned on only when memory cgroup is enabled&&  really_do_swap_account = 1 */
  int do_swap_account __read_mostly;
@@ -5643,6 +5647,13 @@ memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, long long delta)

  	if (memcg)
  		ret = res_counter_charge(&memcg->kmem_bytes, delta,&fail_res);
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+	else if (root_mem_cgroup != NULL)
+		ret = res_counter_charge(&root_mem_cgroup->kmem_bytes, delta,
+		&fail_res);
+	else
+		atomic64_add(delta,&pre_memcg_kmem_bytes);
+#endif

  	return ret;
  }
@@ -5668,6 +5679,12 @@ memcg_uncharge_kmem(struct mem_cgroup *memcg, long long delta)

  	if (memcg)
  		res_counter_uncharge(&memcg->kmem_bytes, delta);
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+	else if (root_mem_cgroup != NULL)
+		res_counter_uncharge(&root_mem_cgroup->kmem_bytes, delta);
+	else
+		atomic64_sub(delta,&pre_memcg_kmem_bytes);
+#endif

  	if (memcg&&  !memcg->independent_kmem_limit)
  		res_counter_uncharge(&memcg->res, delta);
@@ -5953,7 +5970,12 @@ memcg_slab_move(struct mem_cgroup *memcg)
  		cachep = rcu_access_pointer(memcg->slabs[i]);
  		if (cachep != NULL) {
  			rcu_assign_pointer(memcg->slabs[i], NULL);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+			cachep->memcg = root_mem_cgroup;
+#else
  			cachep->memcg = NULL;
+#endif

  			/* The space for this is already allocated */
  			strcat((char *)cachep->name, "dead");
@@ -5991,6 +6013,15 @@ memcg_kmem_init(struct mem_cgroup *memcg, struct mem_cgroup *parent)

  	memcg_slab_init(memcg);

+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+	if (memcg == root_mem_cgroup) {
+		long kmem_bytes;
+
+		kmem_bytes = atomic64_xchg(&pre_memcg_kmem_bytes, 0);
+		memcg->kmem_bytes.usage = kmem_bytes;
+	}
+#endif
+
  	atomic64_set(&memcg->kmem_bypassed, 0);
  	memcg->independent_kmem_limit = 0;
  }
@@ -6010,6 +6041,19 @@ memcg_kmem_move(struct mem_cgroup *memcg)
  	spin_unlock_irqrestore(&memcg->kmem_bytes.lock, flags);
  	if (!memcg->independent_kmem_limit)
  		res_counter_uncharge(&memcg->res, kmem_bytes);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM_ACCT_ROOT
+	{
+		struct res_counter *dummy;
+		int err;
+
+		/* Can't fail because it's the root cgroup */
+		err = res_counter_charge(&root_mem_cgroup->kmem_bytes,
+		    kmem_bytes,&dummy);
+		err = res_counter_charge(&root_mem_cgroup->res, kmem_bytes,
+		&dummy);
+	}
+#endif
  }
  #else /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
  static void

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>


[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]