Subject: + slb-charge-slabs-to-kmemcg-explicitly.patch added to -mm tree
To: vdavydov@xxxxxxxxxxxxx,cl@xxxxxxxxxxxxxxxxxxxx,glommer@xxxxxxxxx,gthelen@xxxxxxxxxx,hannes@xxxxxxxxxxx,mhocko@xxxxxxx,penberg@xxxxxxxxxx
From: akpm@xxxxxxxxxxxxxxxxxxxx
Date: Thu, 10 Apr 2014 16:38:39 -0700


The patch titled
     Subject: sl[au]b: charge slabs to kmemcg explicitly
has been added to the -mm tree.  Its filename is
     slb-charge-slabs-to-kmemcg-explicitly.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/slb-charge-slabs-to-kmemcg-explicitly.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/slb-charge-slabs-to-kmemcg-explicitly.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
Subject: sl[au]b: charge slabs to kmemcg explicitly

We have only a few places where we actually want to charge kmem, so
instead of intruding into the general page allocation path with
__GFP_KMEMCG it's better to explicitly charge kmem there.  All kmem
charges will be easier to follow that way.

This is a step towards removing __GFP_KMEMCG.  It removes __GFP_KMEMCG
from memcg caches' allocflags.  Instead, it makes the slab allocation
path call memcg_charge_kmem directly, getting the memcg to charge from
the cache's memcg params.

This also eliminates any possibility of misaccounting an allocation
going from one memcg's cache to another memcg, because now we always
charge slabs against the memcg the cache belongs to.  That's why this
patch removes the big comment to memcg_kmem_get_cache.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
Acked-by: Greg Thelen <gthelen@xxxxxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxx>
Cc: Glauber Costa <glommer@xxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx>
Cc: Pekka Enberg <penberg@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/memcontrol.h |   15 ++++-----------
 mm/memcontrol.c            |    4 ++--
 mm/slab.c                  |    7 ++++++-
 mm/slab.h                  |   29 +++++++++++++++++++++++++++++
 mm/slab_common.c           |    6 +-----
 mm/slub.c                  |   24 +++++++++++++++++-------
 6 files changed, 59 insertions(+), 26 deletions(-)
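The change boils down to one discipline, visible in both mm/slab.c and
mm/slub.c below: charge the cache's owner memcg before allocating a slab
page, and uncharge again if the allocation fails or when the page is
freed.  As a rough illustration, here is a minimal user-space sketch of
that charge/uncharge pairing; the toy_* names and the plain counter are
illustrative stand-ins for memcg_charge_kmem() and the kernel's
res_counter, not real kernel APIs.

/*
 * Minimal user-space sketch of the discipline this patch introduces:
 * charge the cache's memcg before allocating a slab page, and uncharge
 * again if the allocation fails or when the page is freed.  The toy_*
 * names and the plain counter are illustrative stand-ins for
 * memcg_charge_kmem()/res_counter, not kernel APIs.
 */
#include <stdio.h>
#include <stdlib.h>

struct toy_memcg {
	size_t usage;	/* bytes currently charged */
	size_t limit;	/* hard limit, analogous to kmem.limit_in_bytes */
};

static int toy_charge(struct toy_memcg *memcg, size_t size)
{
	if (memcg->usage + size > memcg->limit)
		return -1;	/* over limit: allocation must not proceed */
	memcg->usage += size;
	return 0;
}

static void toy_uncharge(struct toy_memcg *memcg, size_t size)
{
	memcg->usage -= size;
}

/* Shaped like kmem_getpages()/alloc_slab_page() after this patch. */
static void *toy_alloc_slab_page(struct toy_memcg *memcg, size_t size)
{
	void *page;

	if (toy_charge(memcg, size))		/* charge first ... */
		return NULL;
	page = malloc(size);			/* ... then allocate */
	if (!page)
		toy_uncharge(memcg, size);	/* roll back on failure */
	return page;
}

static void toy_free_slab_page(struct toy_memcg *memcg, void *page,
			       size_t size)
{
	free(page);
	toy_uncharge(memcg, size);
}

int main(void)
{
	struct toy_memcg memcg = { .usage = 0, .limit = 8192 };
	void *page = toy_alloc_slab_page(&memcg, 4096);

	printf("charged after alloc: %zu\n", memcg.usage);	/* 4096 */
	toy_free_slab_page(&memcg, page, 4096);
	printf("charged after free:  %zu\n", memcg.usage);	/* 0 */
	return 0;
}

Note the ordering consequence: because the charge is taken before the
page is allocated, a failed allocation must roll the charge back, which
is why kmem_getpages() and alloc_slab_page() in the diff below both gain
a memcg_uncharge_slab() call on their !page paths.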
diff -puN include/linux/memcontrol.h~slb-charge-slabs-to-kmemcg-explicitly include/linux/memcontrol.h
--- a/include/linux/memcontrol.h~slb-charge-slabs-to-kmemcg-explicitly
+++ a/include/linux/memcontrol.h
@@ -506,6 +506,9 @@ void memcg_update_array_size(int num_gro
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
@@ -583,17 +586,7 @@ memcg_kmem_commit_charge(struct page *pa
  * @cachep: the original global kmem cache
  * @gfp: allocation flags.
  *
- * This function assumes that the task allocating, which determines the memcg
- * in the page allocator, belongs to the same cgroup throughout the whole
- * process.  Misacounting can happen if the task calls memcg_kmem_get_cache()
- * while belonging to a cgroup, and later on changes. This is considered
- * acceptable, and should only happen upon task migration.
- *
- * Before the cache is created by the memcg core, there is also a possible
- * imbalance: the task belongs to a memcg, but the cache being allocated from
- * is the global cache, since the child cache is not yet guaranteed to be
- * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
- * passed and the page allocator will not attempt any cgroup accounting.
+ * All memory allocated from a per-memcg cache is charged to the owner memcg.
  */
 static __always_inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
diff -puN mm/memcontrol.c~slb-charge-slabs-to-kmemcg-explicitly mm/memcontrol.c
--- a/mm/memcontrol.c~slb-charge-slabs-to-kmemcg-explicitly
+++ a/mm/memcontrol.c
@@ -2944,7 +2944,7 @@ static int mem_cgroup_slabinfo_read(stru
 }
 #endif
 
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
 	int ret = 0;
@@ -2982,7 +2982,7 @@ static int memcg_charge_kmem(struct mem_
 	return ret;
 }
 
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 {
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
diff -puN mm/slab.c~slb-charge-slabs-to-kmemcg-explicitly mm/slab.c
--- a/mm/slab.c~slb-charge-slabs-to-kmemcg-explicitly
+++ a/mm/slab.c
@@ -1664,8 +1664,12 @@ static struct page *kmem_getpages(struct
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
+	if (memcg_charge_slab(cachep, flags, cachep->gfporder))
+		return NULL;
+
 	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
 	if (!page) {
+		memcg_uncharge_slab(cachep, cachep->gfporder);
 		if (!(flags & __GFP_NOWARN) && printk_ratelimit())
 			slab_out_of_memory(cachep, flags, nodeid);
 		return NULL;
@@ -1724,7 +1728,8 @@ static void kmem_freepages(struct kmem_c
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	__free_memcg_kmem_pages(page, cachep->gfporder);
+	__free_pages(page, cachep->gfporder);
+	memcg_uncharge_slab(cachep, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
diff -puN mm/slab.h~slb-charge-slabs-to-kmemcg-explicitly mm/slab.h
--- a/mm/slab.h~slb-charge-slabs-to-kmemcg-explicitly
+++ a/mm/slab.h
@@ -191,6 +191,26 @@ static inline struct kmem_cache *memcg_r
 		return s;
 	return s->memcg_params->root_cache;
 }
+
+static __always_inline int memcg_charge_slab(struct kmem_cache *s,
+					     gfp_t gfp, int order)
+{
+	if (!memcg_kmem_enabled())
+		return 0;
+	if (is_root_cache(s))
+		return 0;
+	return memcg_charge_kmem(s->memcg_params->memcg, gfp,
+				 PAGE_SIZE << order);
+}
+
+static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+	if (!memcg_kmem_enabled())
+		return;
+	if (is_root_cache(s))
+		return;
+	memcg_uncharge_kmem(s->memcg_params->memcg, PAGE_SIZE << order);
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -226,6 +246,15 @@ static inline struct kmem_cache *memcg_r
 {
 	return s;
 }
+
+static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff -puN mm/slab_common.c~slb-charge-slabs-to-kmemcg-explicitly mm/slab_common.c
--- a/mm/slab_common.c~slb-charge-slabs-to-kmemcg-explicitly
+++ a/mm/slab_common.c
@@ -290,12 +290,8 @@ void kmem_cache_create_memcg(struct mem_
 				 root_cache->size, root_cache->align,
 				 root_cache->flags, root_cache->ctor,
 				 memcg, root_cache);
-	if (IS_ERR(s)) {
+	if (IS_ERR(s))
 		kfree(cache_name);
-		goto out_unlock;
-	}
-
-	s->allocflags |= __GFP_KMEMCG;
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
diff -puN mm/slub.c~slb-charge-slabs-to-kmemcg-explicitly mm/slub.c
--- a/mm/slub.c~slb-charge-slabs-to-kmemcg-explicitly
+++ a/mm/slub.c
@@ -1317,17 +1317,26 @@ static inline void slab_free_hook(struct
 /*
  * Slab allocation and freeing
  */
-static inline struct page *alloc_slab_page(gfp_t flags, int node,
-					struct kmem_cache_order_objects oo)
+static inline struct page *alloc_slab_page(struct kmem_cache *s,
+		gfp_t flags, int node, struct kmem_cache_order_objects oo)
 {
+	struct page *page;
 	int order = oo_order(oo);
 
 	flags |= __GFP_NOTRACK;
 
+	if (memcg_charge_slab(s, flags, order))
+		return NULL;
+
 	if (node == NUMA_NO_NODE)
-		return alloc_pages(flags, order);
+		page = alloc_pages(flags, order);
 	else
-		return alloc_pages_exact_node(node, flags, order);
+		page = alloc_pages_exact_node(node, flags, order);
+
+	if (!page)
+		memcg_uncharge_slab(s, order);
+
+	return page;
 }
 
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1349,14 +1358,14 @@ static struct page *allocate_slab(struct
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
 
-	page = alloc_slab_page(alloc_gfp, node, oo);
+	page = alloc_slab_page(s, alloc_gfp, node, oo);
 	if (unlikely(!page)) {
 		oo = s->min;
 		/*
 		 * Allocation may have failed due to fragmentation.
 		 * Try a lower order alloc if possible
 		 */
-		page = alloc_slab_page(flags, node, oo);
+		page = alloc_slab_page(s, flags, node, oo);
 
 		if (page)
 			stat(s, ORDER_FALLBACK);
@@ -1472,7 +1481,8 @@ static void __free_slab(struct kmem_cach
 	page_mapcount_reset(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_memcg_kmem_pages(page, order);
+	__free_pages(page, order);
+	memcg_uncharge_slab(s, order);
 }
 
 #define need_reserve_slab_rcu						\
_

Patches currently in -mm which might be from vdavydov@xxxxxxxxxxxxx are

origin.patch
slb-charge-slabs-to-kmemcg-explicitly.patch
mm-get-rid-of-__gfp_kmemcg.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html