The memcg_params::memcg_caches array can be updated concurrently from
memcg_update_cache_size() and memcg_create_kmem_cache(). Although both
of these functions take the slab_mutex during their operation, the
latter checks if the memcg's cache has already been allocated w/o
taking the mutex. This can result in a race as described below.

Assume two threads schedule kmem_cache creation works for the same
kmem_cache of the same memcg from __memcg_kmem_get_cache(). One of the
works successfully creates it. The other work should then fail, but if
it interleaves with memcg_update_cache_size() as follows, it does not:

  memcg_create_kmem_cache()                    memcg_update_cache_size()
  (called w/o mutexes held)                    (called with slab_mutex held)
  -------------------------                    -------------------------
  mutex_lock(&memcg_cache_mutex)

                                               s->memcg_params=kzalloc(...)

  new_cachep=cache_from_memcg_idx(cachep,idx)
  // new_cachep==NULL => proceed to creation

                                               s->memcg_params->memcg_caches[i]
                                                   =cur_params->memcg_caches[i]

  // kmem_cache_dup takes slab_mutex so we
  // will hang around here until
  // memcg_update_cache_size() finishes, but ...
  new_cachep = kmem_cache_dup(memcg, cachep)
  // nothing will prevent kmem_cache_dup
  // from succeeding so ...

  cachep->memcg_params->memcg_caches[idx]=new_cachep
  // we've overwritten an existing cache ptr!

Let's fix this by moving both the check and the update of
memcg_params::memcg_caches from memcg_create_kmem_cache() to
kmem_cache_create_memcg(), so that both are called under the
slab_mutex.

Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxx>
Cc: Johannes Weiner <hannes@xxxxxxxxxxx>
Cc: Glauber Costa <glommer@xxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Pekka Enberg <penberg@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---
 include/linux/memcontrol.h |  9 ++--
 mm/memcontrol.c            | 98 +++++++++++++++-----------------------------
 mm/slab_common.c           |  8 +++-
 3 files changed, 44 insertions(+), 71 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b357ae3..fdd3f30 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -500,8 +500,8 @@ int memcg_cache_id(struct mem_cgroup *memcg);
 int memcg_init_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
 		struct kmem_cache *root_cache);
 void memcg_free_cache_params(struct kmem_cache *s);
-void memcg_release_cache(struct kmem_cache *cachep);
-void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
+void memcg_register_cache(struct kmem_cache *s);
+void memcg_release_cache(struct kmem_cache *s);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
@@ -652,12 +652,11 @@ static inline void memcg_free_cache_params(struct kmem_cache *s);
 {
 }
 
-static inline void memcg_release_cache(struct kmem_cache *cachep)
+static inline void memcg_register_cache(struct kmem_cache *s)
 {
 }
 
-static inline void memcg_cache_list_add(struct mem_cgroup *memcg,
-					struct kmem_cache *s)
+static inline void memcg_release_cache(struct kmem_cache *s)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e37fdb5..62b9991 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3059,16 +3059,6 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 	css_put(&memcg->css);
 }
 
-void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep)
-{
-	if (!memcg)
-		return;
-
-	mutex_lock(&memcg->slab_caches_mutex);
-	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
-	mutex_unlock(&memcg->slab_caches_mutex);
-}
-
 /*
  * helper for acessing a memcg's index. It will be used as an index in the
  * child cache array in kmem_cache, and also to derive its name. This function
@@ -3229,6 +3219,35 @@ void memcg_free_cache_params(struct kmem_cache *s)
 	kfree(s->memcg_params);
 }
 
+void memcg_register_cache(struct kmem_cache *s)
+{
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	if (is_root_cache(s))
+		return;
+
+	memcg = s->memcg_params->memcg;
+	id = memcg_cache_id(memcg);
+	root = s->memcg_params->root_cache;
+
+	css_get(&memcg->css);
+
+	/*
+	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
+	 * barrier here to ensure nobody will see the kmem_cache partially
+	 * initialized.
+	 */
+	smp_wmb();
+
+	root->memcg_params->memcg_caches[id] = s;
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
+	mutex_unlock(&memcg->slab_caches_mutex);
+}
+
 void memcg_release_cache(struct kmem_cache *s)
 {
 	struct kmem_cache *root;
@@ -3356,26 +3375,13 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
 	schedule_work(&cachep->memcg_params->destroy);
 }
 
-/*
- * This lock protects updaters, not readers. We want readers to be as fast as
- * they can, and they will either see NULL or a valid cache value. Our model
- * allow them to see NULL, in which case the root memcg will be selected.
- *
- * We need this lock because multiple allocations to the same cache from a non
- * will span more than one worker. Only one of them can create the cache.
- */
-static DEFINE_MUTEX(memcg_cache_mutex);
-
-/*
- * Called with memcg_cache_mutex held
- */
-static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
-					 struct kmem_cache *s)
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *s)
 {
 	struct kmem_cache *new;
 	static char *tmp_name = NULL;
 
-	lockdep_assert_held(&memcg_cache_mutex);
+	BUG_ON(!memcg_can_account_kmem(memcg));
 
 	/*
 	 * kmem_cache_create_memcg duplicates the given name and
@@ -3403,45 +3409,6 @@ static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
 	return new;
 }
 
-static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
-						  struct kmem_cache *cachep)
-{
-	struct kmem_cache *new_cachep;
-	int idx;
-
-	BUG_ON(!memcg_can_account_kmem(memcg));
-
-	idx = memcg_cache_id(memcg);
-
-	mutex_lock(&memcg_cache_mutex);
-	new_cachep = cache_from_memcg_idx(cachep, idx);
-	if (new_cachep) {
-		css_put(&memcg->css);
-		goto out;
-	}
-
-	new_cachep = kmem_cache_dup(memcg, cachep);
-	if (new_cachep == NULL) {
-		new_cachep = cachep;
-		css_put(&memcg->css);
-		goto out;
-	}
-
-	atomic_set(&new_cachep->memcg_params->nr_pages , 0);
-
-	/*
-	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
-	 * barrier here to ensure nobody will see the kmem_cache partially
-	 * initialized.
-	 */
-	smp_wmb();
-
-	cachep->memcg_params->memcg_caches[idx] = new_cachep;
-out:
-	mutex_unlock(&memcg_cache_mutex);
-	return new_cachep;
-}
-
 void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
@@ -3516,6 +3483,7 @@ static void memcg_create_cache_work_func(struct work_struct *w)
 
 	cw = container_of(w, struct create_work, work);
 	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	css_put(&cw->memcg->css);
 	kfree(cw);
 }
 
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 62712fe..51dc106 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -176,6 +176,12 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
 	get_online_cpus();
 	mutex_lock(&slab_mutex);
 
+	if (memcg) {
+		s = cache_from_memcg_idx(parent_cache, memcg_cache_id(memcg));
+		if (s)
+			goto out_unlock;
+	}
+
 	err = kmem_cache_sanity_check(memcg, name, size);
 	if (err)
 		goto out_unlock;
@@ -218,7 +224,7 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size,
 
 	s->refcount = 1;
 	list_add(&s->list, &slab_caches);
-	memcg_cache_list_add(memcg, s);
+	memcg_register_cache(s);
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
-- 
1.7.10.4
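
P.S. The bug fixed here is an instance of the generic check-then-act
race: the existence check runs outside the lock that serializes the
update. Below is a minimal userspace sketch of the fixed pattern, not
kernel code: the names (cache_slot, create_cache, get_or_create_cache)
are made up for illustration, and a pthread mutex stands in for the
slab_mutex. The point is that the re-check and the publication of the
new pointer happen under the same mutex, so a concurrent creator can
never silently overwrite an already-installed pointer.

/* race-fix-sketch.c -- illustration only, not kernel code */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t slab_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *cache_slot;        /* plays the role of memcg_caches[idx] */

static void *create_cache(void)
{
        return malloc(1);       /* stand-in for real cache creation */
}

/*
 * Both workers may run this concurrently for the same slot. The
 * existence check and the pointer publication are done under the
 * same mutex, so the second worker finds the first worker's cache
 * instead of creating and installing a duplicate.
 */
static void *get_or_create_cache(void)
{
        void *cache;

        pthread_mutex_lock(&slab_mutex);
        cache = cache_slot;             /* re-check under the lock ... */
        if (!cache) {
                cache = create_cache();
                if (cache)
                        cache_slot = cache;     /* ... before publishing */
        }
        pthread_mutex_unlock(&slab_mutex);
        return cache;
}

static void *worker(void *arg)
{
        (void)arg;
        return get_or_create_cache();
}

int main(void)
{
        pthread_t t1, t2;
        void *r1, *r2;

        pthread_create(&t1, NULL, worker, NULL);
        pthread_create(&t2, NULL, worker, NULL);
        pthread_join(t1, &r1);
        pthread_join(t2, &r2);

        /* both workers must end up with the same cache pointer */
        printf("%s\n", r1 == r2 ? "OK: single cache" : "BUG: duplicate cache");
        return 0;
}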