Commit 6af3142bed1f52 ("mm/slub: don't wait for high-order page
allocation") made allocate_slab() try to allocate high-order slab pages
without __GFP_WAIT in order to avoid invoking reclaim/compaction when
we can fall back on low-order pages. However, it broke the
memcg/memory.high logic when kmem accounting is enabled. The
memory.high threshold works as a soft limit: an allocation does not
fail if the threshold is breached, but we call direct reclaim to
compensate for the excess. Without __GFP_WAIT we cannot invoke the
reclaimer, so we keep exceeding memory.high further and further until a
normal __GFP_WAIT allocation is issued.

Since memcg reclaim never triggers compaction, we can pass __GFP_WAIT
to memcg_charge_slab() even on high-order page allocations without any
performance impact. Fix this problem by excluding __GFP_WAIT only from
alloc_pages() while still forwarding it to memcg_charge_slab() if the
context allows.

Reported-by: Tejun Heo <tj@xxxxxxxxxx>
Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx>
---
 mm/slub.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index e180f8dcd06d..416a332277cb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1333,6 +1333,14 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
 	if (memcg_charge_slab(s, flags, order))
 		return NULL;
 
+	/*
+	 * Let the initial higher-order allocation fail under memory pressure
+	 * so we fall-back to the minimum order allocation.
+	 */
+	if (oo_order(oo) > oo_order(s->min))
+		flags = (flags | __GFP_NOWARN | __GFP_NOMEMALLOC) &
+			~(__GFP_NOFAIL | __GFP_WAIT);
+
 	if (node == NUMA_NO_NODE)
 		page = alloc_pages(flags, order);
 	else
@@ -1348,7 +1356,6 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
 	struct kmem_cache_order_objects oo = s->oo;
-	gfp_t alloc_gfp;
 	void *start, *p;
 	int idx, order;
 
@@ -1359,23 +1366,14 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	flags |= s->allocflags;
 
-	/*
-	 * Let the initial higher-order allocation fail under memory pressure
-	 * so we fall-back to the minimum order allocation.
-	 */
-	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
-	if ((alloc_gfp & __GFP_WAIT) && oo_order(oo) > oo_order(s->min))
-		alloc_gfp = (alloc_gfp | __GFP_NOMEMALLOC) & ~__GFP_WAIT;
-
-	page = alloc_slab_page(s, alloc_gfp, node, oo);
+	page = alloc_slab_page(s, flags, node, oo);
 	if (unlikely(!page)) {
 		oo = s->min;
-		alloc_gfp = flags;
 		/*
 		 * Allocation may have failed due to fragmentation.
 		 * Try a lower order alloc if possible
 		 */
-		page = alloc_slab_page(s, alloc_gfp, node, oo);
+		page = alloc_slab_page(s, flags, node, oo);
 		if (unlikely(!page))
 			goto out;
 		stat(s, ORDER_FALLBACK);
@@ -1385,7 +1383,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	    !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
 		int pages = 1 << oo_order(oo);
 
-		kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node);
+		kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
 
 		/*
 		 * Objects from caches that have a constructor don't get
-- 
2.1.4
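
As a side note for readers less familiar with gfp mask arithmetic, here
is a minimal userspace sketch of what the new alloc_slab_page() hunk
does: the caller's flags (and hence __GFP_WAIT) still reach
memcg_charge_slab(), while the mask handed to alloc_pages() for the
optimistic high-order attempt has __GFP_WAIT and __GFP_NOFAIL cleared.
The bit values and the high_order_gfp() helper below are stand-ins for
illustration, not the real linux/gfp.h definitions:

	#include <stdio.h>

	/* Stand-in bit values; the real ones live in linux/gfp.h. */
	#define __GFP_WAIT       0x01u
	#define __GFP_NOWARN     0x02u
	#define __GFP_NOFAIL     0x04u
	#define __GFP_NOMEMALLOC 0x08u

	typedef unsigned int gfp_t;

	/*
	 * Mask for the optimistic high-order attempt: don't sleep,
	 * don't warn on failure, don't dip into emergency reserves.
	 */
	static gfp_t high_order_gfp(gfp_t flags)
	{
		return (flags | __GFP_NOWARN | __GFP_NOMEMALLOC) &
			~(__GFP_NOFAIL | __GFP_WAIT);
	}

	int main(void)
	{
		gfp_t flags = __GFP_WAIT;	/* sleepable caller context */

		/* memcg_charge_slab() sees __GFP_WAIT and may reclaim. */
		printf("charge mask:     %#x\n", flags);
		/* alloc_pages() must not sleep for the high-order try. */
		printf("high-order mask: %#x\n", high_order_gfp(flags));
		return 0;
	}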