The patch titled Subject: memcg: only account kmem allocations marked as __GFP_ACCOUNT has been added to the -mm tree. Its filename is memcg-only-account-kmem-allocations-marked-as-__gfp_account.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/memcg-only-account-kmem-allocations-marked-as-__gfp_account.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/memcg-only-account-kmem-allocations-marked-as-__gfp_account.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx> Subject: memcg: only account kmem allocations marked as __GFP_ACCOUNT Black-list kmem accounting policy (aka __GFP_NOACCOUNT) turned out to be fragile and difficult to maintain, because there seem to be many more allocations that should not be accounted than those that should be. Besides, false accounting an allocation might result in much worse consequences than not accounting at all, namely increased memory consumption due to pinned dead kmem caches. So this patch switches kmem accounting to the white-policy: now only those kmem allocations that are marked as __GFP_ACCOUNT are accounted to memcg. Currently, no kmem allocations are marked like this. The following patches will mark several kmem allocations that are known to be easily triggered from userspace and therefore should be accounted to memcg. Signed-off-by: Vladimir Davydov <vdavydov@xxxxxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Greg Thelen <gthelen@xxxxxxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxx> Cc: Pekka Enberg <penberg@xxxxxxxxxx> Cc: David Rientjes <rientjes@xxxxxxxxxx> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/gfp.h | 9 +++++++++ include/linux/memcontrol.h | 2 ++ mm/page_alloc.c | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) diff -puN include/linux/gfp.h~memcg-only-account-kmem-allocations-marked-as-__gfp_account include/linux/gfp.h --- a/include/linux/gfp.h~memcg-only-account-kmem-allocations-marked-as-__gfp_account +++ a/include/linux/gfp.h @@ -30,6 +30,7 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_ATOMIC 0x80000u +#define ___GFP_ACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -72,11 +73,15 @@ struct vm_area_struct; * * __GFP_THISNODE forces the allocation to be satisified from the requested * node with no fallbacks or placement policy enforcements. + * + * __GFP_ACCOUNT causes the allocation to be accounted to kmemcg (only relevant + * to kmem allocations). */ #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) #define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) #define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) +#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT) /* * Watermark modifiers -- controls access to emergency reserves @@ -195,6 +200,9 @@ struct vm_area_struct; * GFP_KERNEL is typical for kernel-internal allocations. The caller requires * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. * + * GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is + * accounted to kmemcg. + * * GFP_NOWAIT is for kernel allocations that should not stall for direct * reclaim, start physical IO or use any filesystem callback. * @@ -234,6 +242,7 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) diff -puN include/linux/memcontrol.h~memcg-only-account-kmem-allocations-marked-as-__gfp_account include/linux/memcontrol.h --- a/include/linux/memcontrol.h~memcg-only-account-kmem-allocations-marked-as-__gfp_account +++ a/include/linux/memcontrol.h @@ -773,6 +773,8 @@ static inline bool __memcg_kmem_bypass(g { if (!memcg_kmem_enabled()) return true; + if (!(gfp & __GFP_ACCOUNT)) + return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; return false; diff -puN mm/page_alloc.c~memcg-only-account-kmem-allocations-marked-as-__gfp_account mm/page_alloc.c --- a/mm/page_alloc.c~memcg-only-account-kmem-allocations-marked-as-__gfp_account +++ a/mm/page_alloc.c @@ -3402,7 +3402,8 @@ EXPORT_SYMBOL(__free_page_frag); /* * alloc_kmem_pages charges newly allocated pages to the kmem resource counter - * of the current memory cgroup. + * of the current memory cgroup if __GFP_ACCOUNT is set, other than that it is + * equivalent to alloc_pages. * * It should be used when the caller would like to use kmalloc, but since the * allocation is large, it has to fall back to the page allocator. _ Patches currently in -mm which might be from vdavydov@xxxxxxxxxxxxx are revert-kernfs-do-not-account-ino_ida-allocations-to-memcg.patch revert-gfp-add-__gfp_noaccount.patch memcg-only-account-kmem-allocations-marked-as-__gfp_account.patch slab-add-slab_account-flag.patch vmalloc-allow-to-account-vmalloc-to-memcg.patch account-certain-kmem-allocations-to-memcg.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html