The patch titled Subject: mm, oom: fix oom_unkillable_task for memcg OOMs has been added to the -mm tree. Its filename is mm-oom-fix-oom_unkillable_task-for-memcg-ooms.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-oom-fix-oom_unkillable_task-for-memcg-ooms.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-oom-fix-oom_unkillable_task-for-memcg-ooms.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/process/submit-checklist.rst when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Shakeel Butt <shakeelb@xxxxxxxxxx> Subject: mm, oom: fix oom_unkillable_task for memcg OOMs Currently, oom_unkillable_task() checks mems_allowed even for memcg OOMs, which does not make sense because memcg OOMs cannot be triggered by NUMA constraints. Fix that. This commit also removes the bogus usage of oom_unkillable_task() from oom_badness(). Currently, reading /proc/[pid]/oom_score performs a bogus cpuset_mems_allowed_intersects() check. Remove that. 
Link: http://lkml.kernel.org/r/20190617231207.160865-2-shakeelb@xxxxxxxxxx Signed-off-by: Shakeel Butt <shakeelb@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Roman Gushchin <guro@xxxxxx> Cc: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/proc/base.c | 3 +-- include/linux/oom.h | 1 - mm/oom_kill.c | 28 +++++++++++++++------------- 3 files changed, 16 insertions(+), 16 deletions(-) --- a/fs/proc/base.c~mm-oom-fix-oom_unkillable_task-for-memcg-ooms +++ a/fs/proc/base.c @@ -532,8 +532,7 @@ static int proc_oom_score(struct seq_fil unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; - points = oom_badness(task, NULL, NULL, totalpages) * - 1000 / totalpages; + points = oom_badness(task, totalpages) * 1000 / totalpages; seq_printf(m, "%lu\n", points); return 0; --- a/include/linux/oom.h~mm-oom-fix-oom_unkillable_task-for-memcg-ooms +++ a/include/linux/oom.h @@ -108,7 +108,6 @@ static inline vm_fault_t check_stable_ad bool __oom_reap_task_mm(struct mm_struct *mm); extern unsigned long oom_badness(struct task_struct *p, - struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); extern bool out_of_memory(struct oom_control *oc); --- a/mm/oom_kill.c~mm-oom-fix-oom_unkillable_task-for-memcg-ooms +++ a/mm/oom_kill.c @@ -152,20 +152,23 @@ static inline bool is_memcg_oom(struct o } /* return true if the task is not adequate as candidate victim task. 
*/ -static bool oom_unkillable_task(struct task_struct *p, - struct mem_cgroup *memcg, const nodemask_t *nodemask) +static bool oom_unkillable_task(struct task_struct *p, struct oom_control *oc) { if (is_global_init(p)) return true; if (p->flags & PF_KTHREAD) return true; - /* When mem_cgroup_out_of_memory() and p is not member of the group */ - if (memcg && !task_in_mem_cgroup(p, memcg)) - return true; + /* + * For memcg OOM, we reach here through mem_cgroup_scan_tasks(), no + * need to check p's memcg membership and the checks after this + * are irrelevant for memcg OOMs. + */ + if (is_memcg_oom(oc)) + return false; /* p may not have freeable memory in nodemask */ - if (!has_intersects_mems_allowed(p, nodemask)) + if (!has_intersects_mems_allowed(p, oc->nodemask)) return true; return false; @@ -201,13 +204,12 @@ static bool is_dump_unreclaim_slabs(void * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. 
*/ -unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, - const nodemask_t *nodemask, unsigned long totalpages) +unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) { long points; long adj; - if (oom_unkillable_task(p, memcg, nodemask)) + if (is_global_init(p) || p->flags & PF_KTHREAD) return 0; p = find_lock_task_mm(p); @@ -318,7 +320,7 @@ static int oom_evaluate_task(struct task struct oom_control *oc = arg; unsigned long points; - if (oom_unkillable_task(task, NULL, oc->nodemask)) + if (oom_unkillable_task(task, oc)) goto next; /* @@ -342,7 +344,7 @@ static int oom_evaluate_task(struct task goto select; } - points = oom_badness(task, NULL, oc->nodemask, oc->totalpages); + points = oom_badness(task, oc->totalpages); if (!points || points < oc->chosen_points) goto next; @@ -390,7 +392,7 @@ static int dump_task(struct task_struct struct oom_control *oc = arg; struct task_struct *task; - if (oom_unkillable_task(p, NULL, oc->nodemask)) + if (oom_unkillable_task(p, oc)) return 0; task = find_lock_task_mm(p); @@ -1088,7 +1090,7 @@ bool out_of_memory(struct oom_control *o check_panic_on_oom(oc); if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && - current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && + current->mm && !oom_unkillable_task(current, oc) && current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { get_task_struct(current); oc->chosen = current; _ Patches currently in -mm which might be from shakeelb@xxxxxxxxxx are memcg-oom-no-oom-kill-for-__gfp_retry_mayfail.patch memcg-fsnotify-no-oom-kill-for-remote-memcg-charging.patch mm-memcg-introduce-memoryeventslocal.patch mm-oom-refactor-dump_tasks-for-memcg-ooms.patch mm-oom-fix-oom_unkillable_task-for-memcg-ooms.patch