Commit "mm, oom: Avoid potential RCU stall at dump_tasks()." changed the behavior so that oom_kill_allocating_task != 0 implies oom_dump_tasks == 0. However, since we can expect the OOM reaper to reclaim memory quickly, and the majority of the latency comes not from for_each_process() in select_bad_process() but from printk() in dump_header(), waiting for in-flight OOM victims until the OOM reaper completes should give preferable results (i.e. a minimal number of OOM victims). As side effects of this patch, oom_kill_allocating_task != 0 no longer implies oom_dump_tasks == 0, the complicated conditions for deciding whether to enter the oom_kill_allocating_task path are simplified, and a theoretical bug in which the OOM killer retries the oom_kill_allocating_task path forever even after the OOM reaper has set MMF_OOM_SKIP is fixed. Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> --- mm/oom_kill.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 00b594c..64e582e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -367,19 +367,29 @@ static int oom_evaluate_task(struct task_struct *task, void *arg) * Simple selection loop. We choose the process with the highest number of * 'points'. In case scan was aborted, oc->chosen is set to -1. 
*/ -static void select_bad_process(struct oom_control *oc) +static const char *select_bad_process(struct oom_control *oc) { - if (is_memcg_oom(oc)) - mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); - else { - struct task_struct *p; + struct task_struct *p; - rcu_read_lock(); - for_each_process(p) - if (oom_evaluate_task(p, oc)) - break; - rcu_read_unlock(); + if (is_memcg_oom(oc)) { + mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); + return "Memory cgroup out of memory"; } + rcu_read_lock(); + for_each_process(p) + if (oom_evaluate_task(p, oc)) + break; + rcu_read_unlock(); + if (sysctl_oom_kill_allocating_task && oc->chosen != (void *)-1UL) { + list_for_each_entry(p, &oom_candidate_list, + oom_candidate_list) { + if (!same_thread_group(p, current)) + continue; + oc->chosen = current; + return "Out of memory (oom_kill_allocating_task)"; + } + } + return "Out of memory"; } /** @@ -1021,6 +1031,7 @@ bool out_of_memory(struct oom_control *oc) { unsigned long freed = 0; enum oom_constraint constraint = CONSTRAINT_NONE; + const char *message; if (oom_killer_disabled) return false; @@ -1061,15 +1072,7 @@ bool out_of_memory(struct oom_control *oc) oc->nodemask = NULL; check_panic_on_oom(oc, constraint); - if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && - current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && - current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { - oc->chosen = current; - oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)"); - return true; - } - - select_bad_process(oc); + message = select_bad_process(oc); /* Found nothing?!?! */ if (!oc->chosen) { dump_header(oc, NULL); @@ -1083,8 +1086,7 @@ bool out_of_memory(struct oom_control *oc) panic("System is deadlocked on memory\n"); } if (oc->chosen && oc->chosen != (void *)-1UL) - oom_kill_process(oc, !is_memcg_oom(oc) ? 
"Out of memory" : - "Memory cgroup out of memory"); + oom_kill_process(oc, message); while (!list_empty(&oom_candidate_list)) { struct task_struct *p = list_first_entry(&oom_candidate_list, struct task_struct, -- 1.8.3.1