Currently out_of_memory() is full of get_task_struct()/put_task_struct()
calls. Since "mm, oom: avoid printk() iteration under RCU" introduced a list
for holding a snapshot of all OOM victim candidates, let's share that list
with select_bad_process() and oom_kill_process() in order to simplify task
refcount handling. As a result of this patch, the number of
get_task_struct()/put_task_struct() calls in the out_of_memory() path is
reduced to only two each.

A minimal userspace sketch of the resulting take-once/drop-once reference
pattern is appended after the diff.

Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>
Cc: Shakeel Butt <shakeelb@xxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Roman Gushchin <guro@xxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
---
 include/linux/sched.h |   2 +-
 mm/oom_kill.c         | 122 ++++++++++++++++++++++++--------------------------
 2 files changed, 60 insertions(+), 64 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 48c1a4c..4062999 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1247,7 +1247,7 @@ struct task_struct {
 #ifdef CONFIG_MMU
 	struct task_struct *oom_reaper_list;
 #endif
-	struct list_head oom_victim_list;
+	struct list_head oom_candidate;
 #ifdef CONFIG_VMAP_STACK
 	struct vm_struct *stack_vm_area;
 #endif
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 110f948..311e0e9 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -63,6 +63,7 @@
  *   and mark_oom_victim
  */
 DEFINE_MUTEX(oom_lock);
+static LIST_HEAD(oom_candidate_list);
 
 static inline bool is_memcg_oom(struct oom_control *oc)
 {
@@ -167,6 +168,41 @@ static bool oom_unkillable_task(struct task_struct *p)
 	return false;
 }
 
+static int add_candidate_task(struct task_struct *p, void *unused)
+{
+	if (!oom_unkillable_task(p)) {
+		get_task_struct(p);
+		list_add_tail(&p->oom_candidate, &oom_candidate_list);
+	}
+	return 0;
+}
+
+static void link_oom_candidates(struct oom_control *oc)
+{
+	struct task_struct *p;
+
+	if (is_memcg_oom(oc))
+		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, NULL);
+	else {
+		rcu_read_lock();
+		for_each_process(p)
+			add_candidate_task(p, NULL);
+		rcu_read_unlock();
+	}
+
+}
+
+static void unlink_oom_candidates(void)
+{
+	struct task_struct *p;
+	struct task_struct *t;
+
+	list_for_each_entry_safe(p, t, &oom_candidate_list, oom_candidate) {
+		list_del(&p->oom_candidate);
+		put_task_struct(p);
+	}
+}
+
 /*
  * Print out unreclaimble slabs info when unreclaimable slabs amount is greater
  * than all user memory (LRU pages)
@@ -344,16 +380,11 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
 		goto next;
 
 select:
-	if (oc->chosen)
-		put_task_struct(oc->chosen);
-	get_task_struct(task);
 	oc->chosen = task;
 	oc->chosen_points = points;
 next:
 	return 0;
 abort:
-	if (oc->chosen)
-		put_task_struct(oc->chosen);
 	oc->chosen = (void *)-1UL;
 	return 1;
 }
@@ -364,27 +395,13 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
  */
 static void select_bad_process(struct oom_control *oc)
 {
-	if (is_memcg_oom(oc))
-		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
-	else {
-		struct task_struct *p;
-
-		rcu_read_lock();
-		for_each_process(p)
-			if (oom_evaluate_task(p, oc))
-				break;
-		rcu_read_unlock();
-	}
-}
-
+	struct task_struct *p;
 
-static int add_candidate_task(struct task_struct *p, void *arg)
-{
-	if (!oom_unkillable_task(p)) {
-		get_task_struct(p);
-		list_add_tail(&p->oom_victim_list, (struct list_head *) arg);
+	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
+		cond_resched();
+		if (oom_evaluate_task(p, oc))
+			break;
 	}
-	return 0;
 }
 
 /**
@@ -399,21 +416,12 @@ static int add_candidate_task(struct task_struct *p, void *arg)
  */
 static void dump_tasks(struct oom_control *oc)
 {
-	static LIST_HEAD(list);
 	struct task_struct *p;
 	struct task_struct *t;
 
-	if (is_memcg_oom(oc))
-		mem_cgroup_scan_tasks(oc->memcg, add_candidate_task, &list);
-	else {
-		rcu_read_lock();
-		for_each_process(p)
-			add_candidate_task(p, &list);
-		rcu_read_unlock();
-	}
 	pr_info("Tasks state (memory values in pages):\n");
 	pr_info("[  pid  ]   uid  tgid total_vm      rss pgtables_bytes swapents oom_score_adj name\n");
-	list_for_each_entry(p, &list, oom_victim_list) {
+	list_for_each_entry(p, &oom_candidate_list, oom_candidate) {
 		cond_resched();
 		/* p may not have freeable memory in nodemask */
 		if (!is_memcg_oom(oc) && !oom_cpuset_eligible(p, oc))
@@ -430,10 +438,6 @@ static void dump_tasks(struct oom_control *oc)
 			t->signal->oom_score_adj, t->comm);
 		task_unlock(t);
 	}
-	list_for_each_entry_safe(p, t, &list, oom_victim_list) {
-		list_del(&p->oom_victim_list);
-		put_task_struct(p);
-	}
 }
 
 static void dump_oom_summary(struct oom_control *oc, struct task_struct *victim)
@@ -859,17 +863,11 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 	bool can_oom_reap = true;
 
 	p = find_lock_task_mm(victim);
-	if (!p) {
-		put_task_struct(victim);
+	if (!p)
 		return;
-	} else if (victim != p) {
-		get_task_struct(p);
-		put_task_struct(victim);
-		victim = p;
-	}
 
-	/* Get a reference to safely compare mm after task_unlock(victim) */
-	mm = victim->mm;
+	/* Get a reference to safely compare mm after task_unlock(p) */
+	mm = p->mm;
 	mmgrab(mm);
 
 	/* Raise event before sending signal: task reaper must see this */
@@ -881,16 +879,15 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 	 * in order to prevent the OOM victim from depleting the memory
 	 * reserves from the user space under its control.
 	 */
-	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, victim, PIDTYPE_TGID);
-	mark_oom_victim(victim);
+	do_send_sig_info(SIGKILL, SEND_SIG_PRIV, p, PIDTYPE_TGID);
+	mark_oom_victim(p);
 	pr_err("%s: Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB, UID:%u\n",
-		message, task_pid_nr(victim), victim->comm,
-		K(victim->mm->total_vm),
-		K(get_mm_counter(victim->mm, MM_ANONPAGES)),
-		K(get_mm_counter(victim->mm, MM_FILEPAGES)),
-		K(get_mm_counter(victim->mm, MM_SHMEMPAGES)),
-		from_kuid(&init_user_ns, task_uid(victim)));
-	task_unlock(victim);
+		message, task_pid_nr(p), p->comm, K(mm->total_vm),
+		K(get_mm_counter(mm, MM_ANONPAGES)),
+		K(get_mm_counter(mm, MM_FILEPAGES)),
+		K(get_mm_counter(mm, MM_SHMEMPAGES)),
+		from_kuid(&init_user_ns, task_uid(p)));
+	task_unlock(p);
 
 	/*
 	 * Kill all user processes sharing victim->mm in other thread groups, if
@@ -929,7 +926,6 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 		wake_oom_reaper(victim);
 
 	mmdrop(mm);
-	put_task_struct(victim);
 }
 #undef K
 
@@ -940,10 +936,8 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
 static int oom_kill_memcg_member(struct task_struct *task, void *message)
 {
 	if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
-	    !is_global_init(task)) {
-		get_task_struct(task);
+	    !is_global_init(task))
 		__oom_kill_process(task, message);
-	}
 	return 0;
 }
 
@@ -964,7 +958,6 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
 		mark_oom_victim(victim);
 		wake_oom_reaper(victim);
 		task_unlock(victim);
-		put_task_struct(victim);
 		return;
 	}
 	task_unlock(victim);
@@ -1073,6 +1066,8 @@ bool out_of_memory(struct oom_control *oc)
 	if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS))
 		return true;
 
+	link_oom_candidates(oc);
+
 	/*
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA and memcg) that may require different handling.
@@ -1086,10 +1081,9 @@ bool out_of_memory(struct oom_control *oc)
 	    current->mm && !oom_unkillable_task(current) &&
 	    oom_cpuset_eligible(current, oc) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
-		get_task_struct(current);
 		oc->chosen = current;
 		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
-		return true;
+		goto done;
 	}
 
 	select_bad_process(oc);
@@ -1108,6 +1102,8 @@ bool out_of_memory(struct oom_control *oc)
 	if (oc->chosen && oc->chosen != (void *)-1UL)
 		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
 				 "Memory cgroup out of memory");
+ done:
+	unlink_oom_candidates();
 	return !!oc->chosen;
 }
 
-- 
1.8.3.1
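P.S. For reference, below is a minimal userspace sketch of the
take-once/drop-once reference pattern this patch converts out_of_memory()
to. It is illustrative only: fake_task, get_task(), put_task(),
candidate_list and the other helper names are simplified stand-ins invented
for the example, not the kernel API.

/*
 * Illustrative sketch only (not kernel code).  Models the pattern the patch
 * moves to: pin every candidate once while building a snapshot list, walk
 * that list as many times as needed (selection, dumping), then drop all of
 * the references in one place.
 */
#include <stdio.h>

struct fake_task {
	int pid;
	long badness;		/* stand-in for oom_badness() points */
	int refcount;		/* stand-in for the task usage count */
	struct fake_task *next;	/* stand-in for list_head linkage */
};

static struct fake_task *candidate_list;	/* like oom_candidate_list */

static void get_task(struct fake_task *p) { p->refcount++; }
static void put_task(struct fake_task *p) { p->refcount--; }

/* like link_oom_candidates(): take exactly one reference per candidate */
static void link_candidates(struct fake_task **tasks, int n)
{
	for (int i = 0; i < n; i++) {
		get_task(tasks[i]);
		tasks[i]->next = candidate_list;
		candidate_list = tasks[i];
	}
}

/* like select_bad_process(): the walk needs no extra get/put pairs */
static struct fake_task *select_victim(void)
{
	struct fake_task *chosen = NULL;

	for (struct fake_task *p = candidate_list; p; p = p->next)
		if (!chosen || p->badness > chosen->badness)
			chosen = p;
	return chosen;
}

/* like unlink_oom_candidates(): the single place references are dropped */
static void unlink_candidates(void)
{
	while (candidate_list) {
		struct fake_task *p = candidate_list;

		candidate_list = p->next;
		put_task(p);
	}
}

int main(void)
{
	struct fake_task a = { .pid = 1, .badness = 10, .refcount = 1 };
	struct fake_task b = { .pid = 2, .badness = 99, .refcount = 1 };
	struct fake_task *tasks[] = { &a, &b };

	link_candidates(tasks, 2);
	printf("victim: pid %d\n", select_victim()->pid);	/* prints pid 2 */
	unlink_candidates();
	printf("refcounts back to %d and %d\n", a.refcount, b.refcount);
	return 0;
}

The point, as in the patch, is that the selection walk no longer has to
juggle a reference on the element it keeps, because the snapshot list holds
every candidate alive until the single unlink step at the end.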