The oom_kill_process() function consists of two logical parts: a first one is responsible for considering task's children as a potential victim and some debug output. The second half is responsible for sending SIGKILL to all tasks sharing mm with the given victim. This commit splits the oom_kill_process() function and exports __oom_kill_process (second half) to reuse it from the memory cgroup code. Signed-off-by: Roman Gushchin <guro@xxxxxx> Cc: Tejun Heo <tj@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: Li Zefan <lizefan@xxxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Cc: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> Cc: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> Cc: kernel-team@xxxxxx Cc: cgroups@xxxxxxxxxxxxxxx Cc: linux-doc@xxxxxxxxxxxxxxx Cc: linux-kernel@xxxxxxxxxxxxxxx Cc: linux-mm@xxxxxxxxx --- include/linux/oom.h | 2 + mm/oom_kill.c | 121 +++++++++++++++++++++++++++------------------------- 2 files changed, 66 insertions(+), 57 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 8a266e2..47d9495 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -79,6 +79,8 @@ extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); +extern void __oom_kill_process(struct task_struct *victim); + /* sysctls */ extern int sysctl_oom_dump_tasks; extern int sysctl_oom_kill_allocating_task; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f8b0fb1..11b60a5 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -803,67 +803,12 @@ static bool task_will_free_mem(struct task_struct *task) return ret; } -static void oom_kill_process(struct oom_control *oc, const char *message) +void __oom_kill_process(struct task_struct *victim) { - struct task_struct *p = oc->chosen; - unsigned int points = oc->chosen_points; - struct task_struct *victim = p; - struct task_struct *child; - struct task_struct *t; + struct task_struct *p; struct mm_struct *mm; - unsigned int victim_points = 0; - static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, - DEFAULT_RATELIMIT_BURST); bool can_oom_reap = true; - /* - * If the task is already exiting, don't alarm the sysadmin or kill - * its children or threads, just set TIF_MEMDIE so it can die quickly - */ - task_lock(p); - if (task_will_free_mem(p)) { - mark_oom_victim(p); - wake_oom_reaper(p); - task_unlock(p); - put_task_struct(p); - return; - } - task_unlock(p); - - if (__ratelimit(&oom_rs)) - dump_header(oc, p); - - pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", - message, task_pid_nr(p), p->comm, points); - - /* - * If any of p's children has a different mm and is eligible for kill, - * the one with the highest oom_badness() score is sacrificed for its - * parent. This attempts to lose the minimal amount of work done while - * still freeing memory. - */ - read_lock(&tasklist_lock); - for_each_thread(p, t) { - list_for_each_entry(child, &t->children, sibling) { - unsigned int child_points; - - if (process_shares_mm(child, p->mm)) - continue; - /* - * oom_badness() returns 0 if the thread is unkillable - */ - child_points = oom_badness(child, - oc->memcg, oc->nodemask, oc->totalpages); - if (child_points > victim_points) { - put_task_struct(victim); - victim = child; - victim_points = child_points; - get_task_struct(victim); - } - } - } - read_unlock(&tasklist_lock); - p = find_lock_task_mm(victim); if (!p) { put_task_struct(victim); @@ -930,6 +875,68 @@ static void oom_kill_process(struct oom_control *oc, const char *message) mmdrop(mm); put_task_struct(victim); } + +static void oom_kill_process(struct oom_control *oc, const char *message) +{ + struct task_struct *p = oc->chosen; + unsigned int points = oc->chosen_points; + struct task_struct *victim = p; + struct task_struct *child; + struct task_struct *t; + unsigned int victim_points = 0; + static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + /* + * If the task is already exiting, don't alarm the sysadmin or kill + * its children or threads, just set TIF_MEMDIE so it can die quickly + */ + task_lock(p); + if (task_will_free_mem(p)) { + mark_oom_victim(p); + wake_oom_reaper(p); + task_unlock(p); + put_task_struct(p); + return; + } + task_unlock(p); + + if (__ratelimit(&oom_rs)) + dump_header(oc, p); + + pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", + message, task_pid_nr(p), p->comm, points); + + /* + * If any of p's children has a different mm and is eligible for kill, + * the one with the highest oom_badness() score is sacrificed for its + * parent. This attempts to lose the minimal amount of work done while + * still freeing memory. + */ + read_lock(&tasklist_lock); + for_each_thread(p, t) { + list_for_each_entry(child, &t->children, sibling) { + unsigned int child_points; + + if (process_shares_mm(child, p->mm)) + continue; + /* + * oom_badness() returns 0 if the thread is unkillable + */ + child_points = oom_badness(child, + oc->memcg, oc->nodemask, oc->totalpages); + if (child_points > victim_points) { + put_task_struct(victim); + victim = child; + victim_points = child_points; + get_task_struct(victim); + } + } + } + read_unlock(&tasklist_lock); + + __oom_kill_process(victim); +} #undef K /* -- 2.7.4 -- To unsubscribe from this list: send the line "unsubscribe cgroups" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html