On Mon 04-09-17 15:21:04, Roman Gushchin wrote: > The oom_kill_process() function consists of two logical parts: > the first one is responsible for considering task's children as > a potential victim and printing the debug information. > The second half is responsible for sending SIGKILL to all > tasks sharing the mm struct with the given victim. > > This commit splits the oom_kill_process() function with > an intention to re-use the the second half: __oom_kill_process(). > > The cgroup-aware OOM killer will kill multiple tasks > belonging to the victim cgroup. We don't need to print > the debug information for the each task, as well as play > with task selection (considering task's children), > so we can't use the existing oom_kill_process(). > > Signed-off-by: Roman Gushchin <guro@xxxxxx> > Cc: Michal Hocko <mhocko@xxxxxxxxxx> > Cc: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> > Cc: Johannes Weiner <hannes@xxxxxxxxxxx> > Cc: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> > Cc: David Rientjes <rientjes@xxxxxxxxxx> > Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> > Cc: Tejun Heo <tj@xxxxxxxxxx> > Cc: kernel-team@xxxxxx > Cc: cgroups@xxxxxxxxxxxxxxx > Cc: linux-doc@xxxxxxxxxxxxxxx > Cc: linux-kernel@xxxxxxxxxxxxxxx > Cc: linux-mm@xxxxxxxxx Looks good to me Acked-by: Michal Hocko <mhocko@xxxxxxxx> > --- > mm/oom_kill.c | 123 +++++++++++++++++++++++++++++++--------------------------- > 1 file changed, 65 insertions(+), 58 deletions(-) > > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index 99736e026712..f061b627092c 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -804,68 +804,12 @@ static bool task_will_free_mem(struct task_struct *task) > return ret; > } > > -static void oom_kill_process(struct oom_control *oc, const char *message) > +static void __oom_kill_process(struct task_struct *victim) > { > - struct task_struct *p = oc->chosen; > - unsigned int points = oc->chosen_points; > - struct task_struct *victim = p; > - struct task_struct *child; > - struct task_struct *t; > + struct task_struct *p; > struct mm_struct *mm; > - unsigned int victim_points = 0; > - static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, > - DEFAULT_RATELIMIT_BURST); > bool can_oom_reap = true; > > - /* > - * If the task is already exiting, don't alarm the sysadmin or kill > - * its children or threads, just give it access to memory reserves > - * so it can die quickly > - */ > - task_lock(p); > - if (task_will_free_mem(p)) { > - mark_oom_victim(p); > - wake_oom_reaper(p); > - task_unlock(p); > - put_task_struct(p); > - return; > - } > - task_unlock(p); > - > - if (__ratelimit(&oom_rs)) > - dump_header(oc, p); > - > - pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", > - message, task_pid_nr(p), p->comm, points); > - > - /* > - * If any of p's children has a different mm and is eligible for kill, > - * the one with the highest oom_badness() score is sacrificed for its > - * parent. This attempts to lose the minimal amount of work done while > - * still freeing memory. > - */ > - read_lock(&tasklist_lock); > - for_each_thread(p, t) { > - list_for_each_entry(child, &t->children, sibling) { > - unsigned int child_points; > - > - if (process_shares_mm(child, p->mm)) > - continue; > - /* > - * oom_badness() returns 0 if the thread is unkillable > - */ > - child_points = oom_badness(child, > - oc->memcg, oc->nodemask, oc->totalpages); > - if (child_points > victim_points) { > - put_task_struct(victim); > - victim = child; > - victim_points = child_points; > - get_task_struct(victim); > - } > - } > - } > - read_unlock(&tasklist_lock); > - > p = find_lock_task_mm(victim); > if (!p) { > put_task_struct(victim); > @@ -939,6 +883,69 @@ static void oom_kill_process(struct oom_control *oc, const char *message) > } > #undef K > > +static void oom_kill_process(struct oom_control *oc, const char *message) > +{ > + struct task_struct *p = oc->chosen; > + unsigned int points = oc->chosen_points; > + struct task_struct *victim = p; > + struct task_struct *child; > + struct task_struct *t; > + unsigned int victim_points = 0; > + static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL, > + DEFAULT_RATELIMIT_BURST); > + > + /* > + * If the task is already exiting, don't alarm the sysadmin or kill > + * its children or threads, just give it access to memory reserves > + * so it can die quickly > + */ > + task_lock(p); > + if (task_will_free_mem(p)) { > + mark_oom_victim(p); > + wake_oom_reaper(p); > + task_unlock(p); > + put_task_struct(p); > + return; > + } > + task_unlock(p); > + > + if (__ratelimit(&oom_rs)) > + dump_header(oc, p); > + > + pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", > + message, task_pid_nr(p), p->comm, points); > + > + /* > + * If any of p's children has a different mm and is eligible for kill, > + * the one with the highest oom_badness() score is sacrificed for its > + * parent. This attempts to lose the minimal amount of work done while > + * still freeing memory. > + */ > + read_lock(&tasklist_lock); > + for_each_thread(p, t) { > + list_for_each_entry(child, &t->children, sibling) { > + unsigned int child_points; > + > + if (process_shares_mm(child, p->mm)) > + continue; > + /* > + * oom_badness() returns 0 if the thread is unkillable > + */ > + child_points = oom_badness(child, > + oc->memcg, oc->nodemask, oc->totalpages); > + if (child_points > victim_points) { > + put_task_struct(victim); > + victim = child; > + victim_points = child_points; > + get_task_struct(victim); > + } > + } > + } > + read_unlock(&tasklist_lock); > + > + __oom_kill_process(victim); > +} > + > /* > * Determines whether the kernel must panic because of the panic_on_oom sysctl. > */ > -- > 2.13.5 -- Michal Hocko SUSE Labs -- To unsubscribe from this list: send the line "unsubscribe cgroups" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html