On Tue 19-12-17 20:26:14, Tetsuo Handa wrote: > When the OOM reaper set MMF_OOM_SKIP on the victim's mm before threads > sharing that mm get ->signal->oom_mm, the comment "That thread will now > get access to memory reserves since it has a pending fatal signal." no > longer stands. Also, since we introduced ALLOC_OOM watermark, the comment > "They don't get access to memory reserves, though, to avoid depletion of > all memory." no longer stands. > > This patch treats all thread groups sharing the victim's mm evenly, > and updates the outdated comment. Nack with a real life example where this matters. > Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> > Cc: Roman Gushchin <guro@xxxxxx> > Cc: Michal Hocko <mhocko@xxxxxxxx> > Cc: Johannes Weiner <hannes@xxxxxxxxxxx> > Cc: Vladimir Davydov <vdavydov.dev@xxxxxxxxx> > Cc: David Rientjes <rientjes@xxxxxxxxxx> > Cc: Tejun Heo <tj@xxxxxxxxxx> > --- > mm/oom_kill.c | 58 ++++++++++++++++++++++++++++------------------------------ > 1 file changed, 28 insertions(+), 30 deletions(-) > > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index 2b99e02..19028bd 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -833,7 +833,7 @@ static bool task_will_free_mem(struct task_struct *task) > return ret; > } > > -static void __oom_kill_process(struct task_struct *victim) > +static void __oom_kill_process(struct task_struct *victim, const bool silent) > { > struct task_struct *p; > struct mm_struct *mm; > @@ -868,35 +868,19 @@ static void __oom_kill_process(struct task_struct *victim) > count_vm_event(OOM_KILL); > count_memcg_event_mm(mm, OOM_KILL); > > - /* > - * We should send SIGKILL before granting access to memory reserves > - * in order to prevent the OOM victim from depleting the memory > - * reserves from the user space under its control. > - */ > - do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); > - mark_oom_victim(victim); > - pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", > - task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), > - K(get_mm_counter(victim->mm, MM_ANONPAGES)), > - K(get_mm_counter(victim->mm, MM_FILEPAGES)), > - K(get_mm_counter(victim->mm, MM_SHMEMPAGES))); > task_unlock(victim); > > /* > - * Kill all user processes sharing victim->mm in other thread groups, if > - * any. They don't get access to memory reserves, though, to avoid > - * depletion of all memory. This prevents mm->mmap_sem livelock when an > - * oom killed thread cannot exit because it requires the semaphore and > - * its contended by another thread trying to allocate memory itself. > - * That thread will now get access to memory reserves since it has a > - * pending fatal signal. > + * Kill all user processes sharing this mm. This helps the OOM reaper > + * to reclaim memory by interrupting threads waiting or holding > + * mm->mmap_sem for write. > */ > rcu_read_lock(); > for_each_process(p) { > + struct task_struct *t; > + > if (!process_shares_mm(p, mm)) > continue; > - if (same_thread_group(p, victim)) > - continue; > if (is_global_init(p)) { > can_oom_reap = false; > set_bit(MMF_OOM_SKIP, &mm->flags); > @@ -911,10 +895,26 @@ static void __oom_kill_process(struct task_struct *victim) > */ > if (unlikely(p->flags & PF_KTHREAD)) > continue; > + /* > + * We should send SIGKILL before granting access to memory > + * reserves in order to prevent the OOM victim from depleting > + * the memory reserves from the user space under its control. > + */ > do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); > + t = find_lock_task_mm(p); > + if (t) { > + mark_oom_victim(t); > + task_unlock(t); > + } > } > rcu_read_unlock(); > > + if (!silent) > + pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", > + task_pid_nr(victim), victim->comm, K(mm->total_vm), > + K(get_mm_counter(mm, MM_ANONPAGES)), > + K(get_mm_counter(mm, MM_FILEPAGES)), > + K(get_mm_counter(mm, MM_SHMEMPAGES))); > if (can_oom_reap) > wake_oom_reaper(victim); > > @@ -941,10 +941,8 @@ static void oom_kill_process(struct oom_control *oc, const char *message) > */ > task_lock(p); > if (task_will_free_mem(p)) { > - mark_oom_victim(p); > - wake_oom_reaper(p); > task_unlock(p); > - put_task_struct(p); > + __oom_kill_process(p, true); > return; > } > task_unlock(p); > @@ -983,13 +981,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message) > } > read_unlock(&tasklist_lock); > > - __oom_kill_process(victim); > + __oom_kill_process(victim, false); > } > > static int oom_kill_memcg_member(struct task_struct *task, void *unused) > { > get_task_struct(task); > - __oom_kill_process(task); > + __oom_kill_process(task, false); > return 0; > } > > @@ -1017,7 +1015,7 @@ static bool oom_kill_memcg_victim(struct oom_control *oc) > oc->chosen_task == INFLIGHT_VICTIM) > goto out; > > - __oom_kill_process(oc->chosen_task); > + __oom_kill_process(oc->chosen_task, false); > } > > out: > @@ -1095,8 +1093,8 @@ bool out_of_memory(struct oom_control *oc) > * quickly exit and free its memory. > */ > if (task_will_free_mem(current)) { > - mark_oom_victim(current); > - wake_oom_reaper(current); > + get_task_struct(current); > + __oom_kill_process(current, true); > return true; > } > > -- > 1.8.3.1 > -- Michal Hocko SUSE Labs -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>