On Fri 08-07-16 01:00:13, Tetsuo Handa wrote: > >From 70de3fe92435095b6ecbb400c61e84a99f639d56 Mon Sep 17 00:00:00 2001 > From: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> > Date: Fri, 8 Jul 2016 00:28:12 +0900 > Subject: [PATCH 1/6] mm,oom_reaper: Reduce find_lock_task_mm() usage. > > Since holding mm_struct with elevated mm_count for a second is harmless, > we can determine mm_struct and hold it upon entry of oom_reap_task(). > This patch has no functional change. Future patch in this series will > eliminate find_lock_task_mm() usage from the OOM reaper. The changelog is quite poor, to be honest. It doesn't explain why this is really needed. What do you think about the following: " __oom_reap_task can be simplified a bit if it receives a valid mm from oom_reap_task, which might need it as well. We could drop one find_lock_task_mm call and also make the __oom_reap_task code flow easier to follow. Moreover, this will make a later patch in the series easier to review. Pinning mm's mm_count for a longer time is not really harmful because this will not pin much memory. This patch doesn't introduce any functional change. " > > Signed-off-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> Other than that the patch looks good to me. 
Acked-by: Michal Hocko <mhocko@xxxxxxxx> > --- > mm/oom_kill.c | 79 ++++++++++++++++++++++++++++------------------------------- > 1 file changed, 37 insertions(+), 42 deletions(-) > > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index 7d0a275..951eb1b 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -452,12 +452,10 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); > static struct task_struct *oom_reaper_list; > static DEFINE_SPINLOCK(oom_reaper_lock); > > -static bool __oom_reap_task(struct task_struct *tsk) > +static bool __oom_reap_task(struct task_struct *tsk, struct mm_struct *mm) > { > struct mmu_gather tlb; > struct vm_area_struct *vma; > - struct mm_struct *mm = NULL; > - struct task_struct *p; > struct zap_details details = {.check_swap_entries = true, > .ignore_dirty = true}; > bool ret = true; > @@ -478,22 +476,9 @@ static bool __oom_reap_task(struct task_struct *tsk) > */ > mutex_lock(&oom_lock); > > - /* > - * Make sure we find the associated mm_struct even when the particular > - * thread has already terminated and cleared its mm. > - * We might have race with exit path so consider our work done if there > - * is no mm. > - */ > - p = find_lock_task_mm(tsk); > - if (!p) > - goto unlock_oom; > - mm = p->mm; > - atomic_inc(&mm->mm_count); > - task_unlock(p); > - > if (!down_read_trylock(&mm->mmap_sem)) { > ret = false; > - goto mm_drop; > + goto unlock_oom; > } > > /* > @@ -503,7 +488,7 @@ static bool __oom_reap_task(struct task_struct *tsk) > */ > if (!mmget_not_zero(mm)) { > up_read(&mm->mmap_sem); > - goto mm_drop; > + goto unlock_oom; > } > > tlb_gather_mmu(&tlb, mm, 0, -1); > @@ -551,8 +536,6 @@ static bool __oom_reap_task(struct task_struct *tsk) > * put the oom_reaper out of the way. 
> */ > mmput_async(mm); > -mm_drop: > - mmdrop(mm); > unlock_oom: > mutex_unlock(&oom_lock); > return ret; > @@ -562,36 +545,45 @@ unlock_oom: > static void oom_reap_task(struct task_struct *tsk) > { > int attempts = 0; > + struct mm_struct *mm = NULL; > + struct task_struct *p = find_lock_task_mm(tsk); > + > + /* > + * Make sure we find the associated mm_struct even when the particular > + * thread has already terminated and cleared its mm. > + * We might have race with exit path so consider our work done if there > + * is no mm. > + */ > + if (!p) > + goto done; > + mm = p->mm; > + atomic_inc(&mm->mm_count); > + task_unlock(p); > > /* Retry the down_read_trylock(mmap_sem) a few times */ > - while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk)) > + while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task(tsk, mm)) > schedule_timeout_idle(HZ/10); > > - if (attempts > MAX_OOM_REAP_RETRIES) { > - struct task_struct *p; > + if (attempts <= MAX_OOM_REAP_RETRIES) > + goto done; > > - pr_info("oom_reaper: unable to reap pid:%d (%s)\n", > - task_pid_nr(tsk), tsk->comm); > + pr_info("oom_reaper: unable to reap pid:%d (%s)\n", > + task_pid_nr(tsk), tsk->comm); > > - /* > - * If we've already tried to reap this task in the past and > - * failed it probably doesn't make much sense to try yet again > - * so hide the mm from the oom killer so that it can move on > - * to another task with a different mm struct. > - */ > - p = find_lock_task_mm(tsk); > - if (p) { > - if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &p->mm->flags)) { > - pr_info("oom_reaper: giving up pid:%d (%s)\n", > - task_pid_nr(tsk), tsk->comm); > - set_bit(MMF_OOM_REAPED, &p->mm->flags); > - } > - task_unlock(p); > - } > - > - debug_show_all_locks(); > + /* > + * If we've already tried to reap this task in the past and > + * failed it probably doesn't make much sense to try yet again > + * so hide the mm from the oom killer so that it can move on > + * to another task with a different mm struct. 
> + */ > + if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &mm->flags)) { > + pr_info("oom_reaper: giving up pid:%d (%s)\n", > + task_pid_nr(tsk), tsk->comm); > + set_bit(MMF_OOM_REAPED, &mm->flags); > } > + debug_show_all_locks(); > > +done: > /* > * Clear TIF_MEMDIE because the task shouldn't be sitting on a > * reasonably reclaimable memory anymore or it is not a good candidate > @@ -603,6 +595,9 @@ static void oom_reap_task(struct task_struct *tsk) > > /* Drop a reference taken by wake_oom_reaper */ > put_task_struct(tsk); > + /* Drop a reference taken above. */ > + if (mm) > + mmdrop(mm); > } > > static int oom_reaper(void *unused) > -- > 1.8.3.1 -- Michal Hocko SUSE Labs -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>