On Thu 09-07-20 08:26:46, Michal Hocko wrote: > On Thu 09-07-20 10:14:14, Yafang Shao wrote: [...] > > diff --git a/fs/proc/base.c b/fs/proc/base.c > > index 774784587..0da8efa41 100644 > > --- a/fs/proc/base.c > > +++ b/fs/proc/base.c > > @@ -528,7 +528,7 @@ static int proc_oom_score(struct seq_file *m, > > struct pid_namespace *ns, > > unsigned long totalpages = totalram_pages + total_swap_pages; > > unsigned long points = 0; > > > > - points = oom_badness(task, NULL, NULL, totalpages) * > > + points = 1000 + oom_badness(task, NULL, NULL, totalpages) * > > 1000 / totalpages; > > seq_printf(m, "%lu\n", points); > > > > And then update the documentation as "from 0 (never kill) to 3000 > > (always kill)" > > This is still not quite there yet, I am afraid. OOM_SCORE_ADJ_MIN tasks have > always reported 0 and I can imagine somebody might depend on this fact. > So you need to special case LONG_MIN at least. It would be also better > to stick with [0, 2000] range. usage * 1000 / total -> [0, 1000] adj -> [-1000, 1000] max (0, usage * 1000 / total + adj) -> [0, 2000] So effectively something like this. diff --git a/fs/proc/base.c b/fs/proc/base.c index d86c0afc8a85..fdf5a0be1cc3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -550,9 +550,9 @@ static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long totalpages = totalram_pages() + total_swap_pages; - unsigned long points = 0; + long points = 0; - points = oom_badness(task, totalpages) * 1000 / totalpages; + points = max(0, oom_badness(task, totalpages)); seq_printf(m, "%lu\n", points); return 0; diff --git a/mm/gup.c b/mm/gup.c index de9e36262ccb..75980dd5a2fc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1794,7 +1794,6 @@ static long __gup_longterm_locked(struct task_struct *tsk, vmas_tmp, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { - memalloc_nocma_restore(flags); if (rc < 0) goto out; @@ -1802,11 +1801,13 @@ static long __gup_longterm_locked(struct task_struct *tsk, for (i = 0; i < rc; i++) put_page(pages[i]); rc = -EOPNOTSUPP; + memalloc_nocma_restore(flags); goto out; } rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, vmas_tmp, gup_flags); + memalloc_nocma_restore(flags); } out: diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 6e94962893ee..30dbc302a677 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -196,13 +196,13 @@ static bool is_dump_unreclaim_slabs(void) * predictable as possible. The goal is to return the highest value for the * task consuming the most memory to avoid subsequent oom failures. */ -unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) +long oom_badness(struct task_struct *p, unsigned long totalpages) { long points; long adj; if (oom_unkillable_task(p)) - return 0; + return -1; p = find_lock_task_mm(p); if (!p) @@ -229,15 +229,10 @@ unsigned long oom_badness(struct task_struct *p, unsigned long totalpages) mm_pgtables_bytes(p->mm) / PAGE_SIZE; task_unlock(p); - /* Normalize to oom_score_adj units */ - adj *= totalpages / 1000; + points *= 1000 / totalpages; points += adj; - /* - * Never return 0 for an eligible task regardless of the root bonus and - * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). - */ - return points > 0 ? points : 1; + return points; } static const char * const oom_constraint_text[] = { @@ -341,7 +336,7 @@ static int oom_evaluate_task(struct task_struct *task, void *arg) } points = oom_badness(task, oc->totalpages); - if (!points || points < oc->chosen_points) + if (points > 0 || points < oc->chosen_points) goto next; select: -- Michal Hocko SUSE Labs