On Fri, 30 Jul 2021, Aaron Tomlin wrote: > Documentation/admin-guide/sysctl/vm.rst | 5 ++-- > mm/oom_kill.c | 31 +++++++++++++++++++++---- > 2 files changed, 30 insertions(+), 6 deletions(-) > > diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst > index 003d5cc3751b..4c79fa00ddb3 100644 > --- a/Documentation/admin-guide/sysctl/vm.rst > +++ b/Documentation/admin-guide/sysctl/vm.rst > @@ -650,8 +650,9 @@ oom_dump_tasks > Enables a system-wide task dump (excluding kernel threads) to be produced > when the kernel performs an OOM-killing and includes such information as > pid, uid, tgid, vm size, rss, pgtables_bytes, swapents, oom_score_adj > -score, and name. This is helpful to determine why the OOM killer was > -invoked, to identify the rogue task that caused it, and to determine why > +score, oom eligibility status and name. This is helpful to determine why > +the OOM killer was invoked, to identify the rogue task that caused it, and > +to determine why > the OOM killer chose the task it did to kill. > > If this is set to zero, this information is suppressed. On very > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index c729a4c4a1ac..36daa6917b62 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -160,6 +160,27 @@ static inline bool is_sysrq_oom(struct oom_control *oc) > return oc->order == -1; > } > > +/** > + * is_task_eligible_oom - determine if and why a task cannot be OOM killed > + * @tsk: task to check > + * > + * Needs to be called with task_lock(). > + */ > +static const char * const is_task_oom_eligible(struct task_struct *p) > +{ > + long adj; > + > + adj = (long)p->signal->oom_score_adj; > + if (adj == OOM_SCORE_ADJ_MIN) > + return "M"; oom_score_adj is shown already in the tasklist dump, I'm not sure what value this adds. 
> + else if (test_bit(MMF_OOM_SKIP, &p->mm->flags) > + return "R"; > + else if (in_vfork(p)) > + return "V"; This is going to be racy: we can't show that a task emitted as part of the tasklist dump did not have in_vfork() == true at the time oom_badness() was called. Wouldn't it be better to simply print the output of oom_badness() to the tasklist dump instead so we get complete information? We could simply special-case a LONG_MIN return value as -1000 or "min". > + else > + return ""; > +} > + > /* return true if the task is not adequate as candidate victim task. */ > static bool oom_unkillable_task(struct task_struct *p) > { > @@ -401,12 +422,13 @@ static int dump_task(struct task_struct *p, void *arg) > return 0; > } > > - pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n", > + pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %1s %s\n", > task->pid, from_kuid(&init_user_ns, task_uid(task)), > task->tgid, task->mm->total_vm, get_mm_rss(task->mm), > mm_pgtables_bytes(task->mm), > get_mm_counter(task->mm, MM_SWAPENTS), > - task->signal->oom_score_adj, task->comm); > + task->signal->oom_score_adj, is_task_oom_eligible(task), > + task->comm); > task_unlock(task); > > return 0; > @@ -420,12 +442,13 @@ static int dump_task(struct task_struct *p, void *arg) > * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes > * are not shown. > * State information includes task's pid, uid, tgid, vm size, rss, > - * pgtables_bytes, swapents, oom_score_adj value, and name. > + * pgtables_bytes, swapents, oom_score_adj value, oom eligibility status > + * and name. 
> */ > static void dump_tasks(struct oom_control *oc) > { > pr_info("Tasks state (memory values in pages):\n"); > - pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n"); > + pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj oom_skipped name\n"); > > if (is_memcg_oom(oc)) > mem_cgroup_scan_tasks(oc->memcg, dump_task, oc); > -- > 2.31.1 > >