> + > +static void select_victim_memcg(struct mem_cgroup *root, struct oom_control *oc) > +{ > + struct mem_cgroup *iter; > + > + oc->chosen_memcg = NULL; > + oc->chosen_points = 0; > + > + /* > + * The oom_score is calculated for leaf memory cgroups (including > + * the root memcg). > + */ > + rcu_read_lock(); > + for_each_mem_cgroup_tree(iter, root) { > + long score; > + > + if (memcg_has_children(iter) && iter != root_mem_cgroup) > + continue; > + Cgroup v2 does not support charge migration between memcgs, so there can be intermediate nodes that hold the major share of the charge of the processes in their leaf descendants. Skipping such intermediate nodes will effectively protect those processes from the OOM killer (they end up lower on the list to be killed). Is it OK not to handle this scenario? If so, shouldn't we document it? > + score = oom_evaluate_memcg(iter, oc->nodemask, oc->totalpages); > + > + /* > + * Ignore empty and non-eligible memory cgroups. > + */ > + if (score == 0) > + continue; > + > + /* > + * If there are inflight OOM victims, we don't need > + * to look further for new victims. > + */ > + if (score == -1) { > + oc->chosen_memcg = INFLIGHT_VICTIM; > + mem_cgroup_iter_break(root, iter); > + break; > + } > + > + if (score > oc->chosen_points) { > + oc->chosen_points = score; > + oc->chosen_memcg = iter; > + } > + } > + > + if (oc->chosen_memcg && oc->chosen_memcg != INFLIGHT_VICTIM) > + css_get(&oc->chosen_memcg->css); > + > + rcu_read_unlock(); > +} -- To unsubscribe from this list: send the line "unsubscribe cgroups" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html