The patch titled Subject: mm, oom: organize oom context into struct has been added to the -mm tree. Its filename is mm-oom-organize-oom-context-into-struct.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-oom-organize-oom-context-into-struct.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-oom-organize-oom-context-into-struct.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: David Rientjes <rientjes@xxxxxxxxxx> Subject: mm, oom: organize oom context into struct There are essential elements to an oom context that are passed around to multiple functions. Organize these elements into a new struct, struct oom_context, that specifies the context for an oom condition. This patch introduces no functional change. [mhocko@xxxxxxx: s@oom_control@oom_context@] [mhocko@xxxxxxx: do not initialize on stack oom_context with NULL or 0] Signed-off-by: David Rientjes <rientjes@xxxxxxxxxx> Signed-off-by: Michal Hocko <mhocko@xxxxxxx> Cc: Jakob Unterwurzacher <jakobunt@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- drivers/tty/sysrq.c | 10 ++- include/linux/oom.h | 25 +++++---- mm/memcontrol.c | 13 ++-- mm/oom_kill.c | 115 ++++++++++++++++++------------------------ mm/page_alloc.c | 9 ++- 5 files changed, 89 insertions(+), 83 deletions(-) diff -puN drivers/tty/sysrq.c~mm-oom-organize-oom-context-into-struct drivers/tty/sysrq.c --- a/drivers/tty/sysrq.c~mm-oom-organize-oom-context-into-struct +++ a/drivers/tty/sysrq.c @@ -353,9 +353,15 @@ static struct sysrq_key_op sysrq_term_op static void moom_callback(struct work_struct *ignored) { + const gfp_t gfp_mask = GFP_KERNEL; + struct oom_context oc = { + .zonelist = node_zonelist(first_memory_node, gfp_mask), + .gfp_mask = gfp_mask, + .force_kill = true, + }; + mutex_lock(&oom_lock); - if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), - GFP_KERNEL, 0, NULL, true)) + if (!out_of_memory(&oc)) pr_info("OOM request ignored because killer is disabled\n"); mutex_unlock(&oom_lock); } diff -puN include/linux/oom.h~mm-oom-organize-oom-context-into-struct include/linux/oom.h --- a/include/linux/oom.h~mm-oom-organize-oom-context-into-struct +++ a/include/linux/oom.h @@ -12,6 +12,14 @@ struct notifier_block; struct mem_cgroup; struct task_struct; +struct oom_context { + struct zonelist *zonelist; + nodemask_t *nodemask; + gfp_t gfp_mask; + int order; + bool force_kill; +}; + /* * Types of limitations to the nodes from which allocations may occur */ @@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct extern int oom_kills_count(void); extern void note_oom_kill(void); -extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, +extern void oom_kill_process(struct oom_context *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, - struct mem_cgroup *memcg, nodemask_t *nodemask, - const char *message); + struct mem_cgroup *memcg, const char *message); -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order, const nodemask_t *nodemask, +extern void check_panic_on_oom(struct oom_context *oc, + enum oom_constraint constraint, struct mem_cgroup *memcg); -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill); +extern enum oom_scan_t oom_scan_process_thread(struct oom_context *oc, + struct task_struct *task, unsigned long totalpages); -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, - int order, nodemask_t *mask, bool force_kill); +extern bool out_of_memory(struct oom_context *oc); extern void exit_oom_victim(void); diff -puN mm/memcontrol.c~mm-oom-organize-oom-context-into-struct mm/memcontrol.c --- a/mm/memcontrol.c~mm-oom-organize-oom-context-into-struct +++ a/mm/memcontrol.c @@ -1545,6 +1545,10 @@ static unsigned long mem_cgroup_get_limi static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { + struct oom_context oc = { + .gfp_mask = gfp_mask, + .order = order, + }; struct mem_cgroup *iter; unsigned long chosen_points = 0; unsigned long totalpages; @@ -1563,7 +1567,7 @@ static void mem_cgroup_out_of_memory(str goto unlock; } - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg); + check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg); totalpages = mem_cgroup_get_limit(memcg) ? : 1; for_each_mem_cgroup_tree(iter, memcg) { struct css_task_iter it; @@ -1571,8 +1575,7 @@ static void mem_cgroup_out_of_memory(str css_task_iter_start(&iter->css, &it); while ((task = css_task_iter_next(&it))) { - switch (oom_scan_process_thread(task, totalpages, NULL, - false)) { + switch (oom_scan_process_thread(&oc, task, totalpages)) { case OOM_SCAN_SELECT: if (chosen) put_task_struct(chosen); @@ -1610,8 +1613,8 @@ static void mem_cgroup_out_of_memory(str if (chosen) { points = chosen_points * 1000 / totalpages; - oom_kill_process(chosen, gfp_mask, order, points, totalpages, - memcg, NULL, "Memory cgroup out of memory"); + oom_kill_process(&oc, chosen, points, totalpages, memcg, + "Memory cgroup out of memory"); } unlock: mutex_unlock(&oom_lock); diff -puN mm/oom_kill.c~mm-oom-organize-oom-context-into-struct mm/oom_kill.c --- a/mm/oom_kill.c~mm-oom-organize-oom-context-into-struct +++ a/mm/oom_kill.c @@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_st * Determine the type of allocation constraint. */ #ifdef CONFIG_NUMA -static enum oom_constraint constrained_alloc(struct zonelist *zonelist, - gfp_t gfp_mask, nodemask_t *nodemask, - unsigned long *totalpages) +static enum oom_constraint constrained_alloc(struct oom_context *oc, + unsigned long *totalpages) { struct zone *zone; struct zoneref *z; - enum zone_type high_zoneidx = gfp_zone(gfp_mask); + enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask); bool cpuset_limited = false; int nid; /* Default to all available memory */ *totalpages = totalram_pages + total_swap_pages; - if (!zonelist) + if (!oc->zonelist) return CONSTRAINT_NONE; /* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ - if (gfp_mask & __GFP_THISNODE) + if (oc->gfp_mask & __GFP_THISNODE) return CONSTRAINT_NONE; /* @@ -224,17 +223,18 @@ static enum oom_constraint constrained_a * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). */ - if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) { + if (oc->nodemask && + !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { *totalpages = total_swap_pages; - for_each_node_mask(nid, *nodemask) + for_each_node_mask(nid, *oc->nodemask) *totalpages += node_spanned_pages(nid); return CONSTRAINT_MEMORY_POLICY; } /* Check this allocation failure is caused by cpuset's wall function */ - for_each_zone_zonelist_nodemask(zone, z, zonelist, - high_zoneidx, nodemask) - if (!cpuset_zone_allowed(zone, gfp_mask)) + for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, + high_zoneidx, oc->nodemask) + if (!cpuset_zone_allowed(zone, oc->gfp_mask)) cpuset_limited = true; if (cpuset_limited) { @@ -246,20 +246,18 @@ static enum oom_constraint constrained_a return CONSTRAINT_NONE; } #else -static enum oom_constraint constrained_alloc(struct zonelist *zonelist, - gfp_t gfp_mask, nodemask_t *nodemask, - unsigned long *totalpages) +static enum oom_constraint constrained_alloc(struct oom_context *oc, + unsigned long *totalpages) { *totalpages = totalram_pages + total_swap_pages; return CONSTRAINT_NONE; } #endif -enum oom_scan_t oom_scan_process_thread(struct task_struct *task, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill) +enum oom_scan_t oom_scan_process_thread(struct oom_context *oc, + struct task_struct *task, unsigned long totalpages) { - if (oom_unkillable_task(task, NULL, nodemask)) + if (oom_unkillable_task(task, NULL, oc->nodemask)) return OOM_SCAN_CONTINUE; /* @@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread( * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { - if (!force_kill) + if (!oc->force_kill) return OOM_SCAN_ABORT; } if (!task->mm) @@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread( if (oom_task_origin(task)) return OOM_SCAN_SELECT; - if (task_will_free_mem(task) && !force_kill) + if (task_will_free_mem(task) && !oc->force_kill) return OOM_SCAN_ABORT; return OOM_SCAN_OK; @@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread( /* * Simple selection loop. We chose the process with the highest * number of 'points'. Returns -1 on scan abort. - * - * (not docbooked, we don't want this one cluttering up the manual) */ -static struct task_struct *select_bad_process(unsigned int *ppoints, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill) +static struct task_struct *select_bad_process(struct oom_context *oc, + unsigned int *ppoints, unsigned long totalpages) { struct task_struct *g, *p; struct task_struct *chosen = NULL; @@ -304,8 +299,7 @@ static struct task_struct *select_bad_pr for_each_process_thread(g, p) { unsigned int points; - switch (oom_scan_process_thread(p, totalpages, nodemask, - force_kill)) { + switch (oom_scan_process_thread(oc, p, totalpages)) { case OOM_SCAN_SELECT: chosen = p; chosen_points = ULONG_MAX; @@ -318,7 +312,7 @@ static struct task_struct *select_bad_pr case OOM_SCAN_OK: break; }; - points = oom_badness(p, NULL, nodemask, totalpages); + points = oom_badness(p, NULL, oc->nodemask, totalpages); if (!points || points < chosen_points) continue; /* Prefer thread group leaders for display purposes */ @@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup rcu_read_unlock(); } -static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, - struct mem_cgroup *memcg, const nodemask_t *nodemask) +static void dump_header(struct oom_context *oc, struct task_struct *p, + struct mem_cgroup *memcg) { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " "oom_score_adj=%hd\n", - current->comm, gfp_mask, order, + current->comm, oc->gfp_mask, oc->order, current->signal->oom_score_adj); cpuset_print_task_mems_allowed(current); task_unlock(current); @@ -396,7 +390,7 @@ static void dump_header(struct task_stru else show_mem(SHOW_MEM_FILTER_NODES); if (sysctl_oom_dump_tasks) - dump_tasks(memcg, nodemask); + dump_tasks(memcg, oc->nodemask); } /* @@ -487,10 +481,9 @@ void oom_killer_enable(void) * Must be called while holding a reference to p, which will be released upon * returning. */ -void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, +void oom_kill_process(struct oom_context *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, - struct mem_cgroup *memcg, nodemask_t *nodemask, - const char *message) + struct mem_cgroup *memcg, const char *message) { struct task_struct *victim = p; struct task_struct *child; @@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct task_unlock(p); if (__ratelimit(&oom_rs)) - dump_header(p, gfp_mask, order, memcg, nodemask); + dump_header(oc, p, memcg); task_lock(p); pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", @@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct /* * oom_badness() returns 0 if the thread is unkillable */ - child_points = oom_badness(child, memcg, nodemask, + child_points = oom_badness(child, memcg, oc->nodemask, totalpages); if (child_points > victim_points) { put_task_struct(victim); @@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct /* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ -void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order, const nodemask_t *nodemask, +void check_panic_on_oom(struct oom_context *oc, enum oom_constraint constraint, struct mem_cgroup *memcg) { if (likely(!sysctl_panic_on_oom)) @@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constra if (constraint != CONSTRAINT_NONE) return; } - dump_header(NULL, gfp_mask, order, memcg, nodemask); + dump_header(oc, NULL, memcg); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } @@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notif EXPORT_SYMBOL_GPL(unregister_oom_notifier); /** - * __out_of_memory - kill the "best" process when we run out of memory - * @zonelist: zonelist pointer - * @gfp_mask: memory allocation flags - * @order: amount of memory being requested as a power of 2 - * @nodemask: nodemask passed to page allocator - * @force_kill: true if a task must be killed, even if others are exiting + * out_of_memory - kill the "best" process when we run out of memory + * @oc: pointer to struct oom_context * * If we run out of memory, we have the choice between either * killing a random task (bad), letting the system crash (worse) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, - int order, nodemask_t *nodemask, bool force_kill) +bool out_of_memory(struct oom_context *oc) { - const nodemask_t *mpol_mask; struct task_struct *p; unsigned long totalpages; unsigned long freed = 0; @@ -661,7 +647,7 @@ bool out_of_memory(struct zonelist *zone if (oom_killer_disabled) return false; - if (!force_kill) { + if (!oc->force_kill) { blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) /* Got some memory back in the last second. */ @@ -686,39 +672,38 @@ bool out_of_memory(struct zonelist *zone * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ - constraint = constrained_alloc(zonelist, gfp_mask, nodemask, - &totalpages); - mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; - if (!force_kill) - check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL); + constraint = constrained_alloc(oc, &totalpages); + if (constraint != CONSTRAINT_MEMORY_POLICY) + oc->nodemask = NULL; + if (!oc->force_kill) + check_panic_on_oom(oc, constraint, NULL); /* * not affecting force_kill because sysrq triggered OOM killer runs from * the workqueue context so current->mm will be NULL */ if (sysctl_oom_kill_allocating_task && current->mm && - !oom_unkillable_task(current, NULL, nodemask) && + !oom_unkillable_task(current, NULL, oc->nodemask) && current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { get_task_struct(current); - oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, - nodemask, + oom_kill_process(oc, current, 0, totalpages, NULL, "Out of memory (oom_kill_allocating_task)"); goto out; } - p = select_bad_process(&points, totalpages, mpol_mask, force_kill); + p = select_bad_process(oc, &points, totalpages); /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { - if (!force_kill) { - dump_header(NULL, gfp_mask, order, NULL, mpol_mask); + if (!oc->force_kill) { + dump_header(oc, NULL, NULL); panic("Out of memory and no killable processes...\n"); } else { pr_info("Sysrq triggered out of memory. No killable task found...\n"); } } if (p != (void *)-1UL) { - oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, - nodemask, "Out of memory"); + oom_kill_process(oc, p, points, totalpages, NULL, + "Out of memory"); killed = 1; } out: @@ -739,13 +724,15 @@ out: */ void pagefault_out_of_memory(void) { + struct oom_context oc = { 0 }; + if (mem_cgroup_oom_synchronize(true)) return; if (!mutex_trylock(&oom_lock)) return; - if (!out_of_memory(NULL, 0, 0, NULL, false)) { + if (!out_of_memory(&oc)) { /* * There shouldn't be any user tasks runnable while the * OOM killer is disabled, so the current task has to diff -puN mm/page_alloc.c~mm-oom-organize-oom-context-into-struct mm/page_alloc.c --- a/mm/page_alloc.c~mm-oom-organize-oom-context-into-struct +++ a/mm/page_alloc.c @@ -2671,6 +2671,12 @@ static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac, unsigned long *did_some_progress) { + struct oom_context oc = { + .zonelist = ac->zonelist, + .nodemask = ac->nodemask, + .gfp_mask = gfp_mask, + .order = order, + }; struct page *page; *did_some_progress = 0; @@ -2722,8 +2728,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, un goto out; } /* Exhausted what can be done so it's blamo time */ - if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false) - || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) + if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) *did_some_progress = 1; out: mutex_unlock(&oom_lock); _ Patches currently in -mm which might be from rientjes@xxxxxxxxxx are slab-infrastructure-for-bulk-object-allocation-and-freeing-v3.patch slab-infrastructure-for-bulk-object-allocation-and-freeing-v3-fix.patch slub-bulk-alloc-extract-objects-from-the-per-cpu-slab.patch oom-do-not-panic-when-oom-killer-is-sysrq-triggered.patch oom-do-not-invoke-oom-notifiers-on-sysrqf.patch mm-oom-organize-oom-context-into-struct.patch oom-split-out-forced-oom-killer.patch page-flags-define-behavior-slb-related-flags-on-compound-pages.patch mm-utilc-add-kstrimdup.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html