The patch titled Subject: mm, oom: organize oom context into struct has been added to the -mm tree. Its filename is mm-oom-organize-oom-context-into-struct.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/mm-oom-organize-oom-context-into-struct.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/mm-oom-organize-oom-context-into-struct.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: David Rientjes <rientjes@xxxxxxxxxx> Subject: mm, oom: organize oom context into struct There are essential elements to an oom context that are passed around to multiple functions. Organize these elements into a new struct, struct oom_control, that specifies the context for an oom condition. This patch introduces no functional change. Signed-off-by: David Rientjes <rientjes@xxxxxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxx> Cc: Sergey Senozhatsky <sergey.senozhatsky.work@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- drivers/tty/sysrq.c | 12 +++- include/linux/oom.h | 25 +++++---- mm/memcontrol.c | 16 ++++- mm/oom_kill.c | 115 +++++++++++++++++++----------------------- mm/page_alloc.c | 10 ++- 5 files changed, 98 insertions(+), 80 deletions(-) diff -puN drivers/tty/sysrq.c~mm-oom-organize-oom-context-into-struct drivers/tty/sysrq.c --- a/drivers/tty/sysrq.c~mm-oom-organize-oom-context-into-struct +++ a/drivers/tty/sysrq.c @@ -353,9 +353,17 @@ static struct sysrq_key_op sysrq_term_op static void moom_callback(struct work_struct *ignored) { + const gfp_t gfp_mask = GFP_KERNEL; + struct oom_control oc = { + .zonelist = node_zonelist(first_memory_node, gfp_mask), + .nodemask = NULL, + .gfp_mask = gfp_mask, + .order = 0, + .force_kill = true, + }; + mutex_lock(&oom_lock); - if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), - GFP_KERNEL, 0, NULL, true)) + if (!out_of_memory(&oc)) pr_info("OOM request ignored because killer is disabled\n"); mutex_unlock(&oom_lock); } diff -puN include/linux/oom.h~mm-oom-organize-oom-context-into-struct include/linux/oom.h --- a/include/linux/oom.h~mm-oom-organize-oom-context-into-struct +++ a/include/linux/oom.h @@ -12,6 +12,14 @@ struct notifier_block; struct mem_cgroup; struct task_struct; +struct oom_control { + struct zonelist *zonelist; + nodemask_t *nodemask; + gfp_t gfp_mask; + int order; + bool force_kill; +}; + /* * Types of limitations to the nodes from which allocations may occur */ @@ -57,21 +65,18 @@ extern unsigned long oom_badness(struct extern int oom_kills_count(void); extern void note_oom_kill(void); -extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, +extern void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, - struct mem_cgroup *memcg, nodemask_t *nodemask, - const char *message); + struct mem_cgroup *memcg, const char *message); -extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order, const nodemask_t *nodemask, +extern void check_panic_on_oom(struct oom_control *oc, + enum oom_constraint constraint, struct mem_cgroup *memcg); -extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill); +extern enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, + struct task_struct *task, unsigned long totalpages); -extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, - int order, nodemask_t *mask, bool force_kill); +extern bool out_of_memory(struct oom_control *oc); extern void exit_oom_victim(void); diff -puN mm/memcontrol.c~mm-oom-organize-oom-context-into-struct mm/memcontrol.c --- a/mm/memcontrol.c~mm-oom-organize-oom-context-into-struct +++ a/mm/memcontrol.c @@ -1545,6 +1545,13 @@ static unsigned long mem_cgroup_get_limi static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { + struct oom_control oc = { + .zonelist = NULL, + .nodemask = NULL, + .gfp_mask = gfp_mask, + .order = order, + .force_kill = false, + }; struct mem_cgroup *iter; unsigned long chosen_points = 0; unsigned long totalpages; @@ -1563,7 +1570,7 @@ static void mem_cgroup_out_of_memory(str goto unlock; } - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg); + check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg); totalpages = mem_cgroup_get_limit(memcg) ? : 1; for_each_mem_cgroup_tree(iter, memcg) { struct css_task_iter it; @@ -1571,8 +1578,7 @@ static void mem_cgroup_out_of_memory(str css_task_iter_start(&iter->css, &it); while ((task = css_task_iter_next(&it))) { - switch (oom_scan_process_thread(task, totalpages, NULL, - false)) { + switch (oom_scan_process_thread(&oc, task, totalpages)) { case OOM_SCAN_SELECT: if (chosen) put_task_struct(chosen); @@ -1610,8 +1616,8 @@ static void mem_cgroup_out_of_memory(str if (chosen) { points = chosen_points * 1000 / totalpages; - oom_kill_process(chosen, gfp_mask, order, points, totalpages, - memcg, NULL, "Memory cgroup out of memory"); + oom_kill_process(&oc, chosen, points, totalpages, memcg, + "Memory cgroup out of memory"); } unlock: mutex_unlock(&oom_lock); diff -puN mm/oom_kill.c~mm-oom-organize-oom-context-into-struct mm/oom_kill.c --- a/mm/oom_kill.c~mm-oom-organize-oom-context-into-struct +++ a/mm/oom_kill.c @@ -196,27 +196,26 @@ unsigned long oom_badness(struct task_st * Determine the type of allocation constraint. */ #ifdef CONFIG_NUMA -static enum oom_constraint constrained_alloc(struct zonelist *zonelist, - gfp_t gfp_mask, nodemask_t *nodemask, - unsigned long *totalpages) +static enum oom_constraint constrained_alloc(struct oom_control *oc, + unsigned long *totalpages) { struct zone *zone; struct zoneref *z; - enum zone_type high_zoneidx = gfp_zone(gfp_mask); + enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask); bool cpuset_limited = false; int nid; /* Default to all available memory */ *totalpages = totalram_pages + total_swap_pages; - if (!zonelist) + if (!oc->zonelist) return CONSTRAINT_NONE; /* * Reach here only when __GFP_NOFAIL is used. So, we should avoid * to kill current.We have to random task kill in this case. * Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now. */ - if (gfp_mask & __GFP_THISNODE) + if (oc->gfp_mask & __GFP_THISNODE) return CONSTRAINT_NONE; /* @@ -224,17 +223,18 @@ static enum oom_constraint constrained_a * the page allocator means a mempolicy is in effect. Cpuset policy * is enforced in get_page_from_freelist(). */ - if (nodemask && !nodes_subset(node_states[N_MEMORY], *nodemask)) { + if (oc->nodemask && + !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { *totalpages = total_swap_pages; - for_each_node_mask(nid, *nodemask) + for_each_node_mask(nid, *oc->nodemask) *totalpages += node_spanned_pages(nid); return CONSTRAINT_MEMORY_POLICY; } /* Check this allocation failure is caused by cpuset's wall function */ - for_each_zone_zonelist_nodemask(zone, z, zonelist, - high_zoneidx, nodemask) - if (!cpuset_zone_allowed(zone, gfp_mask)) + for_each_zone_zonelist_nodemask(zone, z, oc->zonelist, + high_zoneidx, oc->nodemask) + if (!cpuset_zone_allowed(zone, oc->gfp_mask)) cpuset_limited = true; if (cpuset_limited) { @@ -246,20 +246,18 @@ static enum oom_constraint constrained_a return CONSTRAINT_NONE; } #else -static enum oom_constraint constrained_alloc(struct zonelist *zonelist, - gfp_t gfp_mask, nodemask_t *nodemask, - unsigned long *totalpages) +static enum oom_constraint constrained_alloc(struct oom_control *oc, + unsigned long *totalpages) { *totalpages = totalram_pages + total_swap_pages; return CONSTRAINT_NONE; } #endif -enum oom_scan_t oom_scan_process_thread(struct task_struct *task, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill) +enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, + struct task_struct *task, unsigned long totalpages) { - if (oom_unkillable_task(task, NULL, nodemask)) + if (oom_unkillable_task(task, NULL, oc->nodemask)) return OOM_SCAN_CONTINUE; /* @@ -267,7 +265,7 @@ enum oom_scan_t oom_scan_process_thread( * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { - if (!force_kill) + if (!oc->force_kill) return OOM_SCAN_ABORT; } if (!task->mm) @@ -280,7 +278,7 @@ enum oom_scan_t oom_scan_process_thread( if (oom_task_origin(task)) return OOM_SCAN_SELECT; - if (task_will_free_mem(task) && !force_kill) + if (task_will_free_mem(task) && !oc->force_kill) return OOM_SCAN_ABORT; return OOM_SCAN_OK; @@ -289,12 +287,9 @@ enum oom_scan_t oom_scan_process_thread( /* * Simple selection loop. We chose the process with the highest * number of 'points'. Returns -1 on scan abort. - * - * (not docbooked, we don't want this one cluttering up the manual) */ -static struct task_struct *select_bad_process(unsigned int *ppoints, - unsigned long totalpages, const nodemask_t *nodemask, - bool force_kill) +static struct task_struct *select_bad_process(struct oom_control *oc, + unsigned int *ppoints, unsigned long totalpages) { struct task_struct *g, *p; struct task_struct *chosen = NULL; @@ -304,8 +299,7 @@ static struct task_struct *select_bad_pr for_each_process_thread(g, p) { unsigned int points; - switch (oom_scan_process_thread(p, totalpages, nodemask, - force_kill)) { + switch (oom_scan_process_thread(oc, p, totalpages)) { case OOM_SCAN_SELECT: chosen = p; chosen_points = ULONG_MAX; @@ -318,7 +312,7 @@ static struct task_struct *select_bad_pr case OOM_SCAN_OK: break; }; - points = oom_badness(p, NULL, nodemask, totalpages); + points = oom_badness(p, NULL, oc->nodemask, totalpages); if (!points || points < chosen_points) continue; /* Prefer thread group leaders for display purposes */ @@ -380,13 +374,13 @@ static void dump_tasks(struct mem_cgroup rcu_read_unlock(); } -static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, - struct mem_cgroup *memcg, const nodemask_t *nodemask) +static void dump_header(struct oom_control *oc, struct task_struct *p, + struct mem_cgroup *memcg) { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " "oom_score_adj=%hd\n", - current->comm, gfp_mask, order, + current->comm, oc->gfp_mask, oc->order, current->signal->oom_score_adj); cpuset_print_task_mems_allowed(current); task_unlock(current); @@ -396,7 +390,7 @@ static void dump_header(struct task_stru else show_mem(SHOW_MEM_FILTER_NODES); if (sysctl_oom_dump_tasks) - dump_tasks(memcg, nodemask); + dump_tasks(memcg, oc->nodemask); } /* @@ -487,10 +481,9 @@ void oom_killer_enable(void) * Must be called while holding a reference to p, which will be released upon * returning. */ -void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, +void oom_kill_process(struct oom_control *oc, struct task_struct *p, unsigned int points, unsigned long totalpages, - struct mem_cgroup *memcg, nodemask_t *nodemask, - const char *message) + struct mem_cgroup *memcg, const char *message) { struct task_struct *victim = p; struct task_struct *child; @@ -514,7 +507,7 @@ void oom_kill_process(struct task_struct task_unlock(p); if (__ratelimit(&oom_rs)) - dump_header(p, gfp_mask, order, memcg, nodemask); + dump_header(oc, p, memcg); task_lock(p); pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n", @@ -537,7 +530,7 @@ void oom_kill_process(struct task_struct /* * oom_badness() returns 0 if the thread is unkillable */ - child_points = oom_badness(child, memcg, nodemask, + child_points = oom_badness(child, memcg, oc->nodemask, totalpages); if (child_points > victim_points) { put_task_struct(victim); @@ -600,8 +593,7 @@ void oom_kill_process(struct task_struct /* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ -void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order, const nodemask_t *nodemask, +void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint, struct mem_cgroup *memcg) { if (likely(!sysctl_panic_on_oom)) @@ -615,7 +607,7 @@ void check_panic_on_oom(enum oom_constra if (constraint != CONSTRAINT_NONE) return; } - dump_header(NULL, gfp_mask, order, memcg, nodemask); + dump_header(oc, NULL, memcg); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } @@ -635,22 +627,16 @@ int unregister_oom_notifier(struct notif EXPORT_SYMBOL_GPL(unregister_oom_notifier); /** - * __out_of_memory - kill the "best" process when we run out of memory - * @zonelist: zonelist pointer - * @gfp_mask: memory allocation flags - * @order: amount of memory being requested as a power of 2 - * @nodemask: nodemask passed to page allocator - * @force_kill: true if a task must be killed, even if others are exiting + * out_of_memory - kill the "best" process when we run out of memory + * @oc: pointer to struct oom_control * * If we run out of memory, we have the choice between either * killing a random task (bad), letting the system crash (worse) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, - int order, nodemask_t *nodemask, bool force_kill) +bool out_of_memory(struct oom_control *oc) { - const nodemask_t *mpol_mask; struct task_struct *p; unsigned long totalpages; unsigned long freed = 0; @@ -684,30 +670,29 @@ bool out_of_memory(struct zonelist *zone * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ - constraint = constrained_alloc(zonelist, gfp_mask, nodemask, - &totalpages); - mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; - check_panic_on_oom(constraint, gfp_mask, order, mpol_mask, NULL); + constraint = constrained_alloc(oc, &totalpages); + if (constraint != CONSTRAINT_MEMORY_POLICY) + oc->nodemask = NULL; + check_panic_on_oom(oc, constraint, NULL); if (sysctl_oom_kill_allocating_task && current->mm && - !oom_unkillable_task(current, NULL, nodemask) && + !oom_unkillable_task(current, NULL, oc->nodemask) && current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { get_task_struct(current); - oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, - nodemask, + oom_kill_process(oc, current, 0, totalpages, NULL, "Out of memory (oom_kill_allocating_task)"); goto out; } - p = select_bad_process(&points, totalpages, mpol_mask, force_kill); + p = select_bad_process(oc, &points, totalpages); /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { - dump_header(NULL, gfp_mask, order, NULL, mpol_mask); + dump_header(oc, NULL, NULL); panic("Out of memory and no killable processes...\n"); } if (p != (void *)-1UL) { - oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, - nodemask, "Out of memory"); + oom_kill_process(oc, p, points, totalpages, NULL, + "Out of memory"); killed = 1; } out: @@ -728,13 +713,21 @@ out: */ void pagefault_out_of_memory(void) { + struct oom_control oc = { + .zonelist = NULL, + .nodemask = NULL, + .gfp_mask = 0, + .order = 0, + .force_kill = false, + }; + if (mem_cgroup_oom_synchronize(true)) return; if (!mutex_trylock(&oom_lock)) return; - if (!out_of_memory(NULL, 0, 0, NULL, false)) { + if (!out_of_memory(&oc)) { /* * There shouldn't be any user tasks runnable while the * OOM killer is disabled, so the current task has to diff -puN mm/page_alloc.c~mm-oom-organize-oom-context-into-struct mm/page_alloc.c --- a/mm/page_alloc.c~mm-oom-organize-oom-context-into-struct +++ a/mm/page_alloc.c @@ -2691,6 +2691,13 @@ static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac, unsigned long *did_some_progress) { + struct oom_control oc = { + .zonelist = ac->zonelist, + .nodemask = ac->nodemask, + .gfp_mask = gfp_mask, + .order = order, + .force_kill = false, + }; struct page *page; *did_some_progress = 0; @@ -2742,8 +2749,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, un goto out; } /* Exhausted what can be done so it's blamo time */ - if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false) - || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) + if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) *did_some_progress = 1; out: mutex_unlock(&oom_lock); _ Patches currently in -mm which might be from rientjes@xxxxxxxxxx are slub-fix-spelling-succedd-to-succeed.patch slab-infrastructure-for-bulk-object-allocation-and-freeing.patch slub-bulk-alloc-extract-objects-from-the-per-cpu-slab.patch slub-improve-bulk-alloc-strategy.patch slub-initial-bulk-free-implementation.patch slub-add-support-for-kmem_cache_debug-in-bulk-calls.patch mm-slub-move-slab-initialization-into-irq-enabled-region.patch mm-slub-fix-slab-double-free-in-case-of-duplicate-sysfs-filename.patch mm-memblock-warn_on-when-nid-differs-from-overlap-region.patch hugetlb-make-the-function-vma_shareable-bool.patch mremap-dont-leak-new_vma-if-f_op-mremap-fails.patch mm-move-mremap-from-file_operations-to-vm_operations_struct.patch mremap-dont-do-mm_populatenew_addr-on-failure.patch mremap-dont-do-uneccesary-checks-if-new_len-==-old_len.patch mremap-simplify-the-overlap-check-in-mremap_to.patch mm-improve-__gfp_noretry-comment-based-on-implementation.patch mm-improve-__gfp_noretry-comment-based-on-implementation-fix.patch mm-oom-organize-oom-context-into-struct.patch mm-oom-pass-an-oom-order-of-1-when-triggered-by-sysrq.patch mm-oom-do-not-panic-for-oom-kills-triggered-from-sysrq.patch page-flags-define-behavior-slb-related-flags-on-compound-pages.patch mm-utilc-add-kstrimdup.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html