This patch: - Moves lib/show_mem.c to mm/show_mem.c - Changes show_mem() to always report on slab usage - Instead of reporting on all slabs, we only report on top 10 slabs, and in sorted order - Also reports on shrinkers, with the new shrinkers_to_text(). More OOM reporting can be moved to show_mem.c and improved, this patch is only a small start. Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> --- lib/Makefile | 2 +- mm/Makefile | 2 +- mm/oom_kill.c | 23 ------------------ {lib => mm}/show_mem.c | 14 +++++++++++ mm/slab.h | 6 +++-- mm/slab_common.c | 53 +++++++++++++++++++++++++++++++++++------- 6 files changed, 65 insertions(+), 35 deletions(-) rename {lib => mm}/show_mem.c (78%) diff --git a/lib/Makefile b/lib/Makefile index 31a3904eda..c5041d33d0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -30,7 +30,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o extable.o sha1.o irq_regs.o argv_split.o \ - flex_proportions.o ratelimit.o show_mem.o \ + flex_proportions.o ratelimit.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \ diff --git a/mm/Makefile b/mm/Makefile index 70d4309c9c..97c0be12f3 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -54,7 +54,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ mm_init.o percpu.o slab_common.o \ compaction.o vmacache.o \ interval_tree.o list_lru.o workingset.o \ - debug.o gup.o mmap_lock.o $(mmu-y) + debug.o gup.o mmap_lock.o show_mem.o $(mmu-y) # Give 'page_alloc' its own module-parameter namespace page-alloc-y := page_alloc.o diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 832fb33037..659c7d6376 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -171,27 +171,6 @@ static bool oom_unkillable_task(struct task_struct *p) return false; } -/* - * Check whether unreclaimable slab amount is greater than - * all user memory(LRU pages). - * dump_unreclaimable_slab() could help in the case that - * oom due to too much unreclaimable slab used by kernel. -*/ -static bool should_dump_unreclaim_slab(void) -{ - unsigned long nr_lru; - - nr_lru = global_node_page_state(NR_ACTIVE_ANON) + - global_node_page_state(NR_INACTIVE_ANON) + - global_node_page_state(NR_ACTIVE_FILE) + - global_node_page_state(NR_INACTIVE_FILE) + - global_node_page_state(NR_ISOLATED_ANON) + - global_node_page_state(NR_ISOLATED_FILE) + - global_node_page_state(NR_UNEVICTABLE); - - return (global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B) > nr_lru); -} - /** * oom_badness - heuristic function to determine which candidate task to kill * @p: task struct of which task we should calculate @@ -465,8 +444,6 @@ static void dump_header(struct oom_control *oc, struct task_struct *p) mem_cgroup_print_oom_meminfo(oc->memcg); else { show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask); - if (should_dump_unreclaim_slab()) - dump_unreclaimable_slab(); } if (sysctl_oom_dump_tasks) dump_tasks(oc); diff --git a/lib/show_mem.c b/mm/show_mem.c similarity index 78% rename from lib/show_mem.c rename to mm/show_mem.c index 1c26c14ffb..c9f37f13d6 100644 --- a/lib/show_mem.c +++ b/mm/show_mem.c @@ -7,11 +7,15 @@ #include <linux/mm.h> #include <linux/cma.h> +#include <linux/printbuf.h> + +#include "slab.h" void show_mem(unsigned int filter, nodemask_t *nodemask) { pg_data_t *pgdat; unsigned long total = 0, reserved = 0, highmem = 0; + struct printbuf buf = PRINTBUF; printk("Mem-Info:\n"); show_free_areas(filter, nodemask); @@ -41,4 +45,14 @@ void show_mem(unsigned int filter, nodemask_t *nodemask) #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif + + pr_info("Unreclaimable slab info:\n"); + dump_unreclaimable_slab(&buf); + printk("%s", buf.buf); + printbuf_reset(&buf); + + printk("Shrinkers:\n"); + shrinkers_to_text(&buf); + printk("%s", buf.buf); + printbuf_exit(&buf); } diff --git a/mm/slab.h b/mm/slab.h index c7f2abc2b1..abefbf7674 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -788,10 +788,12 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) #endif +struct printbuf; + #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) -void dump_unreclaimable_slab(void); +void dump_unreclaimable_slab(struct printbuf *); #else -static inline void dump_unreclaimable_slab(void) +static inline void dump_unreclaimable_slab(struct printbuf *out) { } #endif diff --git a/mm/slab_common.c b/mm/slab_common.c index 23f2ab0713..cb1c548c73 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -24,6 +24,7 @@ #include <asm/tlbflush.h> #include <asm/page.h> #include <linux/memcontrol.h> +#include <linux/printbuf.h> #define CREATE_TRACE_POINTS #include <trace/events/kmem.h> @@ -1084,10 +1085,15 @@ static int slab_show(struct seq_file *m, void *p) return 0; } -void dump_unreclaimable_slab(void) +void dump_unreclaimable_slab(struct printbuf *out) { struct kmem_cache *s; struct slabinfo sinfo; + struct slab_by_mem { + struct kmem_cache *s; + size_t total, active; + } slabs_by_mem[10], n; + int i, nr = 0; /* * Here acquiring slab_mutex is risky since we don't prefer to get @@ -1097,12 +1103,11 @@ void dump_unreclaimable_slab(void) * without acquiring the mutex. */ if (!mutex_trylock(&slab_mutex)) { - pr_warn("excessive unreclaimable slab but cannot dump stats\n"); + pr_buf(out, "excessive unreclaimable slab but cannot dump stats\n"); return; } - pr_info("Unreclaimable slab info:\n"); - pr_info("Name Used Total\n"); + buf->atomic++; list_for_each_entry(s, &slab_caches, list) { if (s->flags & SLAB_RECLAIM_ACCOUNT) @@ -1110,11 +1115,43 @@ void dump_unreclaimable_slab(void) get_slabinfo(s, &sinfo); - if (sinfo.num_objs > 0) - pr_info("%-17s %10luKB %10luKB\n", s->name, - (sinfo.active_objs * s->size) / 1024, - (sinfo.num_objs * s->size) / 1024); + if (!sinfo.num_objs) + continue; + + n.s = s; + n.total = sinfo.num_objs * s->size; + n.active = sinfo.active_objs * s->size; + + for (i = 0; i < nr; i++) + if (n.total < slabs_by_mem[i].total) + break; + + if (nr < ARRAY_SIZE(slabs_by_mem)) { + memmove(&slabs_by_mem[i + 1], + &slabs_by_mem[i], + sizeof(slabs_by_mem[0]) * (nr - i)); + nr++; + } else if (i) { + i--; + memmove(&slabs_by_mem[0], + &slabs_by_mem[1], + sizeof(slabs_by_mem[0]) * i); + } else { + continue; + } + + slabs_by_mem[i] = n; + } + + for (i = nr - 1; i >= 0; --i) { + pr_buf(out, "%-17s total: ", slabs_by_mem[i].s->name); + pr_human_readable_u64(out, slabs_by_mem[i].total); + pr_buf(out, " active: "); + pr_human_readable_u64(out, slabs_by_mem[i].active); + pr_newline(out); } + + --buf->atomic; mutex_unlock(&slab_mutex); } -- 2.35.2