Introduce a -t flag for the kmem command to support getting the page owner and slab debug trace. Here is the user help manual: 1. Dump the page_owner allocated stack trace for each allocated page in the buddy system when used with "kmem -pt": crash> kmem -pt Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns PFN 0x40000, type Movable, Flags 0xffff00000020836 set_page_owner+84 post_alloc_hook+308 prep_new_page+48 get_page_from_freelist+736 __alloc_pages+348 alloc_pages+280 __page_cache_alloc+120 page_cache_ra_unbounded+272 do_page_cache_ra+172 do_sync_mmap_readahead+492 filemap_fault+340 __do_fault+64 __handle_mm_fault+528 handle_mm_fault+208 __do_page_fault+232 do_page_fault+1264 ...... 2. Dump the page_owner allocated/freed trace for an allocated page when "kmem -pt" is used with a page address. 3. Dump the slab debug trace when "-st" is used with an allocated slab object address: crash> kmem -st ffff000007e79d00 CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME ffff000001c0ed00 3392 93 104 13 32k task_struct SLAB MEMORY NODE TOTAL ALLOCATED FREE fffffc00001f9e00 ffff000007e78000 0 8 6 2 FREE / [ALLOCATED] [ffff000007e79d00] object ffff000007e79d00 allocated in alloc_task_struct_node+36 age=8431 cpu=2 pid=415 __slab_alloc+60 kmem_cache_alloc_node+528 alloc_task_struct_node+36 dup_task_struct+56 copy_process+724 kernel_clone+276 __do_sys_clone+152 __se_sys_clone+60 __arm64_sys_clone+88 __invoke_syscall+36 invoke_syscall+284 el0_svc_common+248 do_el0_svc+56 el0_svc+248 el0t_64_sync_handler+92 el0t_64_sync+344 object ffff000007e79d00 freed in free_task_struct+32 age=12132 cpu=1 pid=0 kmem_cache_free+780 free_task_struct+32 free_task+164 __put_task_struct+328 put_task_struct+44 delayed_put_task_struct+64 rcu_do_batch+972 rcu_core+592 rcu_core_si+24 __softirqentry_text_start+388 do_softirq_own_stack+12 invoke_softirq+216 __irq_exit_rcu+164 irq_exit+20 handle_domain_irq+120 4. Dump the slab debug trace for each allocated object that belongs to this slab when "-st" is used with a slab page address. 
5. Dump the slab debug trace for each allocated object that belongs to a slab cache when "-S -t" is used with a slab cache address. With this patch, the page allocation times can be sorted easily by the page_owner_sort tool, and the slab allocation/free times can be sorted by a script. Signed-off-by: qiwu.chen <qiwu.chen@xxxxxxxxxxxxx> --- defs.h | 50 ++++++ help.c | 8 +- memory.c | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 572 insertions(+), 24 deletions(-) diff --git a/defs.h b/defs.h index dfbd241..e9eb9e3 100644 --- a/defs.h +++ b/defs.h @@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); } #undef roundup #endif #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define rounddown(x, y) ((x) & ~__round_mask(x, y)) +#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0) + +/* stack depot macros before kernel commit 8151c7a35d8bd */ +#define STACK_ALLOC_ALIGN 4 +#define STACK_ALLOC_NULL_PROTECTION_BITS 1 +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() - STACK_ALLOC_ALIGN) +#define DEPOT_STACK_BITS (sizeof(uint) * 8) +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ + STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS) + +/* stack depot macros since kernel commit 8151c7a35d8bd */ +#define STACK_DEPOT_EXTRA_BITS 5 +#define DEPOT_HANDLE_BITS (sizeof(uint) * 8) +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ +#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER)) +#define DEPOT_STACK_ALIGN 4 +#define DEPOT_OFFSET_BITS (DEPOT_POOL_ORDER + PAGESHIFT() - DEPOT_STACK_ALIGN) +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ + STACK_DEPOT_EXTRA_BITS) + +/* GFP flags */ +#define __GFP_RECLAIMABLE 0x10u +#define __GFP_MOVABLE 0x08u +#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) +#define 
GFP_MOVABLE_SHIFT 3 typedef uint64_t physaddr_t; @@ -1626,6 +1654,7 @@ struct offset_table { /* stash of commonly-used offsets */ long pglist_data_node_present_pages; long pglist_data_node_spanned_pages; long pglist_data_bdata; + long pglist_data_node_page_ext; long page_cache_bucket_chain; long zone_struct_free_pages; long zone_struct_free_area; @@ -2243,6 +2272,23 @@ struct offset_table { /* stash of commonly-used offsets */ long vmap_node_busy; long rb_list_head; long file_f_inode; + long stack_record_size; + long stack_record_entries; + long stack_record_count; + long page_owner_order; + long page_owner_gfp_mask; + long page_owner_ts_nsec; + long page_owner_free_ts_nsec; + long page_owner_pid; + long page_owner_handle; + long page_owner_free_handle; + long mem_section_page_ext; + long track_addr; + long track_addrs; + long track_pid; + long track_cpu; + long track_when; + long track_handle; }; struct size_table { /* stash of commonly-used sizes */ @@ -2419,6 +2465,10 @@ struct size_table { /* stash of commonly-used sizes */ long module_memory; long fred_frame; long vmap_node; + long page_ext; + long page_owner; + long stack_record; + long track; }; struct array_table { diff --git a/help.c b/help.c index e95ac1d..6a59064 100644 --- a/help.c +++ b/help.c @@ -6815,8 +6815,8 @@ NULL char *help_kmem[] = { "kmem", "kernel memory", -"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n" -" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]", +"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n" +" [[-s|-S|-S=cpu[s]|-r|-t] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]", " This command displays information about the use of kernel memory.\n", " -f displays the contents of the system free memory headers.", " also verifies that the page count equals nr_free_pages.", @@ -6845,6 +6845,7 @@ char *help_kmem[] = { " mem_map[] array, made up of the page struct address, its associated", " physical address, the 
page.mapping, page.index, page._count and", " page.flags fields.", +" -t displays page_owner allocated stack trace for each allocated page in buddy system.", " -m member similar to -p, but displays page structure contents specified by", " a comma-separated list of one or more struct page members. The", " \"flags\" member will always be expressed in hexadecimal format, and", @@ -6893,12 +6894,15 @@ char *help_kmem[] = { " address when used with -s or -S, searches the kmalloc() slab subsystem", " for the slab containing of this virtual address, showing whether", " it is in use or free.", +" when added extra -t, displays the slab debug trace for the allocated", +" object belongs to this slab", " address when used with -f, the address can be either a page pointer,", " a physical address, or a kernel virtual address; the free_area", " header containing the page (if any) is displayed.", " address when used with -p, the address can be either a page pointer, a", " physical address, or a kernel virtual address; its basic mem_map", " page information is displayed.", +" when added extra -t, display the page_owner traces for this page", " address when used with -m, the address can be either a page pointer, a", " physical address, or a kernel virtual address; the specified", " members of the associated page struct are displayed.", diff --git a/memory.c b/memory.c index a74ebaf..7bf8f86 100644 --- a/memory.c +++ b/memory.c @@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong); static void dump_hstates(void); static ulong freelist_ptr(struct meminfo *, ulong, ulong); static ulong handle_each_vm_area(struct handle_each_vm_area_args *); +static void page_owner_init(void); +static int page_owner_enabled(void); +static void stack_depot_init(void); +static void dump_page_owner(struct meminfo *, ulong, physaddr_t); +enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL }; /* * Memory display modes specific to this file. 
@@ -860,6 +865,16 @@ vm_init(void) "kmem_cache_node", "partial"); MEMBER_OFFSET_INIT(kmem_cache_node_full, "kmem_cache_node", "full"); + + STRUCT_SIZE_INIT(track, "track"); + MEMBER_OFFSET_INIT(track_addr, "track", "addr"); + if (MEMBER_EXISTS("track", "addrs")) + MEMBER_OFFSET_INIT(track_addrs, "track", "addrs"); + if (MEMBER_EXISTS("track", "handle")) + MEMBER_OFFSET_INIT(track_handle, "track", "handle"); + MEMBER_OFFSET_INIT(track_when, "track", "when"); + MEMBER_OFFSET_INIT(track_cpu, "track", "cpu"); + MEMBER_OFFSET_INIT(track_pid, "track", "pid"); } else { MEMBER_OFFSET_INIT(kmem_cache_s_c_nextp, "kmem_cache_s", "c_nextp"); @@ -983,6 +998,8 @@ vm_init(void) vt->flags |= DISCONTIGMEM; sparse_mem_init(); + page_owner_init(); + stack_depot_init(); vt->vmalloc_start = machdep->vmalloc_start(); if (IS_VMALLOC_ADDR(vt->mem_map)) @@ -1099,6 +1116,8 @@ vm_init(void) MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data", "bdata"); MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data", "nr_zones"); + MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data", + "node_page_ext"); MEMBER_OFFSET_INIT(pglist_data_node_start_pfn, "pglist_data", "node_start_pfn"); MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data", @@ -5037,6 +5056,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) #define SLAB_BITFIELD (ADDRESS_SPECIFIED << 25) #define SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26) #define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27) +#define GET_SLAB_DEBUG_TRACE (ADDRESS_SPECIFIED << 28) +#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 29) #define GET_ALL \ (GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES) @@ -5048,7 +5069,7 @@ cmd_kmem(void) int c; int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag; int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag; - int rflag; + int rflag, tflag; struct meminfo meminfo; ulonglong value[MAXARGS]; char buf[BUFSIZE]; @@ -5061,13 +5082,13 @@ cmd_kmem(void) spec_addr = 
choose_cpu = 0; sflag = Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0; vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0; - gflag = hflag = rflag = 0; + gflag = hflag = rflag = tflag = 0; escape = FALSE; BZERO(&meminfo, sizeof(struct meminfo)); BZERO(&value[0], sizeof(ulonglong)*MAXARGS); pc->curcmd_flags &= ~HEADER_PRINTED; - while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) { + while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht")) != EOF) { switch(c) { case 'V': @@ -5204,6 +5225,10 @@ cmd_kmem(void) gflag = 1; break; + case 't': + tflag = 1; + break; + default: argerrs++; break; @@ -5213,7 +5238,7 @@ cmd_kmem(void) if (argerrs) cmd_usage(pc->curcmd, SYNOPSIS); - if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag + + if ((fflag + Fflag + Vflag + oflag + vflag + Cflag + cflag + iflag + lflag + Lflag + gflag + hflag + rflag) > 1) { error(INFO, "only one flag allowed!\n"); @@ -5264,10 +5289,13 @@ cmd_kmem(void) if (pflag) { meminfo.spec_addr = value[i]; meminfo.flags = ADDRESS_SPECIFIED; + if (tflag) + meminfo.flags |= GET_PAGE_OWNER; dump_mem_map(&meminfo); pflag++; } + if (sflag || Sflag) { if (vt->flags & KMEM_CACHE_UNAVAIL) error(FATAL, @@ -5292,6 +5320,8 @@ cmd_kmem(void) meminfo.reqname = p1; meminfo.cache = value[i]; meminfo.flags |= CACHE_SET; + if (tflag) + meminfo.flags |= GET_SLAB_DEBUG_TRACE; if ((i+1) == spec_addr) { /* done? */ if (meminfo.calls++) fprintf(fp, "\n"); @@ -5301,6 +5331,8 @@ cmd_kmem(void) } else { meminfo.spec_addr = value[i]; meminfo.flags = ADDRESS_SPECIFIED; + if (tflag) + meminfo.flags |= GET_SLAB_DEBUG_TRACE; if (Sflag && (vt->flags & KMALLOC_SLUB)) meminfo.flags |= VERBOSE; if (meminfo.calls++) @@ -5346,25 +5378,28 @@ cmd_kmem(void) gflag++; } - /* - * no value arguments allowed! - */ - if (zflag || nflag || iflag || Fflag || Cflag || Lflag || + if (tflag) + tflag++; + + /* + * no value arguments allowed! 
+ */ + if (zflag || nflag || iflag || Fflag || Cflag || Lflag || Vflag || oflag || hflag || rflag) { error(INFO, "no address arguments allowed with this option\n"); cmd_usage(pc->curcmd, SYNOPSIS); } - if (!(sflag + Sflag + pflag + fflag + vflag + cflag + - lflag + Lflag + gflag)) { + if (!(sflag + Sflag + pflag + fflag + vflag + cflag + + lflag + Lflag + gflag + tflag)) { meminfo.spec_addr = value[i]; - meminfo.flags = ADDRESS_SPECIFIED; - if (meminfo.calls++) - fprintf(fp, "\n"); + meminfo.flags = ADDRESS_SPECIFIED; + if (meminfo.calls++) + fprintf(fp, "\n"); else kmem_cache_init(); - kmem_search(&meminfo); + kmem_search(&meminfo); } } @@ -5372,8 +5407,11 @@ cmd_kmem(void) if (iflag == 1) dump_kmeminfo(); - if (pflag == 1) + if (pflag == 1) { + if (tflag) + meminfo.flags = GET_PAGE_OWNER; dump_mem_map(&meminfo); + } if (fflag == 1) vt->dump_free_pages(&meminfo); @@ -5457,7 +5495,7 @@ cmd_kmem(void) if (!(sflag + Sflag + pflag + fflag + Fflag + vflag + Vflag + zflag + oflag + cflag + Cflag + iflag + nflag + lflag + Lflag + gflag + hflag + rflag + - meminfo.calls)) + tflag + meminfo.calls)) cmd_usage(pc->curcmd, SYNOPSIS); } @@ -5749,7 +5787,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) switch (mi->flags) { - case ADDRESS_SPECIFIED: + case ADDRESS_SPECIFIED: + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: switch (mi->memtype) { case KVADDR: @@ -5774,6 +5813,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) print_hdr = TRUE; break; + case GET_PAGE_OWNER: + print_hdr = FALSE; + break; + case GET_ALL: shared = 0; reserved = 0; @@ -5926,6 +5969,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) /* FALLTHROUGH */ + case GET_PAGE_OWNER: + dump_page_owner(mi, pp, phys); + break; + case GET_SLAB_PAGES: if (v22) { if ((flags >> v22_PG_Slab) & 1) @@ -6083,6 +6130,7 @@ display_members: if (done) break; + } if (done) @@ -6119,7 +6167,10 @@ display_members: break; case ADDRESS_SPECIFIED: + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: mi->retval = done; + if (mi->flags & GET_PAGE_OWNER) + 
dump_page_owner(mi, pp, phys); break; } @@ -6129,6 +6180,331 @@ display_members: FREEBUF(page_cache); } +static int stack_depot_enabled(void) +{ + struct syment *sp; + int disable = TRUE; + + if ((sp = symbol_search("stack_depot_disable"))) + readmem(sp->value, KVADDR, &disable, sizeof(int), + "stack_depot_disable", RETURN_ON_ERROR); + else if ((sp = symbol_search("stack_depot_disabled"))) + readmem(sp->value, KVADDR, &disable, sizeof(int), + "stack_depot_disabled", RETURN_ON_ERROR); + else if ((sp = symbol_search("stack_slabs"))) + return sp->value ? FALSE : TRUE; + + return !disable; +} + +static void stack_depot_init(void) +{ + if (stack_depot_enabled()) { + STRUCT_SIZE_INIT(stack_record, "stack_record"); + MEMBER_OFFSET_INIT(stack_record_size, "stack_record", "size"); + MEMBER_OFFSET_INIT(stack_record_entries, "stack_record", "entries"); + if (MEMBER_EXISTS("stack_record", "count")) + MEMBER_OFFSET_INIT(stack_record_count, "stack_record", "count"); + } +} + +/* Fetch stack entries from a depot. 
*/ +static unsigned int stack_depot_fetch(uint handle, ulong *entries) +{ + struct syment *sp; + uint valid, offset, slabindex, poolindex, pools_num, stack_record_count, stack_size = 0; + ulong stack_record_addr, sym_value; + + if (!handle) + return 0; + + if ((sp = symbol_search("stack_slabs"))) { + valid = (handle >> (STACK_ALLOC_INDEX_BITS + STACK_ALLOC_OFFSET_BITS)) & STACK_ALLOC_NULL_PROTECTION_BITS; + if (!valid) + return 0; + + slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1); + if (!readmem(sp->value + slabindex * sizeof(void *), KVADDR, &stack_record_addr, + sizeof(void *), "stack_record_addr", FAULT_ON_ERROR)) + return 0; + + offset = (handle >> STACK_ALLOC_INDEX_BITS) & ((1 << STACK_ALLOC_OFFSET_BITS) - 1); + stack_record_addr += (offset << STACK_ALLOC_ALIGN); + *entries = stack_record_addr + OFFSET(stack_record_entries); + if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size, sizeof(stack_size), + "stack_record_entries", FAULT_ON_ERROR)) + return 0; + } else if ((sp = symbol_search("stack_pools")) && (sym_value = symbol_value("pools_num"))) { + poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1); + readmem(sym_value, KVADDR, &pools_num, sizeof(int), "pools_num", RETURN_ON_ERROR); + if (poolindex >= pools_num) { + error(INFO, "pool index %d out of bounds (%d) for stack id %08x\n", poolindex, pools_num, handle); + return 0; + } + + readmem(sp->value + (poolindex-1) * sizeof(void *), KVADDR, &stack_record_addr, + sizeof(void *), "stack_record_addr", FAULT_ON_ERROR); + if (!stack_record_addr) + return 0; + + offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 << DEPOT_OFFSET_BITS) - 1); + stack_record_addr += (offset << DEPOT_STACK_ALIGN); + if (!readmem(stack_record_addr + OFFSET(stack_record_count), KVADDR, &stack_record_count, + sizeof(stack_record_count), "stack_record_count", FAULT_ON_ERROR) || !stack_record_count) + return 0; + + *entries = stack_record_addr + OFFSET(stack_record_entries); + if 
(!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size, sizeof(stack_size), + "stack_record_entries", FAULT_ON_ERROR)) + return 0; + } + + return stack_size; +} + +static void stack_trace_print(ulong entries, uint nr_entries) +{ + int i; + struct syment *sp; + ulong value, offset; + char buf[BUFSIZE]; + + if (!nr_entries) + return; + + for (i = 0; i < nr_entries; i++) { + if (!readmem(entries, KVADDR, &value, sizeof(value), "stack_trace", FAULT_ON_ERROR)) + break; + + entries += sizeof(ulong); + sp = value_search(value, &offset); + if (!sp) + break; + + fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf, 0)); + } + fprintf(fp, "\n"); +} + +static ulong gfp_migratetype(ulong gfp_flags) +{ + struct syment *sp; + int page_group_by_mobility_disabled; + + if ((sp = symbol_search("page_group_by_mobility_disabled"))) { + readmem(sp->value, KVADDR, &page_group_by_mobility_disabled, sizeof(int), + "page_group_by_mobility_disabled", RETURN_ON_ERROR); + if (page_group_by_mobility_disabled) { + ulong migrate_unmovable; + + enumerator_value("MIGRATE_UNMOVABLE", &migrate_unmovable); + return migrate_unmovable; + } + } + + return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; +} + +static void migratetype_name(ulong migratetype, char *buf) +{ + struct syment *sp; + ulong migratetype_name_addr; + + sp = symbol_search("migratetype_names"); + if (!sp) + return; + + readmem(sp->value + migratetype * sizeof(ulong), KVADDR, &migratetype_name_addr, + sizeof(ulong), "migratetype_name", RETURN_ON_ERROR); + read_string(migratetype_name_addr, buf, BUFSIZE-1); +} + +static void print_page_owner(ulong pfn, ulong page, char *page_owner, enum track_item alloc) +{ + int i, pid; + ushort order; + uint handle, free_handle, gfp_mask, nr_entries; + u64 ts_nsec, free_ts_nsec; + ulong entries, offset, page_flags; + struct syment *sp; + char buf[BUFSIZE]; + + order = USHORT(page_owner + OFFSET(page_owner_order)); + gfp_mask = UINT(page_owner + 
OFFSET(page_owner_gfp_mask)); + handle = UINT(page_owner + OFFSET(page_owner_handle)); + free_handle = UINT(page_owner + OFFSET(page_owner_free_handle)); + ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec)); + free_ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_free_ts_nsec)); + pid = INT(page_owner + OFFSET(page_owner_pid)); + + if (handle && (alloc != TRACK_FREE)) { + fprintf(fp, "Page allocated via order %u, mask %#x, pid %d, ts %llu ns\n", + order, gfp_mask, pid, ts_nsec); + migratetype_name(gfp_migratetype(gfp_mask), buf); + readmem(page+OFFSET(page_flags), KVADDR, &page_flags, sizeof(ulong), + "page.flags", FAULT_ON_ERROR); + fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf, page_flags); + nr_entries = stack_depot_fetch(handle, &entries); + stack_trace_print(entries, nr_entries); + } + + if (alloc != TRACK_ALLOC && + (free_handle = UINT(page_owner + OFFSET(page_owner_free_handle)))) { + nr_entries = stack_depot_fetch(handle, &entries); + fprintf(fp, "page last free ts %llu ns, stack trace:\n", free_ts_nsec); + stack_trace_print(entries, nr_entries); + } +} + +/* Get the max order for zoned buddy allocator */ +static inline ulong get_max_order(void) +{ + char *string; + + if ((kt->ikconfig_flags & IKCONFIG_AVAIL) && + get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) == IKCONFIG_STR) + return atol(string); + + return 11; +} + +#define MAX_ORDER get_max_order() +#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) + +static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext) +{ + int node; + ulong page_ext_size, section, section_nr, pgdat, node_page_ext, node_start_pfn, page_ext_idx; + + if (!kernel_symbol_exists("page_ext_size") || + !readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size, sizeof(page_ext_size), + "page_ext_size", QUIET|RETURN_ON_ERROR)) + return FALSE; + + if (IS_SPARSEMEM()) { + section_nr = pfn_to_section_nr(pfn); + if (!(section = valid_section_nr(section_nr))) + return FALSE; + + if (!readmem(section 
+ OFFSET(mem_section_page_ext), KVADDR, &node_page_ext, sizeof(ulong), + "mem_section page_ext", FAULT_ON_ERROR) || !node_page_ext) + return FALSE; + + *page_ext = node_page_ext + pfn * page_ext_size; + return TRUE; + } + + if ((node = page_to_nid(pp) >= 0)) { + pgdat = vt->node_table[node].pgdat; + if (!VALID_MEMBER(pglist_data_node_page_ext) || + !readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR, &node_page_ext, + sizeof(ulong), "pglist node_page_ext", FAULT_ON_ERROR) || !node_page_ext) + return FALSE; + + if (!VALID_MEMBER(pglist_data_node_start_pfn) || + !readmem(pgdat + OFFSET(pglist_data_node_start_pfn), KVADDR, &node_start_pfn, + sizeof(ulong), "pglist node_start_pfn", FAULT_ON_ERROR) || !node_start_pfn) + return FALSE; + + page_ext_idx = pfn - rounddown(node_start_pfn, MAX_ORDER_NR_PAGES); + *page_ext = node_page_ext + pfn * page_ext_size; + return TRUE; + } + + return FALSE; +} + +static ulong get_page_owner(ulong page_ext) +{ + struct syment *sp; + ulong page_owner_ops_offset; + + sp = symbol_search("page_owner_ops"); + if (!sp) + return FALSE; + + if (!readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong), + "page_owner_ops_offset", RETURN_ON_ERROR)) + return FALSE; + + return page_ext + page_owner_ops_offset; +} + +static int page_owner_enabled(void) +{ + struct syment *sp; + int enabled; + + if ((sp = symbol_search("page_owner_enabled")) && + readmem(sp->value, KVADDR, &enabled, sizeof(int), "page_owner_enabled", RETURN_ON_ERROR)) + return enabled; + + if ((sp = symbol_search("page_owner_inited")) && + readmem(sp->value, KVADDR, &enabled, sizeof(int), "page_owner_inited", RETURN_ON_ERROR)) + return enabled; + + return FALSE; +} + +static void page_owner_init(void) +{ + if (page_owner_enabled()) { + STRUCT_SIZE_INIT(page_ext, "page_ext"); + STRUCT_SIZE_INIT(page_owner, "page_owner"); + MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section", "page_ext"); + MEMBER_OFFSET_INIT(page_owner_handle, "page_owner", "handle"); + 
MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner", "free_handle"); + MEMBER_OFFSET_INIT(page_owner_order, "page_owner", "order"); + MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner", "gfp_mask"); + MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner", "ts_nsec"); + MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner", "free_ts_nsec"); + MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid"); + } +} + +static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys) +{ + ulong pfn, page_ext_addr, page_owner_addr, page_ext; + long page_ext_owner, page_ext_owner_allocated; + char *page_owner; + + pfn = BTOP(phys); + if (!lookup_page_ext(pfn, pp, &page_ext_addr)) + return; + + page_owner_addr = get_page_owner(page_ext_addr); + if (!page_owner_addr) + return; + + page_owner = (char *)GETBUF(SIZE(page_owner)); + if (!readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner), "page_owner", FAULT_ON_ERROR)) + goto exit; + + enumerator_value("PAGE_EXT_OWNER", &page_ext_owner); + if (!readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong), "page_ext", FAULT_ON_ERROR) + || !(page_ext & (1 << page_ext_owner))) + goto exit; + + enumerator_value("PAGE_EXT_OWNER_ALLOCATED", &page_ext_owner_allocated); + if (mi->flags == GET_PAGE_OWNER) { + if (!(page_ext & (1 << page_ext_owner_allocated)) || + !IS_ALIGNED(pfn, 1 << USHORT(page_owner + OFFSET(page_owner_order)))) + goto exit; + + /* dump allocated page owner for current memory usage */ + print_page_owner(pfn, pp, page_owner, TRACK_ALLOC); + } else { + if (page_ext & (1 << page_ext_owner_allocated)) + fprintf(fp, "page_owner tracks the page 0x%lx as allocated\n", pp); + else + fprintf(fp, "page_owner tracks the page 0x%lx as freed\n", pp); + print_page_owner(pfn, pp, page_owner, TRACK_ALL); + } + +exit: + FREEBUF(page_owner); +} + static void dump_mem_map(struct meminfo *mi) { @@ -6161,6 +6537,18 @@ dump_mem_map(struct meminfo *mi) char style3[100]; char style4[100]; + if (mi->flags & GET_PAGE_OWNER) { + 
if (!page_owner_enabled()) { + error(INFO, "page_owner is disabled\n"); + return; + } + + if (!stack_depot_enabled()) { + error(INFO, "stack_depot is disabled\n"); + return; + } + } + if (IS_SPARSEMEM()) { dump_mem_map_SPARSEMEM(mi); return; @@ -6238,7 +6626,8 @@ dump_mem_map(struct meminfo *mi) switch (mi->flags) { - case ADDRESS_SPECIFIED: + case ADDRESS_SPECIFIED: + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: switch (mi->memtype) { case KVADDR: @@ -6263,6 +6652,10 @@ dump_mem_map(struct meminfo *mi) print_hdr = TRUE; break; + case GET_PAGE_OWNER: + print_hdr = FALSE; + break; + case GET_ALL: shared = 0; reserved = 0; @@ -6376,6 +6769,10 @@ dump_mem_map(struct meminfo *mi) /* FALLTHROUGH */ + case GET_PAGE_OWNER: + dump_page_owner(mi, pp, phys); + break; + case GET_SLAB_PAGES: if (v22) { if ((flags >> v22_PG_Slab) & 1) @@ -6570,7 +6967,10 @@ display_members: break; case ADDRESS_SPECIFIED: + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: mi->retval = done; + if (mi->flags & GET_PAGE_OWNER) + dump_page_owner(mi, pp, phys); break; } @@ -19618,6 +20018,99 @@ do_kmem_cache_slub(struct meminfo *si) FREEBUF(per_cpu); } +/* + * Return offset of the end of info block which is inuse + free pointer if + * not overlapping with object. 
+ */ +static inline uint get_info_end(struct meminfo *si) +{ + uint inuse = UINT(si->cache_buf + OFFSET(kmem_cache_inuse)); + uint offset = UINT(si->cache_buf + OFFSET(kmem_cache_offset)); + + if (offset >= inuse) + return inuse + sizeof(void *); + else + return inuse; +} + +static inline u64 get_jiffies(void) +{ + ulong jiffies; + u64 jiffies_64; + + if (symbol_exists("jiffies_64")) { + get_symbol_data("jiffies_64", sizeof(u64), &jiffies_64); + return jiffies_64; + } else { + get_symbol_data("jiffies", sizeof(ulong), &jiffies); + return (u64)jiffies; + } +} + +#define TRACK_ADDRS_COUNT 16 +void print_track(struct meminfo *si, char *track, ulong object, enum track_item alloc) +{ + ulong track_addr, addr, addrs, when, entries, nr_entries; + uint i, cpu, pid, handle; + char buf[BUFSIZE]; + + track_addr = object + get_info_end(si) + alloc * STRUCT_SIZE("track"); + if (!readmem(track_addr, KVADDR, track, SIZE(track), "track", FAULT_ON_ERROR)) + return; + + addr = ULONG(track + OFFSET(track_addr)); + if (addr) { + when = ULONG(track + OFFSET(track_when)); + cpu = UINT(track + OFFSET(track_cpu)); + pid = UINT(track + OFFSET(track_pid)); + fprintf(fp, "object %lx %s in %s age=%llu cpu=%u pid=%d\n", + object, alloc ? 
"freed" : "allocated", value_to_symstr(addr, buf, 0), + get_jiffies() - (u64)when, cpu, pid); + if (VALID_MEMBER(track_addrs)) { + addrs = track_addr + OFFSET(track_addrs); + stack_trace_print(addrs, TRACK_ADDRS_COUNT); + } else if (VALID_MEMBER(track_handle)) { + handle = UINT(track + OFFSET(track_handle)); + nr_entries = stack_depot_fetch(handle, &entries); + stack_trace_print(entries, nr_entries); + } else { + fprintf(fp, "stack trace missing\n"); + handle = track_addr + OFFSET(track_handle); + nr_entries = stack_depot_fetch(handle, &entries); + stack_trace_print(entries, nr_entries); + } + } +} + +#define SLAB_STORE_USER (0x00010000UL) +static ulong get_slab_store_user_flag(void) +{ + ulong slab_store_user_flag; + + if (enumerator_value("_SLAB_STORE_USER", &slab_store_user_flag)) + return (1 << slab_store_user_flag); + else + return SLAB_STORE_USER; +} + +static void slab_debug_trace_show(struct meminfo *si, ulong object) +{ + ulong flags; + char *track; + + if (!(si->flags & GET_SLAB_DEBUG_TRACE)) + return; + + flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags)); + if (!(flags & get_slab_store_user_flag())) + return; + + track = (char *)GETBUF(SIZE(track)); + print_track(si, track, object, TRACK_ALLOC); + print_track(si, track, object, TRACK_FREE); + FREEBUF(track); +} + #define DUMP_SLAB_INFO_SLUB() \ { \ char b1[BUFSIZE], b2[BUFSIZE]; \ @@ -19672,7 +20165,8 @@ do_slab_slub(struct meminfo *si, int verbose) if (!verbose) { DUMP_SLAB_INFO_SLUB(); - return TRUE; + if (!(si->flags & GET_SLAB_DEBUG_TRACE)) + return TRUE; } cpu_freelist = 0; @@ -19775,7 +20269,8 @@ do_slab_slub(struct meminfo *si, int verbose) if (is_free && (cpu_slab >= 0)) fprintf(fp, "(cpu %d cache)", cpu_slab); fprintf(fp, "\n"); - + if (!is_free) + slab_debug_trace_show(si, p + red_left_pad); } return TRUE; @@ -19886,11 +20381,10 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node) } -#define SLAB_STORE_USER (0x00010000UL) flags = ULONG(si->cache_buf + 
OFFSET(kmem_cache_flags)); if (INVALID_MEMBER(kmem_cache_node_full) || - !(flags & SLAB_STORE_USER)) { + !(flags & get_slab_store_user_flag())) { fprintf(fp, "NODE %d FULL:\n (not tracked)\n", node); return; } -- 2.25.1 -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki