On Fri, Aug 30, 2024 at 10:32 PM <qiwu.chen@xxxxxxxxxxxxx> wrote: > > Introduce -t flag for kmem command to support getting page owner and > slab debug trace. Here is the user help manual: > > 1. Dump page_owner allocated stack trace for each allocated page in > buddy system when used with "kmem -pt": > crash> kmem -pt > Page allocated via order 0, mask 0x1112c4a, pid 1, ts 16155269152 ns > PFN 0x40000, type Movable, Flags 0xffff00000020836 > set_page_owner+84 > post_alloc_hook+308 > prep_new_page+48 > get_page_from_freelist+736 > __alloc_pages+348 > alloc_pages+280 > __page_cache_alloc+120 > page_cache_ra_unbounded+272 > do_page_cache_ra+172 > do_sync_mmap_readahead+492 > filemap_fault+340 > __do_fault+64 > __handle_mm_fault+528 > handle_mm_fault+208 > __do_page_fault+232 > do_page_fault+1264 > ...... > > 2. Dump page_owner allocated/freed trace for an allocated page when used > "kmem -pt" with a page address. > > 3. Dump slab debug trace when used "-st" with an allocated slab object address: > crash> kmem -st ffff000007e79d00 > CACHE OBJSIZE ALLOCATED TOTAL SLABS SSIZE NAME > ffff000001c0ed00 3392 93 104 13 32k task_struct > SLAB MEMORY NODE TOTAL ALLOCATED FREE > fffffc00001f9e00 ffff000007e78000 0 8 6 2 > FREE / [ALLOCATED] > [ffff000007e79d00] > object ffff000007e79d00 allocated in alloc_task_struct_node+36 age=8431 cpu=2 pid=415 > __slab_alloc+60 > kmem_cache_alloc_node+528 > alloc_task_struct_node+36 > dup_task_struct+56 > copy_process+724 > kernel_clone+276 > __do_sys_clone+152 > __se_sys_clone+60 > __arm64_sys_clone+88 > __invoke_syscall+36 > invoke_syscall+284 > el0_svc_common+248 > do_el0_svc+56 > el0_svc+248 > el0t_64_sync_handler+92 > el0t_64_sync+344 > > object ffff000007e79d00 freed in free_task_struct+32 age=12132 cpu=1 pid=0 > kmem_cache_free+780 > free_task_struct+32 > free_task+164 > __put_task_struct+328 > put_task_struct+44 > delayed_put_task_struct+64 > rcu_do_batch+972 > rcu_core+592 > rcu_core_si+24 > __softirqentry_text_start+388 > 
do_softirq_own_stack+12 > invoke_softirq+216 > __irq_exit_rcu+164 > irq_exit+20 > handle_domain_irq+120 > > 4. Dump slab debug trace for each allocated object belongs to this slab > when used "-st" with an slab page address. > > 5. Dump slab debug trace for each allocated object belongs to slab cache > when used "-S -t" with a slab cache address. > > With this patch, the page allocation times can be sorted by page_owner_sort > tool easily, and the slab allocation/free times can be sorted by a script. > > Signed-off-by: qiwu.chen <qiwu.chen@xxxxxxxxxxxxx> > --- > defs.h | 50 ++++++ > help.c | 8 +- > memory.c | 538 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- > 3 files changed, 572 insertions(+), 24 deletions(-) > > diff --git a/defs.h b/defs.h > index dfbd241..e9eb9e3 100644 > --- a/defs.h > +++ b/defs.h > @@ -206,6 +206,34 @@ static inline int string_exists(char *s) { return (s ? TRUE : FALSE); } > #undef roundup > #endif > #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) > +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) > +#define rounddown(x, y) ((x) & ~__round_mask(x, y)) > +#define IS_ALIGNED(x, y) (((x) & ((typeof(x))(y) - 1)) == 0) > + > +/* stack depot macros before kernel commit 8151c7a35d8bd */ > +#define STACK_ALLOC_ALIGN 4 > +#define STACK_ALLOC_NULL_PROTECTION_BITS 1 > +#define STACK_ALLOC_ORDER 2 /* 'Slab' size order for stack depot, 4 pages */ > +#define STACK_ALLOC_OFFSET_BITS (STACK_ALLOC_ORDER + PAGESHIFT() - STACK_ALLOC_ALIGN) > +#define DEPOT_STACK_BITS (sizeof(uint) * 8) > +#define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ > + STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS) > + > +/* stack depot macros since kernel commit 8151c7a35d8bd */ > +#define STACK_DEPOT_EXTRA_BITS 5 > +#define DEPOT_HANDLE_BITS (sizeof(uint) * 8) > +#define DEPOT_POOL_ORDER 2 /* Pool size order, 4 pages */ > +#define DEPOT_POOL_SIZE (1LL << (PAGESHIFT() + DEPOT_POOL_ORDER)) > +#define DEPOT_STACK_ALIGN 4 > +#define DEPOT_OFFSET_BITS 
(DEPOT_POOL_ORDER + PAGESHIFT() - DEPOT_STACK_ALIGN) > +#define DEPOT_POOL_INDEX_BITS (DEPOT_HANDLE_BITS - DEPOT_OFFSET_BITS - \ > + STACK_DEPOT_EXTRA_BITS) > + > +/* GFP flags */ > +#define __GFP_RECLAIMABLE 0x10u > +#define __GFP_MOVABLE 0x08u > +#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) > +#define GFP_MOVABLE_SHIFT 3 > > typedef uint64_t physaddr_t; > > @@ -1626,6 +1654,7 @@ struct offset_table { /* stash of commonly-used offsets */ > long pglist_data_node_present_pages; > long pglist_data_node_spanned_pages; > long pglist_data_bdata; > + long pglist_data_node_page_ext; New members should be appended to the end of the struct, not inserted in the middle of it. > long page_cache_bucket_chain; > long zone_struct_free_pages; > long zone_struct_free_area; > @@ -2243,6 +2272,23 @@ struct offset_table { /* stash of commonly-used offsets */ > long vmap_node_busy; > long rb_list_head; > long file_f_inode; > + long stack_record_size; > + long stack_record_entries; > + long stack_record_count; > + long page_owner_order; > + long page_owner_gfp_mask; > + long page_owner_ts_nsec; > + long page_owner_free_ts_nsec; > + long page_owner_pid; > + long page_owner_handle; > + long page_owner_free_handle; > + long mem_section_page_ext; > + long track_addr; > + long track_addrs; > + long track_pid; > + long track_cpu; > + long track_when; > + long track_handle; > }; > > struct size_table { /* stash of commonly-used sizes */ > @@ -2419,6 +2465,10 @@ struct size_table { /* stash of commonly-used sizes */ > long module_memory; > long fred_frame; > long vmap_node; > + long page_ext; > + long page_owner; > + long stack_record; > + long track; > }; > > struct array_table { > diff --git a/help.c b/help.c > index e95ac1d..6a59064 100644 > --- a/help.c > +++ b/help.c > @@ -6815,8 +6815,8 @@ NULL > char *help_kmem[] = { > "kmem", > "kernel memory", > -"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p | -m member[,member]]\n" > -" [[-s|-S|-S=cpu[s]|-r] [slab] [-I slab[,slab]]] [-g [flags]] 
[[-P] address]]", > +"[-f|-F|-c|-C|-i|-v|-V|-n|-z|-o|-h] [-p|-t | -m member[,member]]\n" > +" [[-s|-S|-S=cpu[s]|-r|-t] [slab] [-I slab[,slab]]] [-g [flags]] [[-P] address]]", > " This command displays information about the use of kernel memory.\n", > " -f displays the contents of the system free memory headers.", > " also verifies that the page count equals nr_free_pages.", > @@ -6845,6 +6845,7 @@ char *help_kmem[] = { > " mem_map[] array, made up of the page struct address, its associated", > " physical address, the page.mapping, page.index, page._count and", > " page.flags fields.", > +" -t displays page_owner allocated stack trace for each allocated page in buddy system.", > " -m member similar to -p, but displays page structure contents specified by", > " a comma-separated list of one or more struct page members. The", > " \"flags\" member will always be expressed in hexadecimal format, and", > @@ -6893,12 +6894,15 @@ char *help_kmem[] = { > " address when used with -s or -S, searches the kmalloc() slab subsystem", > " for the slab containing of this virtual address, showing whether", > " it is in use or free.", > +" when added extra -t, displays the slab debug trace for the allocated", > +" object belongs to this slab", > " address when used with -f, the address can be either a page pointer,", > " a physical address, or a kernel virtual address; the free_area", > " header containing the page (if any) is displayed.", > " address when used with -p, the address can be either a page pointer, a", > " physical address, or a kernel virtual address; its basic mem_map", > " page information is displayed.", > +" when added extra -t, display the page_owner traces for this page", > " address when used with -m, the address can be either a page pointer, a", > " physical address, or a kernel virtual address; the specified", > " members of the associated page struct are displayed.", > diff --git a/memory.c b/memory.c > index a74ebaf..7bf8f86 100644 > --- a/memory.c > +++ 
b/memory.c > @@ -323,6 +323,11 @@ static ulong kmem_cache_nodelists(ulong); > static void dump_hstates(void); > static ulong freelist_ptr(struct meminfo *, ulong, ulong); > static ulong handle_each_vm_area(struct handle_each_vm_area_args *); > +static void page_owner_init(void); > +static int page_owner_enabled(void); > +static void stack_depot_init(void); > +static void dump_page_owner(struct meminfo *, ulong, physaddr_t); > +enum track_item { TRACK_ALLOC, TRACK_FREE, TRACK_ALL }; > > /* > * Memory display modes specific to this file. > @@ -860,6 +865,16 @@ vm_init(void) > "kmem_cache_node", "partial"); > MEMBER_OFFSET_INIT(kmem_cache_node_full, > "kmem_cache_node", "full"); > + > + STRUCT_SIZE_INIT(track, "track"); > + MEMBER_OFFSET_INIT(track_addr, "track", "addr"); > + if (MEMBER_EXISTS("track", "addrs")) > + MEMBER_OFFSET_INIT(track_addrs, "track", "addrs"); > + if (MEMBER_EXISTS("track", "handle")) > + MEMBER_OFFSET_INIT(track_handle, "track", "handle"); > + MEMBER_OFFSET_INIT(track_when, "track", "when"); > + MEMBER_OFFSET_INIT(track_cpu, "track", "cpu"); > + MEMBER_OFFSET_INIT(track_pid, "track", "pid"); > } else { > MEMBER_OFFSET_INIT(kmem_cache_s_c_nextp, > "kmem_cache_s", "c_nextp"); > @@ -983,6 +998,8 @@ vm_init(void) > vt->flags |= DISCONTIGMEM; > > sparse_mem_init(); > + page_owner_init(); > + stack_depot_init(); > > vt->vmalloc_start = machdep->vmalloc_start(); > if (IS_VMALLOC_ADDR(vt->mem_map)) > @@ -1099,6 +1116,8 @@ vm_init(void) > MEMBER_OFFSET_INIT(pglist_data_bdata, "pglist_data", "bdata"); > MEMBER_OFFSET_INIT(pglist_data_nr_zones, "pglist_data", > "nr_zones"); > + MEMBER_OFFSET_INIT(pglist_data_node_page_ext, "pglist_data", > + "node_page_ext"); > MEMBER_OFFSET_INIT(pglist_data_node_start_pfn, "pglist_data", > "node_start_pfn"); > MEMBER_OFFSET_INIT(pglist_data_pgdat_next, "pglist_data", > @@ -5037,6 +5056,8 @@ get_task_mem_usage(ulong task, struct task_mem_usage *tm) > #define SLAB_BITFIELD (ADDRESS_SPECIFIED << 25) > #define 
SLAB_GATHER_FAILURE (ADDRESS_SPECIFIED << 26) > #define GET_SLAB_ROOT_CACHES (ADDRESS_SPECIFIED << 27) > +#define GET_SLAB_DEBUG_TRACE (ADDRESS_SPECIFIED << 28) > +#define GET_PAGE_OWNER (ADDRESS_SPECIFIED << 29) > > #define GET_ALL \ > (GET_SHARED_PAGES|GET_TOTALRAM_PAGES|GET_BUFFERS_PAGES|GET_SLAB_PAGES) > @@ -5048,7 +5069,7 @@ cmd_kmem(void) > int c; > int sflag, Sflag, pflag, fflag, Fflag, vflag, zflag, oflag, gflag; > int nflag, cflag, Cflag, iflag, lflag, Lflag, Pflag, Vflag, hflag; > - int rflag; > + int rflag, tflag; > struct meminfo meminfo; > ulonglong value[MAXARGS]; > char buf[BUFSIZE]; > @@ -5061,13 +5082,13 @@ cmd_kmem(void) > spec_addr = choose_cpu = 0; > sflag = Sflag = pflag = fflag = Fflag = Pflag = zflag = oflag = 0; > vflag = Cflag = cflag = iflag = nflag = lflag = Lflag = Vflag = 0; > - gflag = hflag = rflag = 0; > + gflag = hflag = rflag = tflag = 0; > escape = FALSE; > BZERO(&meminfo, sizeof(struct meminfo)); > BZERO(&value[0], sizeof(ulonglong)*MAXARGS); > pc->curcmd_flags &= ~HEADER_PRINTED; > > - while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoh")) != EOF) { > + while ((c = getopt(argcnt, args, "gI:sS::rFfm:pvczCinl:L:PVoht")) != EOF) { > switch(c) > { > case 'V': > @@ -5204,6 +5225,10 @@ cmd_kmem(void) > gflag = 1; > break; > > + case 't': > + tflag = 1; > + break; > + > default: > argerrs++; > break; > @@ -5213,7 +5238,7 @@ cmd_kmem(void) > if (argerrs) > cmd_usage(pc->curcmd, SYNOPSIS); > > - if ((sflag + Sflag + pflag + fflag + Fflag + Vflag + oflag + > + if ((fflag + Fflag + Vflag + oflag + > vflag + Cflag + cflag + iflag + lflag + Lflag + gflag + > hflag + rflag) > 1) { > error(INFO, "only one flag allowed!\n"); > @@ -5264,10 +5289,13 @@ cmd_kmem(void) > if (pflag) { > meminfo.spec_addr = value[i]; > meminfo.flags = ADDRESS_SPECIFIED; > + if (tflag) > + meminfo.flags |= GET_PAGE_OWNER; Why put the tflag check within the pflag check? Do you think the following would be better? 
if (tflag) meminfo.flags |= GET_PAGE_OWNER; if (pflag) {...} > dump_mem_map(&meminfo); > pflag++; > } > > + ^^^ empty line, should be removed. Please check for other similar cases; I won't list them all. > if (sflag || Sflag) { > if (vt->flags & KMEM_CACHE_UNAVAIL) > error(FATAL, > @@ -5292,6 +5320,8 @@ cmd_kmem(void) > meminfo.reqname = p1; > meminfo.cache = value[i]; > meminfo.flags |= CACHE_SET; > + if (tflag) > + meminfo.flags |= GET_SLAB_DEBUG_TRACE; > if ((i+1) == spec_addr) { /* done? */ > if (meminfo.calls++) > fprintf(fp, "\n"); > @@ -5301,6 +5331,8 @@ cmd_kmem(void) > } else { > meminfo.spec_addr = value[i]; > meminfo.flags = ADDRESS_SPECIFIED; > + if (tflag) > + meminfo.flags |= GET_SLAB_DEBUG_TRACE; > if (Sflag && (vt->flags & KMALLOC_SLUB)) > meminfo.flags |= VERBOSE; > if (meminfo.calls++) > @@ -5346,25 +5378,28 @@ cmd_kmem(void) > gflag++; > } > > - /* > - * no value arguments allowed! > - */ > - if (zflag || nflag || iflag || Fflag || Cflag || Lflag || > + if (tflag) > + tflag++; > + > + /* > + * no value arguments allowed! > + */ > + if (zflag || nflag || iflag || Fflag || Cflag || Lflag || > Vflag || oflag || hflag || rflag) { > error(INFO, > "no address arguments allowed with this option\n"); > cmd_usage(pc->curcmd, SYNOPSIS); > } > > - if (!(sflag + Sflag + pflag + fflag + vflag + cflag + > - lflag + Lflag + gflag)) { > + if (!(sflag + Sflag + pflag + fflag + vflag + cflag + > + lflag + Lflag + gflag + tflag)) { > meminfo.spec_addr = value[i]; > - meminfo.flags = ADDRESS_SPECIFIED; > - if (meminfo.calls++) > - fprintf(fp, "\n"); > + meminfo.flags = ADDRESS_SPECIFIED; > + if (meminfo.calls++) > + fprintf(fp, "\n"); > else > kmem_cache_init(); > - kmem_search(&meminfo); > + kmem_search(&meminfo); Indentation issues here; please reformat this code hunk. 
> } > > } > @@ -5372,8 +5407,11 @@ cmd_kmem(void) > if (iflag == 1) > dump_kmeminfo(); > > - if (pflag == 1) > + if (pflag == 1) { > + if (tflag) > + meminfo.flags = GET_PAGE_OWNER; Same as before: if (tflag) meminfo.flags = GET_PAGE_OWNER; if (pflag == 1) {...} > dump_mem_map(&meminfo); > + } > > if (fflag == 1) > vt->dump_free_pages(&meminfo); > @@ -5457,7 +5495,7 @@ cmd_kmem(void) > if (!(sflag + Sflag + pflag + fflag + Fflag + vflag + > Vflag + zflag + oflag + cflag + Cflag + iflag + > nflag + lflag + Lflag + gflag + hflag + rflag + > - meminfo.calls)) > + tflag + meminfo.calls)) > cmd_usage(pc->curcmd, SYNOPSIS); > > } > @@ -5749,7 +5787,8 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) > > switch (mi->flags) > { > - case ADDRESS_SPECIFIED: > + case ADDRESS_SPECIFIED: > + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: > switch (mi->memtype) > { > case KVADDR: > @@ -5774,6 +5813,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) > print_hdr = TRUE; > break; > > + case GET_PAGE_OWNER: > + print_hdr = FALSE; > + break; > + > case GET_ALL: > shared = 0; > reserved = 0; > @@ -5926,6 +5969,10 @@ dump_mem_map_SPARSEMEM(struct meminfo *mi) > > /* FALLTHROUGH */ > > + case GET_PAGE_OWNER: > + dump_page_owner(mi, pp, phys); > + break; > + > case GET_SLAB_PAGES: > if (v22) { > if ((flags >> v22_PG_Slab) & 1) > @@ -6083,6 +6130,7 @@ display_members: > > if (done) > break; > + > } > > if (done) > @@ -6119,7 +6167,10 @@ display_members: > break; > > case ADDRESS_SPECIFIED: > + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: > mi->retval = done; > + if (mi->flags & GET_PAGE_OWNER) > + dump_page_owner(mi, pp, phys); > break; > } > > @@ -6129,6 +6180,331 @@ display_members: > FREEBUF(page_cache); > } > > +static int stack_depot_enabled(void) > +{ > + struct syment *sp; > + int disable = TRUE; > + > + if ((sp = symbol_search("stack_depot_disable"))) > + readmem(sp->value, KVADDR, &disable, sizeof(int), > + "stack_depot_disable", RETURN_ON_ERROR); > + else if ((sp = 
symbol_search("stack_depot_disabled"))) > + readmem(sp->value, KVADDR, &disable, sizeof(int), > + "stack_depot_disabled", RETURN_ON_ERROR); > + else if ((sp = symbol_search("stack_slabs"))) > + return sp->value ? FALSE : TRUE; > + > + return !disable; > +} > + > +static void stack_depot_init(void) > +{ > + if (stack_depot_enabled()) { > + STRUCT_SIZE_INIT(stack_record, "stack_record"); > + MEMBER_OFFSET_INIT(stack_record_size, "stack_record", "size"); > + MEMBER_OFFSET_INIT(stack_record_entries, "stack_record", "entries"); > + if (MEMBER_EXISTS("stack_record", "count")) > + MEMBER_OFFSET_INIT(stack_record_count, "stack_record", "count"); > + } > +} > + > +/* Fetch stack entries from a depot. */ > +static unsigned int stack_depot_fetch(uint handle, ulong *entries) > +{ > + struct syment *sp; > + uint valid, offset, slabindex, poolindex, pools_num, stack_record_count, stack_size = 0; > + ulong stack_record_addr, sym_value; > + > + if (!handle) > + return 0; > + > + if ((sp = symbol_search("stack_slabs"))) { > + valid = (handle >> (STACK_ALLOC_INDEX_BITS + STACK_ALLOC_OFFSET_BITS)) & STACK_ALLOC_NULL_PROTECTION_BITS; > + if (!valid) > + return 0; > + > + slabindex = handle & ((1 << STACK_ALLOC_INDEX_BITS) - 1); > + if (!readmem(sp->value + slabindex * sizeof(void *), KVADDR, &stack_record_addr, > + sizeof(void *), "stack_record_addr", FAULT_ON_ERROR)) > + return 0; > + > + offset = (handle >> STACK_ALLOC_INDEX_BITS) & ((1 << STACK_ALLOC_OFFSET_BITS) - 1); > + stack_record_addr += (offset << STACK_ALLOC_ALIGN); > + *entries = stack_record_addr + OFFSET(stack_record_entries); > + if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size, sizeof(stack_size), > + "stack_record_entries", FAULT_ON_ERROR)) > + return 0; > + } else if ((sp = symbol_search("stack_pools")) && (sym_value = symbol_value("pools_num"))) { > + poolindex = handle & ((1 << DEPOT_POOL_INDEX_BITS) - 1); > + readmem(sym_value, KVADDR, &pools_num, sizeof(int), "pools_num", 
RETURN_ON_ERROR); > + if (poolindex >= pools_num) { > + error(INFO, "pool index %d out of bounds (%d) for stack id %08x\n", poolindex, pools_num, handle); > + return 0; > + } > + > + readmem(sp->value + (poolindex-1) * sizeof(void *), KVADDR, &stack_record_addr, > + sizeof(void *), "stack_record_addr", FAULT_ON_ERROR); > + if (!stack_record_addr) > + return 0; > + > + offset = (handle >> DEPOT_POOL_INDEX_BITS) & ((1 << DEPOT_OFFSET_BITS) - 1); > + stack_record_addr += (offset << DEPOT_STACK_ALIGN); > + if (!readmem(stack_record_addr + OFFSET(stack_record_count), KVADDR, &stack_record_count, > + sizeof(stack_record_count), "stack_record_count", FAULT_ON_ERROR) || !stack_record_count) > + return 0; > + > + *entries = stack_record_addr + OFFSET(stack_record_entries); > + if (!readmem(stack_record_addr + OFFSET(stack_record_size), KVADDR, &stack_size, sizeof(stack_size), > + "stack_record_entries", FAULT_ON_ERROR)) > + return 0; > + } > + > + return stack_size; > +} > + > +static void stack_trace_print(ulong entries, uint nr_entries) > +{ > + int i; > + struct syment *sp; > + ulong value, offset; > + char buf[BUFSIZE]; > + > + if (!nr_entries) > + return; > + > + for (i = 0; i < nr_entries; i++) { > + if (!readmem(entries, KVADDR, &value, sizeof(value), "stack_trace", FAULT_ON_ERROR)) > + break; > + > + entries += sizeof(ulong); > + sp = value_search(value, &offset); > + if (!sp) > + break; > + > + fprintf(fp, "%s\n", value_to_symstr(sp->value+offset, buf, 0)); > + } > + fprintf(fp, "\n"); > +} > + > +static ulong gfp_migratetype(ulong gfp_flags) > +{ > + struct syment *sp; > + int page_group_by_mobility_disabled; > + > + if ((sp = symbol_search("page_group_by_mobility_disabled"))) { > + readmem(sp->value, KVADDR, &page_group_by_mobility_disabled, sizeof(int), > + "page_group_by_mobility_disabled", RETURN_ON_ERROR); > + if (page_group_by_mobility_disabled) { > + ulong migrate_unmovable; > + > + enumerator_value("MIGRATE_UNMOVABLE", &migrate_unmovable); > + return 
migrate_unmovable; > + } > + } > + > + return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; > +} > + > +static void migratetype_name(ulong migratetype, char *buf) > +{ > + struct syment *sp; > + ulong migratetype_name_addr; > + > + sp = symbol_search("migratetype_names"); > + if (!sp) > + return; > + > + readmem(sp->value + migratetype * sizeof(ulong), KVADDR, &migratetype_name_addr, > + sizeof(ulong), "migratetype_name", RETURN_ON_ERROR); > + read_string(migratetype_name_addr, buf, BUFSIZE-1); > +} > + > +static void print_page_owner(ulong pfn, ulong page, char *page_owner, enum track_item alloc) > +{ > + int i, pid; > + ushort order; > + uint handle, free_handle, gfp_mask, nr_entries; > + u64 ts_nsec, free_ts_nsec; > + ulong entries, offset, page_flags; > + struct syment *sp; > + char buf[BUFSIZE]; > + > + order = USHORT(page_owner + OFFSET(page_owner_order)); > + gfp_mask = UINT(page_owner + OFFSET(page_owner_gfp_mask)); > + handle = UINT(page_owner + OFFSET(page_owner_handle)); > + free_handle = UINT(page_owner + OFFSET(page_owner_free_handle)); > + ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_ts_nsec)); > + free_ts_nsec = ULONGLONG(page_owner + OFFSET(page_owner_free_ts_nsec)); > + pid = INT(page_owner + OFFSET(page_owner_pid)); > + > + if (handle && (alloc != TRACK_FREE)) { > + fprintf(fp, "Page allocated via order %u, mask %#x, pid %d, ts %llu ns\n", > + order, gfp_mask, pid, ts_nsec); > + migratetype_name(gfp_migratetype(gfp_mask), buf); > + readmem(page+OFFSET(page_flags), KVADDR, &page_flags, sizeof(ulong), > + "page.flags", FAULT_ON_ERROR); > + fprintf(fp, "PFN %#lx, type %s, Flags %#lx\n", pfn, buf, page_flags); > + nr_entries = stack_depot_fetch(handle, &entries); > + stack_trace_print(entries, nr_entries); > + } > + > + if (alloc != TRACK_ALLOC && > + (free_handle = UINT(page_owner + OFFSET(page_owner_free_handle)))) { > + nr_entries = stack_depot_fetch(handle, &entries); > + fprintf(fp, "page last free ts %llu ns, stack trace:\n", 
free_ts_nsec); > + stack_trace_print(entries, nr_entries); > + } > +} > + > +/* Get the max order for zoned buddy allocator */ > +static inline ulong get_max_order(void) > +{ > + char *string; > + > + if ((kt->ikconfig_flags & IKCONFIG_AVAIL) && > + get_kernel_config("CONFIG_FORCE_MAX_ZONEORDER", &string) == IKCONFIG_STR) > + return atol(string); > + > + return 11; > +} > + > +#define MAX_ORDER get_max_order() > +#define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1)) > + > +static int lookup_page_ext(ulong pfn, ulong pp, ulong *page_ext) > +{ > + int node; > + ulong page_ext_size, section, section_nr, pgdat, node_page_ext, node_start_pfn, page_ext_idx; > + > + if (!kernel_symbol_exists("page_ext_size") || > + !readmem(symbol_value("page_ext_size"), KVADDR, &page_ext_size, sizeof(page_ext_size), > + "page_ext_size", QUIET|RETURN_ON_ERROR)) > + return FALSE; > + > + if (IS_SPARSEMEM()) { > + section_nr = pfn_to_section_nr(pfn); > + if (!(section = valid_section_nr(section_nr))) > + return FALSE; > + > + if (!readmem(section + OFFSET(mem_section_page_ext), KVADDR, &node_page_ext, sizeof(ulong), > + "mem_section page_ext", FAULT_ON_ERROR) || !node_page_ext) > + return FALSE; > + > + *page_ext = node_page_ext + pfn * page_ext_size; > + return TRUE; > + } > + > + if ((node = page_to_nid(pp) >= 0)) { > + pgdat = vt->node_table[node].pgdat; > + if (!VALID_MEMBER(pglist_data_node_page_ext) || > + !readmem(pgdat + OFFSET(pglist_data_node_page_ext), KVADDR, &node_page_ext, > + sizeof(ulong), "pglist node_page_ext", FAULT_ON_ERROR) || !node_page_ext) > + return FALSE; > + > + if (!VALID_MEMBER(pglist_data_node_start_pfn) || > + !readmem(pgdat + OFFSET(pglist_data_node_start_pfn), KVADDR, &node_start_pfn, > + sizeof(ulong), "pglist node_start_pfn", FAULT_ON_ERROR) || !node_start_pfn) > + return FALSE; > + > + page_ext_idx = pfn - rounddown(node_start_pfn, MAX_ORDER_NR_PAGES); This variable is not used, can you check its usage? 
Also for this large inserting hunk, please reformat your patch, keep the length of code no longer than 80 chars. > + *page_ext = node_page_ext + pfn * page_ext_size; > + return TRUE; > + } > + > + return FALSE; > +} > + > +static ulong get_page_owner(ulong page_ext) > +{ > + struct syment *sp; > + ulong page_owner_ops_offset; > + > + sp = symbol_search("page_owner_ops"); > + if (!sp) > + return FALSE; > + > + if (!readmem(sp->value, KVADDR, &page_owner_ops_offset, sizeof(ulong), > + "page_owner_ops_offset", RETURN_ON_ERROR)) > + return FALSE; > + > + return page_ext + page_owner_ops_offset; > +} > + > +static int page_owner_enabled(void) > +{ > + struct syment *sp; > + int enabled; > + > + if ((sp = symbol_search("page_owner_enabled")) && > + readmem(sp->value, KVADDR, &enabled, sizeof(int), "page_owner_enabled", RETURN_ON_ERROR)) > + return enabled; > + > + if ((sp = symbol_search("page_owner_inited")) && > + readmem(sp->value, KVADDR, &enabled, sizeof(int), "page_owner_inited", RETURN_ON_ERROR)) > + return enabled; > + > + return FALSE; > +} > + > +static void page_owner_init(void) > +{ > + if (page_owner_enabled()) { > + STRUCT_SIZE_INIT(page_ext, "page_ext"); > + STRUCT_SIZE_INIT(page_owner, "page_owner"); > + MEMBER_OFFSET_INIT(mem_section_page_ext, "mem_section", "page_ext"); > + MEMBER_OFFSET_INIT(page_owner_handle, "page_owner", "handle"); > + MEMBER_OFFSET_INIT(page_owner_free_handle, "page_owner", "free_handle"); > + MEMBER_OFFSET_INIT(page_owner_order, "page_owner", "order"); > + MEMBER_OFFSET_INIT(page_owner_gfp_mask, "page_owner", "gfp_mask"); > + MEMBER_OFFSET_INIT(page_owner_ts_nsec, "page_owner", "ts_nsec"); > + MEMBER_OFFSET_INIT(page_owner_free_ts_nsec, "page_owner", "free_ts_nsec"); > + MEMBER_OFFSET_INIT(page_owner_pid, "page_owner", "pid"); > + } > +} > + > +static void dump_page_owner(struct meminfo *mi, ulong pp, physaddr_t phys) > +{ > + ulong pfn, page_ext_addr, page_owner_addr, page_ext; > + long page_ext_owner, page_ext_owner_allocated; > 
+ char *page_owner; > + > + pfn = BTOP(phys); > + if (!lookup_page_ext(pfn, pp, &page_ext_addr)) > + return; > + > + page_owner_addr = get_page_owner(page_ext_addr); > + if (!page_owner_addr) > + return; > + > + page_owner = (char *)GETBUF(SIZE(page_owner)); > + if (!readmem(page_owner_addr, KVADDR, page_owner, SIZE(page_owner), "page_owner", FAULT_ON_ERROR)) > + goto exit; > + > + enumerator_value("PAGE_EXT_OWNER", &page_ext_owner); > + if (!readmem(page_ext_addr, KVADDR, &page_ext, sizeof(ulong), "page_ext", FAULT_ON_ERROR) > + || !(page_ext & (1 << page_ext_owner))) > + goto exit; > + > + enumerator_value("PAGE_EXT_OWNER_ALLOCATED", &page_ext_owner_allocated); > + if (mi->flags == GET_PAGE_OWNER) { > + if (!(page_ext & (1 << page_ext_owner_allocated)) || > + !IS_ALIGNED(pfn, 1 << USHORT(page_owner + OFFSET(page_owner_order)))) > + goto exit; > + > + /* dump allocated page owner for current memory usage */ > + print_page_owner(pfn, pp, page_owner, TRACK_ALLOC); > + } else { > + if (page_ext & (1 << page_ext_owner_allocated)) > + fprintf(fp, "page_owner tracks the page 0x%lx as allocated\n", pp); > + else > + fprintf(fp, "page_owner tracks the page 0x%lx as freed\n", pp); > + print_page_owner(pfn, pp, page_owner, TRACK_ALL); > + } > + > +exit: > + FREEBUF(page_owner); > +} > + > static void > dump_mem_map(struct meminfo *mi) > { > @@ -6161,6 +6537,18 @@ dump_mem_map(struct meminfo *mi) > char style3[100]; > char style4[100]; > > + if (mi->flags & GET_PAGE_OWNER) { > + if (!page_owner_enabled()) { > + error(INFO, "page_owner is disabled\n"); > + return; > + } > + > + if (!stack_depot_enabled()) { > + error(INFO, "stack_depot is disabled\n"); > + return; > + } > + } > + > if (IS_SPARSEMEM()) { > dump_mem_map_SPARSEMEM(mi); > return; > @@ -6238,7 +6626,8 @@ dump_mem_map(struct meminfo *mi) > > switch (mi->flags) > { > - case ADDRESS_SPECIFIED: > + case ADDRESS_SPECIFIED: > + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: > switch (mi->memtype) > { > case KVADDR: > @@ -6263,6 
+6652,10 @@ dump_mem_map(struct meminfo *mi) > print_hdr = TRUE; > break; > > + case GET_PAGE_OWNER: > + print_hdr = FALSE; > + break; > + > case GET_ALL: > shared = 0; > reserved = 0; > @@ -6376,6 +6769,10 @@ dump_mem_map(struct meminfo *mi) > > /* FALLTHROUGH */ > > + case GET_PAGE_OWNER: > + dump_page_owner(mi, pp, phys); > + break; > + > case GET_SLAB_PAGES: > if (v22) { > if ((flags >> v22_PG_Slab) & 1) > @@ -6570,7 +6967,10 @@ display_members: > break; > > case ADDRESS_SPECIFIED: > + case ADDRESS_SPECIFIED|GET_PAGE_OWNER: > mi->retval = done; > + if (mi->flags & GET_PAGE_OWNER) > + dump_page_owner(mi, pp, phys); > break; > } > > @@ -19618,6 +20018,99 @@ do_kmem_cache_slub(struct meminfo *si) > FREEBUF(per_cpu); > } > > +/* > + * Return offset of the end of info block which is inuse + free pointer if > + * not overlapping with object. > + */ > +static inline uint get_info_end(struct meminfo *si) > +{ > + uint inuse = UINT(si->cache_buf + OFFSET(kmem_cache_inuse)); > + uint offset = UINT(si->cache_buf + OFFSET(kmem_cache_offset)); > + > + if (offset >= inuse) > + return inuse + sizeof(void *); > + else > + return inuse; > +} > + > +static inline u64 get_jiffies(void) > +{ > + ulong jiffies; > + u64 jiffies_64; > + > + if (symbol_exists("jiffies_64")) { > + get_symbol_data("jiffies_64", sizeof(u64), &jiffies_64); > + return jiffies_64; > + } else { > + get_symbol_data("jiffies", sizeof(ulong), &jiffies); > + return (u64)jiffies; > + } > +} There is no need to write a new function for it, we can reuse the existing function get_uptime(). 
> + > +#define TRACK_ADDRS_COUNT 16 > +void print_track(struct meminfo *si, char *track, ulong object, enum track_item alloc) > +{ > + ulong track_addr, addr, addrs, when, entries, nr_entries; > + uint i, cpu, pid, handle; > + char buf[BUFSIZE]; > + > + track_addr = object + get_info_end(si) + alloc * STRUCT_SIZE("track"); > + if (!readmem(track_addr, KVADDR, track, SIZE(track), "track", FAULT_ON_ERROR)) > + return; > + > + addr = ULONG(track + OFFSET(track_addr)); > + if (addr) { > + when = ULONG(track + OFFSET(track_when)); > + cpu = UINT(track + OFFSET(track_cpu)); > + pid = UINT(track + OFFSET(track_pid)); > + fprintf(fp, "object %lx %s in %s age=%llu cpu=%u pid=%d\n", > + object, alloc ? "freed" : "allocated", value_to_symstr(addr, buf, 0), > + get_jiffies() - (u64)when, cpu, pid); > + if (VALID_MEMBER(track_addrs)) { > + addrs = track_addr + OFFSET(track_addrs); > + stack_trace_print(addrs, TRACK_ADDRS_COUNT); > + } else if (VALID_MEMBER(track_handle)) { > + handle = UINT(track + OFFSET(track_handle)); > + nr_entries = stack_depot_fetch(handle, &entries); > + stack_trace_print(entries, nr_entries); > + } else { > + fprintf(fp, "stack trace missing\n"); > + handle = track_addr + OFFSET(track_handle); > + nr_entries = stack_depot_fetch(handle, &entries); > + stack_trace_print(entries, nr_entries); > + } > + } > +} > + > +#define SLAB_STORE_USER (0x00010000UL) > +static ulong get_slab_store_user_flag(void) > +{ > + ulong slab_store_user_flag; > + > + if (enumerator_value("_SLAB_STORE_USER", &slab_store_user_flag)) > + return (1 << slab_store_user_flag); > + else > + return SLAB_STORE_USER; > +} > + > +static void slab_debug_trace_show(struct meminfo *si, ulong object) > +{ > + ulong flags; > + char *track; > + > + if (!(si->flags & GET_SLAB_DEBUG_TRACE)) > + return; better put the check into the caller: if (si->flags & GET_SLAB_DEBUG_TRACE) slab_debug_trace_show(); > + > + flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags)); > + if (!(flags & 
get_slab_store_user_flag())) > + return; > + > + track = (char *)GETBUF(SIZE(track)); > + print_track(si, track, object, TRACK_ALLOC); > + print_track(si, track, object, TRACK_FREE); > + FREEBUF(track); > +} > + > #define DUMP_SLAB_INFO_SLUB() \ > { \ > char b1[BUFSIZE], b2[BUFSIZE]; \ > @@ -19672,7 +20165,8 @@ do_slab_slub(struct meminfo *si, int verbose) > > if (!verbose) { > DUMP_SLAB_INFO_SLUB(); > - return TRUE; > + if (!(si->flags & GET_SLAB_DEBUG_TRACE)) > + return TRUE; > } > > cpu_freelist = 0; > @@ -19775,7 +20269,8 @@ do_slab_slub(struct meminfo *si, int verbose) > if (is_free && (cpu_slab >= 0)) > fprintf(fp, "(cpu %d cache)", cpu_slab); > fprintf(fp, "\n"); > - > + if (!is_free) > + slab_debug_trace_show(si, p + red_left_pad); > } > > return TRUE; > @@ -19886,11 +20381,10 @@ do_node_lists_slub(struct meminfo *si, ulong node_ptr, int node) > > } > > -#define SLAB_STORE_USER (0x00010000UL) > flags = ULONG(si->cache_buf + OFFSET(kmem_cache_flags)); > > if (INVALID_MEMBER(kmem_cache_node_full) || > - !(flags & SLAB_STORE_USER)) { > + !(flags & get_slab_store_user_flag())) { > fprintf(fp, "NODE %d FULL:\n (not tracked)\n", node); > return; > } > -- > 2.25.1 > -- > Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx > To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx > https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ > Contribution Guidelines: https://github.com/crash-utility/crash/wiki -- Crash-utility mailing list -- devel@xxxxxxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to devel-leave@xxxxxxxxxxxxxxxxxxxxxxxxxxx https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/ Contribution Guidelines: https://github.com/crash-utility/crash/wiki