Em Tue, Apr 21, 2015 at 01:55:02PM +0900, Namhyung Kim escreveu: > Make perf kmem support caller statistics for pages. Unlike the slab case, > the tracepoints in the page allocator don't provide callsite info. So > it records with callchains and extracts the callsite info from them. > > Note that the callchain contains several memory allocation functions > which have no meaning for users. So skip those functions to get proper > callsites. I used the following regex pattern to skip the allocator > functions: > > ^_?_?(alloc|get_free|get_zeroed)_pages? > > This gave me the following list of functions: > > # perf kmem record --page sleep 3 > # perf kmem stat --page -v > ... > alloc func: __get_free_pages > alloc func: get_zeroed_page > alloc func: alloc_pages_exact > alloc func: __alloc_pages_direct_compact > alloc func: __alloc_pages_nodemask > alloc func: alloc_page_interleave > alloc func: alloc_pages_current > alloc func: alloc_pages_vma > alloc func: alloc_page_buffers > alloc func: alloc_pages_exact_nid ... > > The output looks mostly the same as --alloc (I also added a callsite column > to that) but groups entries by callsite. Currently, the order, > migrate type and GFP flag info is for the last allocation and is not > guaranteed to be the same for all allocations from the callsite. 
In my testing: [root@ssdandy ~]# perf kmem stat --page --caller ------------------------------------------------------------------------------ TotalAlloc(KB)|Hits|Ord| Mig.type |GFP flags| Callsite ------------------------------------------------------------------------------ 492 | 21 | 3 | UNMOVABL | 0235200 | new_slab 456 |114 | 0 | UNMOVABL | 00202d0 | iwl_pcie_rx_replenish 60 | 15 | 0 | UNMOVABL | 02284d0 | pte_alloc_one 44 | 11 | 0 | MOVABLE | 00280da | handle_mm_fault 28 | 7 | 0 | MOVABLE | 00200da | do_wp_page 16 | 4 | 0 | UNMOVABL | 00284d0 | __pmd_alloc 16 | 4 | 0 | UNMOVABL | 0020200 | __tlb_remove_page 12 | 3 | 0 | MOVABLE | 00200da | handle_mm_fault 8 | 2 | 0 | UNMOVABL | 00284d0 | __pud_alloc 4 | 1 | 0 | UNMOVABL | 0020250 | ftrace_define_fields_xfs_ag_class 4 | 1 | 0 | UNMOVABL | 0020010 | bio_copy_kern 4 | 1 | 0 | UNMOVABL | 00200d0 | __pollwait 4 | 1 | 0 | UNMOVABL | 00200d2 | pipe_write 4 | 1 | 0 | MOVABLE | 00280da | do_wp_page 4 | 1 | 0 | UNMOVABL | 02284d0 | pgd_alloc --------------------------------------------------------------------------------- Probably that new_slab() one should go into the regexp? 
[acme@ssdandy linux]$ uname -a Linux ssdandy 4.0.0-rc6+ #3 SMP Mon Apr 13 16:45:57 BRT 2015 x86_64 x86_64 x86_64 GNU/Linux [acme@ssdandy linux]$ grep SL.B /lib/modules/`uname -r`/build/.config CONFIG_SLUB_DEBUG=y # CONFIG_SLAB is not set CONFIG_SLUB=y CONFIG_SLUB_CPU_PARTIAL=y CONFIG_SLABINFO=y # CONFIG_SLUB_DEBUG_ON is not set # CONFIG_SLUB_STATS is not set [acme@ssdandy linux]$ - Arnaldo > --------------------------------------------------------------------------------------------- > Total_alloc (KB) | Hits | Order | Mig.type | GFP flags | Callsite > --------------------------------------------------------------------------------------------- > 1,064 | 266 | 0 | UNMOVABL | 000000d0 | __pollwait > 52 | 13 | 0 | UNMOVABL | 002084d0 | pte_alloc_one > 44 | 11 | 0 | MOVABLE | 000280da | handle_mm_fault > 20 | 5 | 0 | MOVABLE | 000200da | do_cow_fault > 20 | 5 | 0 | MOVABLE | 000200da | do_wp_page > 16 | 4 | 0 | UNMOVABL | 000084d0 | __pmd_alloc > 16 | 4 | 0 | UNMOVABL | 00000200 | __tlb_remove_page > 12 | 3 | 0 | UNMOVABL | 000084d0 | __pud_alloc > 8 | 2 | 0 | UNMOVABL | 00000010 | bio_copy_user_iov > 4 | 1 | 0 | UNMOVABL | 000200d2 | pipe_write > 4 | 1 | 0 | MOVABLE | 000280da | do_wp_page > 4 | 1 | 0 | UNMOVABL | 002084d0 | pgd_alloc > --------------------------------------------------------------------------------------------- > > Acked-by: Pekka Enberg <penberg@xxxxxxxxxx> > Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> > --- > tools/perf/builtin-kmem.c | 327 +++++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 306 insertions(+), 21 deletions(-) > > diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c > index 4f0f38462d97..3649eec6807f 100644 > --- a/tools/perf/builtin-kmem.c > +++ b/tools/perf/builtin-kmem.c > @@ -10,6 +10,7 @@ > #include "util/header.h" > #include "util/session.h" > #include "util/tool.h" > +#include "util/callchain.h" > > #include "util/parse-options.h" > #include "util/trace-event.h" > @@ -21,6 +22,7 @@ > #include 
<linux/rbtree.h> > #include <linux/string.h> > #include <locale.h> > +#include <regex.h> > > static int kmem_slab; > static int kmem_page; > @@ -241,6 +243,7 @@ static unsigned long nr_page_fails; > static unsigned long nr_page_nomatch; > > static bool use_pfn; > +static struct perf_session *kmem_session; > > #define MAX_MIGRATE_TYPES 6 > #define MAX_PAGE_ORDER 11 > @@ -250,6 +253,7 @@ static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; > struct page_stat { > struct rb_node node; > u64 page; > + u64 callsite; > int order; > unsigned gfp_flags; > unsigned migrate_type; > @@ -262,8 +266,144 @@ struct page_stat { > static struct rb_root page_tree; > static struct rb_root page_alloc_tree; > static struct rb_root page_alloc_sorted; > +static struct rb_root page_caller_tree; > +static struct rb_root page_caller_sorted; > > -static struct page_stat *search_page(unsigned long page, bool create) > +struct alloc_func { > + u64 start; > + u64 end; > + char *name; > +}; > + > +static int nr_alloc_funcs; > +static struct alloc_func *alloc_func_list; > + > +static int funcmp(const void *a, const void *b) > +{ > + const struct alloc_func *fa = a; > + const struct alloc_func *fb = b; > + > + if (fa->start > fb->start) > + return 1; > + else > + return -1; > +} > + > +static int callcmp(const void *a, const void *b) > +{ > + const struct alloc_func *fa = a; > + const struct alloc_func *fb = b; > + > + if (fb->start <= fa->start && fa->end < fb->end) > + return 0; > + > + if (fa->start > fb->start) > + return 1; > + else > + return -1; > +} > + > +static int build_alloc_func_list(void) > +{ > + int ret; > + struct map *kernel_map; > + struct symbol *sym; > + struct rb_node *node; > + struct alloc_func *func; > + struct machine *machine = &kmem_session->machines.host; > + regex_t alloc_func_regex; > + const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; > + > + ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); > + if (ret) { > + char err[BUFSIZ]; > + > + 
regerror(ret, &alloc_func_regex, err, sizeof(err)); > + pr_err("Invalid regex: %s\n%s", pattern, err); > + return -EINVAL; > + } > + > + kernel_map = machine->vmlinux_maps[MAP__FUNCTION]; > + if (map__load(kernel_map, NULL) < 0) { > + pr_err("cannot load kernel map\n"); > + return -ENOENT; > + } > + > + map__for_each_symbol(kernel_map, sym, node) { > + if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0)) > + continue; > + > + func = realloc(alloc_func_list, > + (nr_alloc_funcs + 1) * sizeof(*func)); > + if (func == NULL) > + return -ENOMEM; > + > + pr_debug("alloc func: %s\n", sym->name); > + func[nr_alloc_funcs].start = sym->start; > + func[nr_alloc_funcs].end = sym->end; > + func[nr_alloc_funcs].name = sym->name; > + > + alloc_func_list = func; > + nr_alloc_funcs++; > + } > + > + qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp); > + > + regfree(&alloc_func_regex); > + return 0; > +} > + > +/* > + * Find first non-memory allocation function from callchain. > + * The allocation functions are in the 'alloc_func_list'. 
> + */ > +static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample) > +{ > + struct addr_location al; > + struct machine *machine = &kmem_session->machines.host; > + struct callchain_cursor_node *node; > + > + if (alloc_func_list == NULL) { > + if (build_alloc_func_list() < 0) > + goto out; > + } > + > + al.thread = machine__findnew_thread(machine, sample->pid, sample->tid); > + sample__resolve_callchain(sample, NULL, evsel, &al, 16); > + > + callchain_cursor_commit(&callchain_cursor); > + while (true) { > + struct alloc_func key, *caller; > + u64 addr; > + > + node = callchain_cursor_current(&callchain_cursor); > + if (node == NULL) > + break; > + > + key.start = key.end = node->ip; > + caller = bsearch(&key, alloc_func_list, nr_alloc_funcs, > + sizeof(key), callcmp); > + if (!caller) { > + /* found */ > + if (node->map) > + addr = map__unmap_ip(node->map, node->ip); > + else > + addr = node->ip; > + > + return addr; > + } else > + pr_debug3("skipping alloc function: %s\n", caller->name); > + > + callchain_cursor_advance(&callchain_cursor); > + } > + > +out: > + pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip); > + return sample->ip; > +} > + > +static struct page_stat * > +__page_stat__findnew_page(u64 page, bool create) > { > struct rb_node **node = &page_tree.rb_node; > struct rb_node *parent = NULL; > @@ -298,6 +438,16 @@ static struct page_stat *search_page(unsigned long page, bool create) > return data; > } > > +static struct page_stat *page_stat__find_page(u64 page) > +{ > + return __page_stat__findnew_page(page, false); > +} > + > +static struct page_stat *page_stat__findnew_page(u64 page) > +{ > + return __page_stat__findnew_page(page, true); > +} > + > static int page_stat_cmp(struct page_stat *a, struct page_stat *b) > { > if (a->page > b->page) > @@ -319,7 +469,8 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b) > return 0; > } > > -static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, 
bool create) > +static struct page_stat * > +__page_stat__findnew_alloc(struct page_stat *pstat, bool create) > { > struct rb_node **node = &page_alloc_tree.rb_node; > struct rb_node *parent = NULL; > @@ -357,6 +508,62 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool cr > return data; > } > > +static struct page_stat *page_stat__find_alloc(struct page_stat *pstat) > +{ > + return __page_stat__findnew_alloc(pstat, false); > +} > + > +static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat) > +{ > + return __page_stat__findnew_alloc(pstat, true); > +} > + > +static struct page_stat * > +__page_stat__findnew_caller(u64 callsite, bool create) > +{ > + struct rb_node **node = &page_caller_tree.rb_node; > + struct rb_node *parent = NULL; > + struct page_stat *data; > + > + while (*node) { > + s64 cmp; > + > + parent = *node; > + data = rb_entry(*node, struct page_stat, node); > + > + cmp = data->callsite - callsite; > + if (cmp < 0) > + node = &parent->rb_left; > + else if (cmp > 0) > + node = &parent->rb_right; > + else > + return data; > + } > + > + if (!create) > + return NULL; > + > + data = zalloc(sizeof(*data)); > + if (data != NULL) { > + data->callsite = callsite; > + > + rb_link_node(&data->node, parent, node); > + rb_insert_color(&data->node, &page_caller_tree); > + } > + > + return data; > +} > + > +static struct page_stat *page_stat__find_caller(u64 callsite) > +{ > + return __page_stat__findnew_caller(callsite, false); > +} > + > +static struct page_stat *page_stat__findnew_caller(u64 callsite) > +{ > + return __page_stat__findnew_caller(callsite, true); > +} > + > static bool valid_page(u64 pfn_or_page) > { > if (use_pfn && pfn_or_page == -1UL) > @@ -375,6 +582,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, > unsigned int migrate_type = perf_evsel__intval(evsel, sample, > "migratetype"); > u64 bytes = kmem_page_size << order; > + u64 callsite; > struct page_stat *pstat; > 
struct page_stat this = { > .order = order, > @@ -397,25 +605,40 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, > return 0; > } > > + callsite = find_callsite(evsel, sample); > + > /* > * This is to find the current page (with correct gfp flags and > * migrate type) at free event. > */ > - pstat = search_page(page, true); > + pstat = page_stat__findnew_page(page); > if (pstat == NULL) > return -ENOMEM; > > pstat->order = order; > pstat->gfp_flags = gfp_flags; > pstat->migrate_type = migrate_type; > + pstat->callsite = callsite; > > this.page = page; > - pstat = search_page_alloc_stat(&this, true); > + pstat = page_stat__findnew_alloc(&this); > if (pstat == NULL) > return -ENOMEM; > > pstat->nr_alloc++; > pstat->alloc_bytes += bytes; > + pstat->callsite = callsite; > + > + pstat = page_stat__findnew_caller(callsite); > + if (pstat == NULL) > + return -ENOMEM; > + > + pstat->order = order; > + pstat->gfp_flags = gfp_flags; > + pstat->migrate_type = migrate_type; > + > + pstat->nr_alloc++; > + pstat->alloc_bytes += bytes; > > order_stats[order][migrate_type]++; > > @@ -441,7 +664,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, > nr_page_frees++; > total_page_free_bytes += bytes; > > - pstat = search_page(page, false); > + pstat = page_stat__find_page(page); > if (pstat == NULL) { > pr_debug2("missing free at page %"PRIx64" (order: %d)\n", > page, order); > @@ -455,11 +678,19 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, > this.page = page; > this.gfp_flags = pstat->gfp_flags; > this.migrate_type = pstat->migrate_type; > + this.callsite = pstat->callsite; > > rb_erase(&pstat->node, &page_tree); > free(pstat); > > - pstat = search_page_alloc_stat(&this, false); > + pstat = page_stat__find_alloc(&this); > + if (pstat == NULL) > + return -ENOENT; > + > + pstat->nr_free++; > + pstat->free_bytes += bytes; > + > + pstat = page_stat__find_caller(this.callsite); > if (pstat == NULL) > 
return -ENOENT; > > @@ -576,41 +807,89 @@ static const char * const migrate_type_str[] = { > "UNKNOWN", > }; > > -static void __print_page_result(struct rb_root *root, > - struct perf_session *session __maybe_unused, > - int n_lines) > +static void __print_page_alloc_result(struct perf_session *session, int n_lines) > { > - struct rb_node *next = rb_first(root); > + struct rb_node *next = rb_first(&page_alloc_sorted); > + struct machine *machine = &session->machines.host; > const char *format; > > - printf("\n%.80s\n", graph_dotted_line); > - printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n", > + printf("\n%.105s\n", graph_dotted_line); > + printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags | Callsite\n", > use_pfn ? "PFN" : "Page"); > - printf("%.80s\n", graph_dotted_line); > + printf("%.105s\n", graph_dotted_line); > > if (use_pfn) > - format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n"; > + format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx | %s\n"; > else > - format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n"; > + format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx | %s\n"; > > while (next && n_lines--) { > struct page_stat *data; > + struct symbol *sym; > + struct map *map; > + char buf[32]; > + char *caller = buf; > > data = rb_entry(next, struct page_stat, node); > + sym = machine__find_kernel_function(machine, data->callsite, > + &map, NULL); > + if (sym && sym->name) > + caller = sym->name; > + else > + scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); > > printf(format, (unsigned long long)data->page, > (unsigned long long)data->alloc_bytes / 1024, > data->nr_alloc, data->order, > migrate_type_str[data->migrate_type], > - (unsigned long)data->gfp_flags); > + (unsigned long)data->gfp_flags, caller); > > next = rb_next(next); > } > > if (n_lines == -1) > - printf(" ... | ... | ... | ... | ... | ... \n"); > + printf(" ... | ... | ... | ... | ... | ... 
| ...\n"); > > - printf("%.80s\n", graph_dotted_line); > + printf("%.105s\n", graph_dotted_line); > +} > + > +static void __print_page_caller_result(struct perf_session *session, int n_lines) > +{ > + struct rb_node *next = rb_first(&page_caller_sorted); > + struct machine *machine = &session->machines.host; > + > + printf("\n%.105s\n", graph_dotted_line); > + printf(" Total alloc (KB) | Hits | Order | Mig.type | GFP flags | Callsite\n"); > + printf("%.105s\n", graph_dotted_line); > + > + while (next && n_lines--) { > + struct page_stat *data; > + struct symbol *sym; > + struct map *map; > + char buf[32]; > + char *caller = buf; > + > + data = rb_entry(next, struct page_stat, node); > + sym = machine__find_kernel_function(machine, data->callsite, > + &map, NULL); > + if (sym && sym->name) > + caller = sym->name; > + else > + scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); > + > + printf(" %'16llu | %'9d | %5d | %8s | %08lx | %s\n", > + (unsigned long long)data->alloc_bytes / 1024, > + data->nr_alloc, data->order, > + migrate_type_str[data->migrate_type], > + (unsigned long)data->gfp_flags, caller); > + > + next = rb_next(next); > + } > + > + if (n_lines == -1) > + printf(" ... | ... | ... | ... | ... 
| ...\n"); > + > + printf("%.105s\n", graph_dotted_line); > } > > static void print_slab_summary(void) > @@ -682,8 +961,10 @@ static void print_slab_result(struct perf_session *session) > > static void print_page_result(struct perf_session *session) > { > + if (caller_flag) > + __print_page_caller_result(session, caller_lines); > if (alloc_flag) > - __print_page_result(&page_alloc_sorted, session, alloc_lines); > + __print_page_alloc_result(session, alloc_lines); > print_page_summary(); > } > > @@ -802,6 +1083,7 @@ static void sort_result(void) > } > if (kmem_page) { > __sort_page_result(&page_alloc_tree, &page_alloc_sorted); > + __sort_page_result(&page_caller_tree, &page_caller_sorted); > } > } > > @@ -1084,7 +1366,7 @@ static int __cmd_record(int argc, const char **argv) > if (kmem_slab) > rec_argc += ARRAY_SIZE(slab_events); > if (kmem_page) > - rec_argc += ARRAY_SIZE(page_events); > + rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ > > rec_argv = calloc(rec_argc + 1, sizeof(char *)); > > @@ -1099,6 +1381,8 @@ static int __cmd_record(int argc, const char **argv) > rec_argv[i] = strdup(slab_events[j]); > } > if (kmem_page) { > + rec_argv[i++] = strdup("-g"); > + > for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) > rec_argv[i] = strdup(page_events[j]); > } > @@ -1159,7 +1443,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) > > file.path = input_name; > > - session = perf_session__new(&file, false, &perf_kmem); > + kmem_session = session = perf_session__new(&file, false, &perf_kmem); > if (session == NULL) > return -1; > > @@ -1172,6 +1456,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) > } > > kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); > + symbol_conf.use_callchain = true; > } > > symbol__init(&session->header.env); > -- > 2.3.4 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. 
For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href="mailto:dont@xxxxxxxxx">email@xxxxxxxxx</a>