Currently perf lock contention shows only one caller function for each entry, but users sometimes need to see the full call stack. Use the -v/--verbose option to print the call stack for each entry. # perf lock con -a -b -v sleep 3 Looking at the vmlinux_path (8 entries long) symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols contended total wait max wait avg wait type caller 1 10.74 us 10.74 us 10.74 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb841015 tick_do_update_jiffies64+0x25 0xffffffffbb8409ee tick_irq_enter+0x9e 1 7.70 us 7.70 us 7.70 us spinlock __bpf_trace_contention_begin+0xb 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffc03b5c47 bpf_prog_bf07ae9e2cbd02c5_contention_begin+0x117 0xffffffffbb8b8e75 bpf_trace_run2+0x35 0xffffffffbb7eab9b __bpf_trace_contention_begin+0xb 0xffffffffbb7ebe75 queued_spin_lock_slowpath+0x1f5 0xffffffffbc1c26ff _raw_spin_lock+0x1f 0xffffffffbb7bc27e raw_spin_rq_lock_nested+0xe 0xffffffffbb7cef9c load_balance+0x66c Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> --- tools/perf/builtin-lock.c | 43 ++++++++++++++++++++++----- tools/perf/util/bpf_lock_contention.c | 9 ++++++ tools/perf/util/lock-contention.h | 1 + 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 2a5672f8d22e..e66fbb38d8df 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -972,13 +972,14 @@ static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sampl return -1; } -static u64 callchain_id(struct evsel *evsel, struct perf_sample 
*sample, u64 *callchains) { struct callchain_cursor *cursor = &callchain_cursor; struct machine *machine = &session->machines.host; struct thread *thread; u64 hash = 0; int skip = 0; + int i = 0; int ret; thread = machine__findnew_thread(machine, -1, sample->pid); @@ -1002,6 +1003,9 @@ static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample) if (node == NULL) break; + if (callchains) + callchains[i++] = node->ip; + /* skip first few entries - for lock functions */ if (++skip <= CONTENTION_STACK_SKIP) goto next; @@ -1025,6 +1029,7 @@ static int report_lock_contention_begin_event(struct evsel *evsel, struct lock_seq_stat *seq; u64 addr = evsel__intval(evsel, sample, "lock_addr"); u64 key; + u64 callchains[CONTENTION_STACK_DEPTH]; switch (aggr_mode) { case LOCK_AGGR_ADDR: @@ -1034,7 +1039,9 @@ static int report_lock_contention_begin_event(struct evsel *evsel, key = sample->tid; break; case LOCK_AGGR_CALLER: - key = callchain_id(evsel, sample); + if (verbose) + memset(callchains, 0, sizeof(callchains)); + key = callchain_id(evsel, sample, verbose ? 
callchains : NULL); break; default: pr_err("Invalid aggregation mode: %d\n", aggr_mode); @@ -1053,6 +1060,12 @@ static int report_lock_contention_begin_event(struct evsel *evsel, ls = lock_stat_findnew(key, caller, flags); if (!ls) return -ENOMEM; + + if (aggr_mode == LOCK_AGGR_CALLER && verbose) { + ls->callstack = memdup(callchains, sizeof(callchains)); + if (ls->callstack == NULL) + return -ENOMEM; + } } ts = thread_stat_findnew(sample->tid); @@ -1117,7 +1130,7 @@ static int report_lock_contention_end_event(struct evsel *evsel, key = sample->tid; break; case LOCK_AGGR_CALLER: - key = callchain_id(evsel, sample); + key = callchain_id(evsel, sample, NULL); break; default: pr_err("Invalid aggregation mode: %d\n", aggr_mode); @@ -1466,7 +1479,7 @@ static void sort_contention_result(void) sort_result(); } -static void print_contention_result(void) +static void print_contention_result(struct lock_contention *con) { struct lock_stat *st; struct lock_key *key; @@ -1505,6 +1518,22 @@ static void print_contention_result(void) } pr_info(" %10s %s\n", get_type_str(st), st->name); + if (verbose) { + struct map *kmap; + struct symbol *sym; + char buf[128]; + u64 ip; + + for (int i = 0; i < CONTENTION_STACK_DEPTH; i++) { + if (!st->callstack || !st->callstack[i]) + break; + + ip = st->callstack[i]; + sym = machine__find_kernel_symbol(con->machine, ip, &kmap); + get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf)); + pr_info("\t\t\t%#lx %s\n", (unsigned long)ip, buf); + } + } } print_bad_events(bad, total); @@ -1620,6 +1649,8 @@ static int __cmd_contention(int argc, const char **argv) return PTR_ERR(session); } + con.machine = &session->machines.host; + /* for lock function check */ symbol_conf.sort_by_name = true; symbol__init(&session->header.env); @@ -1638,8 +1669,6 @@ static int __cmd_contention(int argc, const char **argv) signal(SIGCHLD, sighandler); signal(SIGTERM, sighandler); - con.machine = &session->machines.host; - con.evlist = evlist__new(); if (con.evlist == 
NULL) { err = -ENOMEM; @@ -1711,7 +1740,7 @@ static int __cmd_contention(int argc, const char **argv) setup_pager(); sort_contention_result(); - print_contention_result(); + print_contention_result(&con); out_delete: evlist__delete(con.evlist); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index c591a66733ef..6545bee65347 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -8,6 +8,7 @@ #include "util/thread_map.h" #include "util/lock-contention.h" #include <linux/zalloc.h> +#include <linux/string.h> #include <bpf/bpf.h> #include "bpf_skel/lock_contention.skel.h" @@ -171,6 +172,14 @@ int lock_contention_read(struct lock_contention *con) return -1; } + if (verbose) { + st->callstack = memdup(stack_trace, sizeof(stack_trace)); + if (st->callstack == NULL) { + free(st); + return -1; + } + } + hlist_add_head(&st->hash_entry, con->result); prev_key = key; } diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index 2146efc33396..bdb6e2a61e5b 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -11,6 +11,7 @@ struct lock_stat { u64 addr; /* address of lockdep_map, used as ID */ char *name; /* for strcpy(), we cannot use const */ + u64 *callstack; unsigned int nr_acquire; unsigned int nr_acquired; -- 2.37.2.789.g6183377224-goog