Using the BPF_PROG_RUN mechanism, we can run a raw_tp BPF program to collect some semi-global locks like per-cpu locks. Let's add runqueue locks using bpf_per_cpu_ptr() helper. $ sudo ./perf lock con -abl -- sleep 1 contended total wait max wait avg wait address symbol 248 3.25 ms 32.23 us 13.10 us ffff8cc75cfd2940 siglock 60 217.91 us 9.69 us 3.63 us ffff8cc700061c00 8 70.23 us 13.86 us 8.78 us ffff8cc703629484 4 56.32 us 35.81 us 14.08 us ffff8cc78b66f778 mmap_lock 4 16.70 us 5.18 us 4.18 us ffff8cc7036a0684 3 4.99 us 2.65 us 1.66 us ffff8d053da30c80 rq_lock 2 3.44 us 2.28 us 1.72 us ffff8d053dcf0c80 rq_lock 9 2.51 us 371 ns 278 ns ffff8ccb92479440 2 2.11 us 1.24 us 1.06 us ffff8d053db30c80 rq_lock 2 2.06 us 1.69 us 1.03 us ffff8d053d970c80 rq_lock Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> --- tools/perf/util/bpf_lock_contention.c | 27 ++++++++++++++-- .../perf/util/bpf_skel/lock_contention.bpf.c | 31 +++++++++++++++++++ tools/perf/util/bpf_skel/lock_data.h | 5 +++ 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 51631af3b4d6..235fc7150545 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -151,6 +151,8 @@ int lock_contention_prepare(struct lock_contention *con) skel->bss->needs_callstack = con->save_callstack; skel->bss->lock_owner = con->owner; + bpf_program__set_autoload(skel->progs.collect_lock_syms, false); + lock_contention_bpf__attach(skel); return 0; } @@ -198,14 +200,26 @@ static const char *lock_contention_get_name(struct lock_contention *con, } if (con->aggr_mode == LOCK_AGGR_ADDR) { + int lock_fd = bpf_map__fd(skel->maps.lock_syms); + + /* per-process locks set upper bits of the flags */ if (flags & LCD_F_MMAP_LOCK) return "mmap_lock"; if (flags & LCD_F_SIGHAND_LOCK) return "siglock"; + + /* global locks with symbols */ sym = machine__find_kernel_symbol(machine, key->lock_addr, &kmap); if (sym) - name = sym->name; - return name; + return sym->name; + + /* try semi-global locks collected separately */ + if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr, &flags)) { + if (flags == LOCK_CLASS_RQLOCK) + return "rq_lock"; + } + + return ""; } /* LOCK_AGGR_CALLER: skip lock internal functions */ @@ -258,6 +272,15 @@ int lock_contention_read(struct lock_contention *con) thread__set_comm(idle, "swapper", /*timestamp=*/0); } + if (con->aggr_mode == LOCK_AGGR_ADDR) { + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .flags = BPF_F_TEST_RUN_ON_CPU, + ); + int prog_fd = bpf_program__fd(skel->progs.collect_lock_syms); + + bpf_prog_test_run_opts(prog_fd, &opts); + } + /* make sure it loads the kernel map */ map__load(maps__first(machine->kmaps)); diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 4ba34caf84eb..2d50c4395733 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -10,6 +10,9 @@ /* default buffer size */ #define MAX_ENTRIES 10240 +/* for collect_lock_syms(). 4096 was rejected by the verifier */ +#define MAX_CPUS 1024 + /* lock contention flags from include/trace/events/lock.h */ #define LCB_F_SPIN (1U << 0) #define LCB_F_READ (1U << 1) @@ -56,6 +59,13 @@ struct { __uint(max_entries, MAX_ENTRIES); } task_data SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 16384); +} lock_syms SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); @@ -378,4 +388,25 @@ int contention_end(u64 *ctx) return 0; } +extern struct rq runqueues __ksym; + +SEC("raw_tp/bpf_test_finish") +int BPF_PROG(collect_lock_syms) +{ + __u64 lock_addr; + __u32 lock_flag; + + for (int i = 0; i < MAX_CPUS; i++) { + struct rq *rq = bpf_per_cpu_ptr(&runqueues, i); + + if (rq == NULL) + break; + + lock_addr = (__u64)&rq->__lock; + lock_flag = LOCK_CLASS_RQLOCK; + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index 5ed1a0955015..e59366f2dba3 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -36,4 +36,9 @@ enum lock_aggr_mode { LOCK_AGGR_CALLER, }; +enum lock_class_sym { + LOCK_CLASS_NONE, + LOCK_CLASS_RQLOCK, +}; + #endif /* UTIL_BPF_SKEL_LOCK_DATA_H */ -- 2.40.0.rc1.284.g88254d51c5-goog