From: Tonghao Zhang <xiangxia.m.yue@xxxxxxxxx>

Users can enable the sysctl kernel.bpf_stats_enabled to fetch run_time_ns
and run_cnt, and it is easy to calculate the average run time from them.
In some cases, though, the max cost of a single bpf prog invocation is
more useful for telling whether there is a sysload burst or high CPU
usage:

* If a prog is invoked frequently (run_cnt may be very large),
  run_time_ns/run_cnt is not a good indicator of a bpf prog CPU burst.
  Fetching the stats via syscall frequently also consumes a lot of CPU
  cycles.
* It also helps to debug a bpf prog: is the cost what we expect? If not,
  there may be an issue in the prog.

This patch introduces a stats update helper and exposes the max cost as
run_max_cost_ns.

$ bpftool --json --pretty p s
...
"run_max_cost_ns": 313367

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@xxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx>
Cc: Song Liu <song@xxxxxxxxxx>
Cc: Yonghong Song <yhs@xxxxxx>
Cc: John Fastabend <john.fastabend@xxxxxxxxx>
Cc: KP Singh <kpsingh@xxxxxxxxxx>
Cc: Stanislav Fomichev <sdf@xxxxxxxxxx>
Cc: Hao Luo <haoluo@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Hou Tao <houtao1@xxxxxxxxxx>
---
 include/linux/filter.h   | 29 ++++++++++++++++++++++-------
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/syscall.c     | 10 +++++++++-
 kernel/bpf/trampoline.c  | 10 +---------
 4 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index bf701976056e..886b65fcd4ac 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -556,6 +556,7 @@ struct bpf_prog_stats {
 	u64_stats_t cnt;
 	u64_stats_t nsecs;
 	u64_stats_t misses;
+	u64_stats_t max_cost;
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
@@ -578,6 +579,26 @@ typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
 					  unsigned int (*bpf_func)(const void *,
 								   const struct bpf_insn *));
 
+static inline void bpf_prog_update_stats(const struct bpf_prog *prog, u64 start)
+{
+	struct bpf_prog_stats *stats;
+	unsigned long flags;
+	u64 run_time, max_cost;
+
+	stats = this_cpu_ptr(prog->stats);
+	flags = u64_stats_update_begin_irqsave(&stats->syncp);
+
+	run_time = sched_clock() - start;
+	u64_stats_inc(&stats->cnt);
+	u64_stats_add(&stats->nsecs, run_time);
+
+	max_cost = u64_stats_read(&stats->max_cost);
+	if (max_cost < run_time)
+		u64_stats_set(&stats->max_cost, run_time);
+
+	u64_stats_update_end_irqrestore(&stats->syncp, flags);
+}
+
 static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 					  const void *ctx,
 					  bpf_dispatcher_fn dfunc)
@@ -586,16 +607,10 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 
 	cant_migrate();
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
-		struct bpf_prog_stats *stats;
 		u64 start = sched_clock();
-		unsigned long flags;
 
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
-		stats = this_cpu_ptr(prog->stats);
-		flags = u64_stats_update_begin_irqsave(&stats->syncp);
-		u64_stats_inc(&stats->cnt);
-		u64_stats_add(&stats->nsecs, sched_clock() - start);
-		u64_stats_update_end_irqrestore(&stats->syncp, flags);
+		bpf_prog_update_stats(prog, start);
 	} else {
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
 	}
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 464ca3f01fe7..da4d1f2d7bc2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6259,6 +6259,7 @@ struct bpf_prog_info {
 	__u32 verified_insns;
 	__u32 attach_btf_obj_id;
 	__u32 attach_btf_id;
+	__u64 run_max_cost_ns;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64131f88c553..06439b09863d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2105,6 +2105,7 @@ struct bpf_prog_kstats {
 	u64 nsecs;
 	u64 cnt;
 	u64 misses;
+	u64 max_cost;
 };
 
 void notrace bpf_prog_inc_misses_counter(struct bpf_prog *prog)
@@ -2122,12 +2123,13 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
 			       struct bpf_prog_kstats *stats)
 {
 	u64 nsecs = 0, cnt = 0, misses = 0;
+	u64 max_cost = 0;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
 		const struct bpf_prog_stats *st;
 		unsigned int start;
-		u64 tnsecs, tcnt, tmisses;
+		u64 tnsecs, tcnt, tmisses, tmax_cost;
 
 		st = per_cpu_ptr(prog->stats, cpu);
 		do {
@@ -2135,14 +2137,17 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
 			tnsecs = u64_stats_read(&st->nsecs);
 			tcnt = u64_stats_read(&st->cnt);
 			tmisses = u64_stats_read(&st->misses);
+			tmax_cost = u64_stats_read(&st->max_cost);
 		} while (u64_stats_fetch_retry(&st->syncp, start));
 		nsecs += tnsecs;
 		cnt += tcnt;
 		misses += tmisses;
+		max_cost = max(max_cost, tmax_cost);
 	}
 	stats->nsecs = nsecs;
 	stats->cnt = cnt;
 	stats->misses = misses;
+	stats->max_cost = max_cost;
 }
 
 #ifdef CONFIG_PROC_FS
@@ -2162,6 +2167,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "prog_id:\t%u\n"
 		   "run_time_ns:\t%llu\n"
 		   "run_cnt:\t%llu\n"
+		   "run_max_cost_ns:\t%llu\n"
 		   "recursion_misses:\t%llu\n"
 		   "verified_insns:\t%u\n",
 		   prog->type,
@@ -2171,6 +2177,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   prog->aux->id,
 		   stats.nsecs,
 		   stats.cnt,
+		   stats.max_cost,
 		   stats.misses,
 		   prog->aux->verified_insns);
 }
@@ -3962,6 +3969,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 	info.run_time_ns = stats.nsecs;
 	info.run_cnt = stats.cnt;
 	info.recursion_misses = stats.misses;
+	info.run_max_cost_ns = stats.max_cost;
 
 	info.verified_insns = prog->aux->verified_insns;
 
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d6395215b849..4ddad462562e 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -882,8 +882,6 @@ static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tram
 static void notrace update_prog_stats(struct bpf_prog *prog,
 				      u64 start)
 {
-	struct bpf_prog_stats *stats;
-
 	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
 	    /* static_key could be enabled in __bpf_prog_enter*
 	     * and disabled in __bpf_prog_exit*.
@@ -891,13 +889,7 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
 	     * Hence check that 'start' is valid.
 	     */
 	    start > NO_START_TIME) {
-		unsigned long flags;
-
-		stats = this_cpu_ptr(prog->stats);
-		flags = u64_stats_update_begin_irqsave(&stats->syncp);
-		u64_stats_inc(&stats->cnt);
-		u64_stats_add(&stats->nsecs, sched_clock() - start);
-		u64_stats_update_end_irqrestore(&stats->syncp, flags);
+		bpf_prog_update_stats(prog, start);
 	}
 }
 
-- 
2.27.0
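
For reference (not part of the patch itself), a minimal userspace sketch of
how the new counter could be read through the existing BPF_OBJ_GET_INFO_BY_FD
interface. It assumes a kernel and uapi header that carry this change, libbpf
for the helpers, and kernel.bpf_stats_enabled set to 1; the file name and the
prog-id argument are just placeholders. bpftool already covers the common
case, this only shows that existing struct bpf_prog_info users pick up the
new field without other changes:

/* read_max_cost.c -- hypothetical example, not part of this patch.
 *
 * Assumes:
 *  - a kernel and <linux/bpf.h> uapi header with run_max_cost_ns applied
 *  - kernel.bpf_stats_enabled has been set to 1
 *  - libbpf provides bpf_prog_get_fd_by_id()/bpf_obj_get_info_by_fd()
 */
#include <stdio.h>
#include <stdlib.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

int main(int argc, char **argv)
{
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info);
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <prog_id>\n", argv[0]);
		return 1;
	}

	/* Look up the program by id, then query its info blob. */
	fd = bpf_prog_get_fd_by_id(atoi(argv[1]));
	if (fd < 0) {
		perror("bpf_prog_get_fd_by_id");
		return 1;
	}

	if (bpf_obj_get_info_by_fd(fd, &info, &len)) {
		perror("bpf_obj_get_info_by_fd");
		return 1;
	}

	printf("run_time_ns:     %llu\n", (unsigned long long)info.run_time_ns);
	printf("run_cnt:         %llu\n", (unsigned long long)info.run_cnt);
	if (info.run_cnt)
		printf("avg_cost_ns:     %llu\n",
		       (unsigned long long)(info.run_time_ns / info.run_cnt));
	/* New field added by this patch. */
	printf("run_max_cost_ns: %llu\n",
	       (unsigned long long)info.run_max_cost_ns);
	return 0;
}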