On 9/2/22 12:12 PM, Yunhui Cui wrote:
[...]
index a5f21dc3c432..9cb072f9e32b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -565,6 +565,12 @@ struct sk_filter {
struct bpf_prog *prog;
};
+struct bpf_account {
+ u64_stats_t nsecs;
+ struct u64_stats_sync syncp;
+};
+DECLARE_PER_CPU(struct bpf_account, bpftime);
+
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
typedef unsigned int (*bpf_dispatcher_fn)(const void *ctx,
@@ -577,12 +583,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
bpf_dispatcher_fn dfunc)
{
u32 ret;
+ struct bpf_account *bact;
+ unsigned long flags;
+ u64 start = 0;
cant_migrate();
+ start = sched_clock();
if (static_branch_unlikely(&bpf_stats_enabled_key)) {
struct bpf_prog_stats *stats;
- u64 start = sched_clock();
- unsigned long flags;
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
stats = this_cpu_ptr(prog->stats);
@@ -593,6 +601,11 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
} else {
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
}
+ bact = this_cpu_ptr(&bpftime);
+ flags = u64_stats_update_begin_irqsave(&bact->syncp);
+ u64_stats_add(&bact->nsecs, sched_clock() - start);
+ u64_stats_update_end_irqrestore(&bact->syncp, flags);
+
return ret;
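The overhead this adds unconditionally (two sched_clock() calls plus a per-CPU counter update on every BPF program run, even when bpf_stats_enabled_key is off) is a no-go. Have you tried using/improving: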
commit 47c09d6a9f6794caface4ad50930460b82d7c670
Author: Song Liu <songliubraving@xxxxxx>
Date: Mon Mar 9 10:32:15 2020 -0700
bpftool: Introduce "prog profile" command
With fentry/fexit programs, it is possible to profile BPF program with
hardware counters. Introduce bpftool "prog profile", which measures key
metrics of a BPF program.
bpftool prog profile command creates per-cpu perf events. Then it attaches
fentry/fexit programs to the target BPF program. The fentry program saves
perf event value to a map. The fexit program reads the perf event again,
and calculates the difference, which is the instructions/cycles used by
the target program.
Example input and output:
./bpftool prog profile id 337 duration 3 cycles instructions llc_misses
4228 run_cnt
3403698 cycles (84.08%)
3525294 instructions # 1.04 insn per cycle (84.05%)
13 llc_misses # 3.69 LLC misses per million insns (83.50%)
This command measures cycles and instructions for BPF program with id
337 for 3 seconds. The program has triggered 4228 times. The rest of the
output is similar to perf-stat. [...]
Thanks,
Daniel