Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote on Fri, Sep 2, 2022 at 23:38:
>
> On 9/2/22 12:12 PM, Yunhui Cui wrote:
> [...]
> > index a5f21dc3c432..9cb072f9e32b 100644
> > --- a/include/linux/filter.h
> > +++ b/include/linux/filter.h
> > @@ -565,6 +565,12 @@ struct sk_filter {
> >   	struct bpf_prog	*prog;
> >   };
> >
> >   		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> >   		stats = this_cpu_ptr(prog->stats);
> > @@ -593,6 +601,11 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
> >   	} else {
> >   		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
> >   	}
> > +	bact = this_cpu_ptr(&bpftime);
> > +	flags = u64_stats_update_begin_irqsave(&bact->syncp);
> > +	u64_stats_add(&bact->nsecs, sched_clock() - start);
> > +	u64_stats_update_end_irqrestore(&bact->syncp, flags);
> > +
> >   	return ret;
>
> The overhead this adds unconditionally is no-go. Have you tried
> using/improving:

Commit 47c09d6a9f6794caface4ad50930460b82d7c670 cannot meet the
requirement: it does not let us directly see the cumulative running
time of progs on each CPU.

As for the overhead, how about moving the above code into the
if (static_branch_unlikely(&bpf_stats_enabled_key)) {} branch, like
the existing prog->stats processing?
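To make that concrete, an untested sketch of what I mean. The per-cpu
variable bpftime and its nsecs/syncp fields are from the patch above;
the struct name bpf_act_time is only a placeholder here, since the
filter.h hunk that defines it got truncated in the quote:

static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
					  const void *ctx,
					  bpf_dispatcher_fn dfunc)
{
	u32 ret;

	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
		struct bpf_prog_stats *stats;
		struct bpf_act_time *bact; /* placeholder struct name */
		u64 start = sched_clock();
		u64 duration;
		unsigned long flags;

		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
		duration = sched_clock() - start;

		/* Existing per-prog stats, unchanged. */
		stats = this_cpu_ptr(prog->stats);
		flags = u64_stats_update_begin_irqsave(&stats->syncp);
		u64_stats_inc(&stats->cnt);
		u64_stats_add(&stats->nsecs, duration);
		u64_stats_update_end_irqrestore(&stats->syncp, flags);

		/* Per-cpu cumulative BPF runtime, now also behind the
		 * static key, so the disabled path pays nothing.
		 */
		bact = this_cpu_ptr(&bpftime);
		flags = u64_stats_update_begin_irqsave(&bact->syncp);
		u64_stats_add(&bact->nsecs, duration);
		u64_stats_update_end_irqrestore(&bact->syncp, flags);
	} else {
		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
	}
	return ret;
}

That way the accounting costs nothing unless bpf_stats_enabled is
turned on, the same as the per-prog counters today.

Yunhui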