> On 28 Feb 2025, at 12:42 AM, Namhyung Kim <namhyung@xxxxxxxxxx> wrote: > > When BPF collects the stats for the latency in usec, it first divides > the time by 1000. But that means it would have 0 if the delta is small > and won't update the total time properly. > > Let's keep the stats in nsec always and adjust to usec before printing. > > Before: > > $ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1 > # DURATION | COUNT | GRAPH | > 0 - 1 us | 765 | ############################################# | > 1 - 2 us | 10 | | > 2 - 4 us | 2 | | > 4 - 8 us | 5 | | > > # statistics (in usec) > total time: 0 <<<--- (here) > avg time: 0 > max time: 6 > min time: 0 > count: 782 > > After: > > $ sudo ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1 > # DURATION | COUNT | GRAPH | > 0 - 1 us | 880 | ############################################ | > 1 - 2 us | 13 | | > 2 - 4 us | 8 | | > 4 - 8 us | 3 | | > > # statistics (in usec) > total time: 268 <<<--- (here) > avg time: 0 > max time: 6 > min time: 0 > count: 904 > Tested in powerpc for the total time change in usec before and after the patch. After the patch, shows non-zero in total time. # ./perf ftrace latency -ab -T mutex_lock --hide-empty -- sleep 0.1 # DURATION | COUNT | GRAPH | 0 - 1 us | 38 | ############################## | 1 - 2 us | 19 | ############### | 2 - 4 us | 1 | | # statistics (in usec) total time: 53 avg time: 1 max time: 2 min time: 0 count: 58 Tested-by: Athira Rajeev <atrajeev@xxxxxxxxxxxxx> > Cc: Gabriele Monaco <gmonaco@xxxxxxxxxx> > Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx> > --- > tools/perf/util/bpf_ftrace.c | 8 +++++++- > tools/perf/util/bpf_skel/func_latency.bpf.c | 20 ++++++++------------ > 2 files changed, 15 insertions(+), 13 deletions(-) > > diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c > index 51f407a782d6c58a..7324668cc83e747e 100644 > --- a/tools/perf/util/bpf_ftrace.c > +++ b/tools/perf/util/bpf_ftrace.c > @@ -128,7 +128,7 @@ int perf_ftrace__latency_stop_bpf(struct perf_ftrace *ftrace __maybe_unused) > return 0; > } > > -int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, > +int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace, > int buckets[], struct stats *stats) > { > int i, fd, err; > @@ -158,6 +158,12 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, > stats->n = skel->bss->count; > stats->max = skel->bss->max; > stats->min = skel->bss->min; > + > + if (!ftrace->use_nsec) { > + stats->mean /= 1000; > + stats->max /= 1000; > + stats->min /= 1000; > + } > } > > free(hist); > diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c > index 09e70d40a0f4d855..3d3d9f427c20876e 100644 > --- a/tools/perf/util/bpf_skel/func_latency.bpf.c > +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c > @@ -102,6 +102,7 @@ int BPF_PROG(func_end) > start = bpf_map_lookup_elem(&functime, &tid); > if (start) { > __s64 delta = bpf_ktime_get_ns() - *start; > + __u64 val = delta; > __u32 key = 0; > __u64 *hist; > > @@ -111,26 +112,24 @@ int BPF_PROG(func_end) > return 0; > > if (bucket_range != 0) { > - delta /= cmp_base; > + val = delta / cmp_base; > > if (min_latency > 0) { > - if (delta > min_latency) > - delta -= min_latency; > + if (val > min_latency) > + val -= min_latency; > else > goto do_lookup; > } > > // Less than 1 unit (ms or ns), or, in the future, > // than the min latency desired. > - if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units ) > - // clang 12 doesn't like s64 / u32 division > - key = (__u64)delta / bucket_range + 1; > + if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units ) > + key = val / bucket_range + 1; > if (key >= bucket_num || > - delta >= max_latency - min_latency) > + val >= max_latency - min_latency) > key = bucket_num - 1; > } > > - delta += min_latency; > goto do_lookup; > } > // calculate index using delta > @@ -146,10 +145,7 @@ int BPF_PROG(func_end) > > *hist += 1; > > - if (bucket_range == 0) > - delta /= cmp_base; > - > - __sync_fetch_and_add(&total, delta); > + __sync_fetch_and_add(&total, delta); // always in nsec > __sync_fetch_and_add(&count, 1); > > if (delta > max) > -- > 2.48.1.711.g2feabab25a-goog > >