The max-latency value can make the histogram smaller, but not larger: we have a maximum of 22 buckets, and specifying a max-latency that would require more buckets has no effect. Dynamically allocate the buckets and compute the bucket number from the max latency as (max-min) / range + 2. If the maximum is not specified, we still set the bucket number to 22 and compute the maximum accordingly. Fail if the maximum is smaller than min+range; this way we make sure we always have at least 3 buckets: those below min, those above max and one in the middle. Since max-latency is not available in log2 mode, always use 22 buckets. Signed-off-by: Gabriele Monaco <gmonaco@xxxxxxxxxx> --- tools/perf/builtin-ftrace.c | 57 +++++++++++++++------ tools/perf/util/bpf_ftrace.c | 6 ++- tools/perf/util/bpf_skel/func_latency.bpf.c | 7 +-- tools/perf/util/ftrace.h | 1 + 4 files changed, 51 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index cfd770ec72867..4f76094ea06d4 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -733,6 +733,7 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[], { int min_latency = ftrace->min_latency; int max_latency = ftrace->max_latency; + unsigned int bucket_num = ftrace->bucket_num; char *p, *q; char *unit; double num; @@ -797,10 +798,10 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[], if (num > 0) // 1st entry: [ 1 unit .. 
bucket_range units ] i = num / ftrace->bucket_range + 1; if (num >= max_latency - min_latency) - i = NUM_BUCKET -1; + i = bucket_num -1; } - if (i >= NUM_BUCKET) - i = NUM_BUCKET - 1; + if ((unsigned)i >= bucket_num) + i = bucket_num - 1; num += min_latency; do_inc: @@ -820,13 +821,14 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[]) { int min_latency = ftrace->min_latency; bool use_nsec = ftrace->use_nsec; - int i; + unsigned int bucket_num = ftrace->bucket_num; + unsigned int i; int total = 0; int bar_total = 46; /* to fit in 80 column */ char bar[] = "###############################################"; int bar_len; - for (i = 0; i < NUM_BUCKET; i++) + for (i = 0; i < bucket_num; i++) total += buckets[i]; if (total == 0) { @@ -843,7 +845,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[]) 0, min_latency ?: 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); - for (i = 1; i < NUM_BUCKET - 1; i++) { + for (i = 1; i < bucket_num - 1; i++) { unsigned int start, stop; const char *unit = use_nsec ? "ns" : "us"; @@ -881,11 +883,11 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[]) bar_total - bar_len, ""); } - bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; + bar_len = buckets[bucket_num - 1] * bar_total / total; if (!ftrace->bucket_range) { printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s "); } else { - unsigned int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range + min_latency; + unsigned int upper_outlier = (bucket_num - 2) * ftrace->bucket_range + min_latency; if (upper_outlier > ftrace->max_latency) upper_outlier = ftrace->max_latency; @@ -897,7 +899,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[]) printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? 
"ns" : "us"); } } - printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1], + printf(" | %10d | %.*s%*s |\n", buckets[bucket_num - 1], bar_len, bar, bar_total - bar_len, ""); printf("\n# statistics (in %s)\n", ftrace->use_nsec ? "nsec" : "usec"); @@ -997,7 +999,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace) struct pollfd pollfd = { .events = POLLIN, }; - int buckets[NUM_BUCKET] = { }; + int *buckets; trace_fd = prepare_func_latency(ftrace); if (trace_fd < 0) @@ -1011,6 +1013,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace) evlist__start_workload(ftrace->evlist); + buckets = calloc(ftrace->bucket_num, sizeof(*buckets)); + if (buckets == NULL) { + pr_err("failed to allocate memory for the buckets\n"); + goto out; + } + line[0] = '\0'; while (!done) { if (poll(&pollfd, 1, -1) < 0) @@ -1030,7 +1038,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace) if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf)); pr_err("workload failed: %s\n", emsg); - goto out; + goto out_free_buckets; } /* read remaining buffer contents */ @@ -1045,6 +1053,8 @@ static int __cmd_latency(struct perf_ftrace *ftrace) display_histogram(ftrace, buckets); +out_free_buckets: + free(buckets); out: close(trace_fd); cleanup_func_latency(ftrace); @@ -1634,7 +1644,7 @@ int cmd_ftrace(int argc, const char **argv) OPT_UINTEGER(0, "min-latency", &ftrace.min_latency, "Minimum latency (1st bucket). Works only with --bucket-range."), OPT_UINTEGER(0, "max-latency", &ftrace.max_latency, - "Maximum latency (last bucket). Works only with --bucket-range and total buckets less than 22."), + "Maximum latency (last bucket). 
Works only with --bucket-range."), OPT_PARENT(common_options), }; const struct option profile_options[] = { @@ -1751,10 +1761,25 @@ int cmd_ftrace(int argc, const char **argv) ret = -EINVAL; goto out_delete_filters; } - if (ftrace.bucket_range && !ftrace.max_latency) { - /* default max latency should depend on bucket range and num_buckets */ - ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range + - ftrace.min_latency; + if (ftrace.bucket_range && ftrace.max_latency && + ftrace.max_latency < ftrace.min_latency + ftrace.bucket_range) { + /* we need at least 1 bucket excluding min and max buckets */ + pr_err("--max-latency must be larger than min-latency + bucket-range\n"); + parse_options_usage(ftrace_usage, options, + "max-latency", /*short_opt=*/false); + ret = -EINVAL; + goto out_delete_filters; + } + /* set default unless max_latency is set and valid */ + ftrace.bucket_num = NUM_BUCKET; + if (ftrace.bucket_range) { + if (ftrace.max_latency) + ftrace.bucket_num = (ftrace.max_latency - ftrace.min_latency) / + ftrace.bucket_range + 2; + else + /* default max latency should depend on bucket range and num_buckets */ + ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range + + ftrace.min_latency; } cmd_func = __cmd_latency; break; diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 25fc280e414ac..51f407a782d6c 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -39,6 +39,10 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) skel->rodata->bucket_range = ftrace->bucket_range; skel->rodata->min_latency = ftrace->min_latency; + skel->rodata->bucket_num = ftrace->bucket_num; + if (ftrace->bucket_range && ftrace->bucket_num) { + bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num); + } /* don't need to set cpu filter for system-wide mode */ if (ftrace->target.cpu_list) { @@ -138,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused, if (hist == 
NULL) return -ENOMEM; - for (idx = 0; idx < NUM_BUCKET; idx++) { + for (idx = 0; idx < skel->rodata->bucket_num; idx++) { err = bpf_map_lookup_elem(fd, &idx, hist); if (err) { buckets[idx] = 0; diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index fb144811b34fc..09e70d40a0f4d 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -50,6 +50,7 @@ const volatile int use_nsec = 0; const volatile unsigned int bucket_range; const volatile unsigned int min_latency; const volatile unsigned int max_latency; +const volatile unsigned int bucket_num = NUM_BUCKET; SEC("kprobe/func") int BPF_PROG(func_begin) @@ -124,16 +125,16 @@ int BPF_PROG(func_end) if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units ) // clang 12 doesn't like s64 / u32 division key = (__u64)delta / bucket_range + 1; - if (key >= NUM_BUCKET || + if (key >= bucket_num || delta >= max_latency - min_latency) - key = NUM_BUCKET - 1; + key = bucket_num - 1; } delta += min_latency; goto do_lookup; } // calculate index using delta - for (key = 0; key < (NUM_BUCKET - 1); key++) { + for (key = 0; key < (bucket_num - 1); key++) { if (delta < (cmp_base << key)) break; } diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index 5dee2caba0fe4..395f97b203ead 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -24,6 +24,7 @@ struct perf_ftrace { unsigned int bucket_range; unsigned int min_latency; unsigned int max_latency; + unsigned int bucket_num; int graph_depth; int func_stack_trace; int func_irq_info; base-commit: 92514ef226f511f2ca1fb1b8752966097518edc0 -- 2.48.1