Re: [PATCH bpf-next] selftests/bpf: emit top frequent C code lines in veristat

Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> · Thu, 22 Aug 2024 15:29:55 -0700

On Tue, Aug 20, 2024 at 8:24 AM Mykyta Yatsenko
<mykyta.yatsenko5@xxxxxxxxx> wrote:
>
> From: Mykyta Yatsenko <yatsenko@xxxxxxxx>
>
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent C source code lines to understand which part of the program has
> the biggest verification cost.
>
> This patch introduces `--top-lines` and `--include-instructions` flags in
> veristat.
> `--top-lines=N` makes veristat output N the most popular C sorce code
> lines, parsed from verification log. `--include-instructions` enables
> printing BPF instructions along with C source code.

Hm... I think --include-instructions needs a bit more thought to be
useful. Just one assembly instruction isn't all that useful, we should
be thinking in terms of blocks of assembly instruction, probably...
But then not sure how to take that into account when calculating top N
frequencies...

Not sure about all that. For v2, let's drop the assembly instructions
parts and try to get --top-lines logic right. We can then see how it
works in practice and adjust and extend as necessary.

>
> Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>
> ---
>  tools/testing/selftests/bpf/veristat.c | 160 +++++++++++++++++++++++++
>  1 file changed, 160 insertions(+)
>

Ok, #1 problem is that --top-lines is useless without -vl2, so we
should either check that this is specified. Or maybe better force
verbose log internally without actually emitting it, probably it's
better.

> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
> index 1ec5c4c47235..977ab54cba83 100644
> --- a/tools/testing/selftests/bpf/veristat.c
> +++ b/tools/testing/selftests/bpf/veristat.c
> @@ -16,10 +16,12 @@
>  #include <sys/stat.h>
>  #include <bpf/libbpf.h>
>  #include <bpf/btf.h>
> +#include <bpf/hashmap.h>

well, great for veristat in kernel repo, but this is libbpf-internal
thing and I'd like to avoid relying on it in veristat to make Github
sync simple.

>  #include <libelf.h>
>  #include <gelf.h>
>  #include <float.h>
>  #include <math.h>
> +#include <linux/err.h>
>
>  #ifndef ARRAY_SIZE
>  #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
> @@ -179,8 +181,16 @@ static struct env {
>         int files_skipped;
>         int progs_processed;
>         int progs_skipped;
> +       int top_lines;
> +       bool include_insn;
>  } env;
>
> +struct line_cnt {
> +       long cnt;
> +       char *line;
> +       char *insn;
> +};
> +
>  static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
>  {
>         if (!env.verbose)
> @@ -206,6 +216,8 @@ const char argp_program_doc[] =
>  enum {
>         OPT_LOG_FIXED = 1000,
>         OPT_LOG_SIZE = 1001,
> +       OPT_TOP_LINES = 1002,
> +       OPT_INCLUDE_INSN = 1003,
>  };
>
>  static const struct argp_option opts[] = {
> @@ -228,6 +240,9 @@ static const struct argp_option opts[] = {
>           "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
>         { "test-reg-invariants", 'r', NULL, 0,
>           "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
> +       { "top-lines", OPT_TOP_LINES, "N", 0, "Emit N the most frequent C source code lines." },

"Emit N most frequent source code lines."

Doesn't have to be C, in general.

maybe let's call it "--top-src-lines" to be a bit more specific?

> +       { "include-instructions", OPT_INCLUDE_INSN, NULL, OPTION_HIDDEN,
> +         "If emitting the most frequent C source code lines, include their BPF instructions." },
>         {},
>  };
>
> @@ -337,6 +352,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
>                         return -ENOMEM;
>                 env.filename_cnt++;
>                 break;
> +       case OPT_TOP_LINES:
> +               errno = 0;
> +               env.top_lines = strtol(arg, NULL, 10);
> +               if (errno) {
> +                       fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
> +                       argp_usage(state);
> +               }
> +               break;
> +       case OPT_INCLUDE_INSN:
> +               env.include_insn = true;
> +               break;
>         default:
>                 return ARGP_ERR_UNKNOWN;
>         }
> @@ -817,6 +843,24 @@ static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
>         free(stats);
>  }
>
> +static int line_cnt_cmp(const void *a, const void *b)
> +{
> +       const struct line_cnt **a_cnt = (const struct line_cnt **)a;
> +       const struct line_cnt **b_cnt = (const struct line_cnt **)b;
> +
> +       return (*b_cnt)->cnt - (*a_cnt)->cnt;
> +}
> +
> +static size_t str_hash_fn(long key, void *ctx)
> +{
> +       return str_hash((void *)key);
> +}
> +
> +static bool str_equal_fn(long a, long b, void *ctx)
> +{
> +       return strcmp((void *)a, (void *)b) == 0;
> +}
> +
>  static char verif_log_buf[64 * 1024];
>
>  #define MAX_PARSED_LOG_LINES 100
> @@ -854,6 +898,120 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
>         return 0;
>  }
>
> +static char *parse_instructions(char *buf, char *buf_end)
> +{
> +       char *start = buf;
> +
> +       while (buf && buf < buf_end && *buf && *buf != ';') {
> +               char *num_end = NULL;
> +
> +               strtol(buf, &num_end, 10);
> +               if (!num_end || *num_end != ':')
> +                       break;
> +
> +               buf = strchr(num_end, '\n');
> +       }
> +
> +       return start == buf ? NULL : strndup(start, buf - start);
> +}
> +
> +static int print_top_lines(char * const buf, size_t buf_sz)
> +{
> +       struct hashmap *lines_map;
> +       struct line_cnt **lines_cnt = NULL;
> +       struct hashmap_entry *entry;
> +       char *buf_end;
> +       char *line;
> +       int err = 0;
> +       int unique_lines;
> +       int bkt;
> +       int i;
> +
> +       if (!buf || !buf_sz)
> +               return -EINVAL;
> +

let's make sure we don't call print_top_lines with not buffer instead ?

> +       buf_end = buf + buf_sz - 1;
> +       *buf_end = '\0';

and buffer should be valid, so no need to zero-terminate it (verifier
guarantees that)

> +       lines_map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
> +       if (IS_ERR(lines_map))
> +               return PTR_ERR(lines_map);
> +
> +       for (char *line_start = buf; line_start < buf_end;) {
> +               char *line_end = strchr(line_start, '\n');

any reason we can't use strtok_r() for this?

> +
> +               if (!line_end)
> +                       line_end = buf_end;
> +
> +               if (*line_start == ';') {

let's check that it starts with "; " with strncmp() and skip space as well?

> +                       struct line_cnt *line_cnt;
> +
> +                       line_start++; /* skip semicolon */
> +                       *line_end = '\0';
> +                       if (hashmap__find(lines_map, line_start, &line_cnt)) {
> +                               line_cnt->cnt++;

so as I mentioned, I'd like to avoid the use of libbpf's hashmap. How
about we just add each string's offset within the buffer into a
(rather long sometimes) array of u32s. Then implement custom
comparator that would compare actual strings within log buffer by its
offset. Sort such indices this way, and then (reusing this comparator)
implement "unique" operation just like std::unique. Then we'll only
need to re-sort indices (but now taking their total counts), and emit
first/last N items.

Basically, keep it cheap by using offsets, but otherwise rely on NlogN
sorting to avoid hashmaps.

> +                       } else {
> +                               char *insn = NULL;
> +
> +                               line_cnt = malloc(sizeof(struct line_cnt));
> +                               if (!line_cnt) {
> +                                       *line_end = '\n';
> +                                       goto cleanup;
> +                               }
> +                               line = strdup(line_start);
> +                               if (!line) {
> +                                       *line_end = '\n';
> +                                       free(line_cnt);
> +                                       goto cleanup;
> +                               }
> +                               if (env.include_insn)
> +                                       insn = parse_instructions(line_end + 1, buf_end);
> +                               line_cnt->insn = insn;
> +                               line_cnt->line = line;
> +                               line_cnt->cnt = 1;
> +                               err = hashmap__add(lines_map, line, line_cnt);
> +                       }
> +                       *line_end = '\n';
> +
> +                       if (err)
> +                               goto cleanup;
> +               }
> +               line_start = line_end + 1;
> +       }
> +       unique_lines = hashmap__size(lines_map);
> +       if (!unique_lines)
> +               goto cleanup;
> +
> +       lines_cnt = calloc(unique_lines, sizeof(struct line_cnt *));
> +       if (!lines_cnt)
> +               goto cleanup;
> +
> +       i = 0;
> +       hashmap__for_each_entry(lines_map, entry, bkt)
> +               lines_cnt[i++] = (struct line_cnt *)entry->value;
> +
> +       qsort(lines_cnt, unique_lines, sizeof(struct line_cnt *), line_cnt_cmp);
> +
> +       printf("Top C source code lines:\n");

nit: there is no need to say "C source code", it's just "source code"

> +       for (i = 0; i  < min(unique_lines, env.top_lines); i++) {
> +               printf("; [Count: %ld] %s\n", lines_cnt[i]->cnt, lines_cnt[i]->line);

[Count: %ld] prefix is super verbose. Let's just emit a number of
occurrences without any extra "Count" text.

BTW, newer verifiers emit file location information now which looks
like " @ test_vmlinux.c:82", it would be nice if we could detect that,
parse it out separately (at the very last moment, during output) and
reformat everything to something like:

123: (test_vmlinux.c:82) <the rest of source code line>

Make sure to use something like %5d for frequency, so at least that
part is nicely aligned

> +               if (env.include_insn)
> +                       printf("%s\n", lines_cnt[i]->insn);
> +       }
> +       printf("\n");
> +
> +cleanup:
> +       hashmap__for_each_entry(lines_map, entry, bkt) {
> +               struct line_cnt *line_cnt = (struct line_cnt *)entry->value;
> +
> +               free(line_cnt->insn);
> +               free(line_cnt->line);
> +               free(line_cnt);
> +       }

we really shouldn't be allocating so much for strings, we have one
huge string and we should be dealing with indices into the buffer

pw-bot: cr

> +       hashmap__free(lines_map);
> +       free(lines_cnt);
> +       return err;
> +}
> +
>  static int guess_prog_type_by_ctx_name(const char *ctx_name,
>                                        enum bpf_prog_type *prog_type,
>                                        enum bpf_attach_type *attach_type)
> @@ -1048,6 +1206,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
>                        filename, prog_name, stats->stats[DURATION],
>                        err ? "failure" : "success", buf);
>         }
> +       if (env.top_lines)
> +               print_top_lines(buf, buf_sz);
>
>         if (verif_log_buf != buf)
>                 free(buf);
> --
> 2.46.0
>