On 19/09/2024 03:15, Eduard Zingerman wrote:
On Wed, 2024-09-18 at 21:39 +0100, Mykyta Yatsenko wrote:
From: Mykyta Yatsenko <yatsenko@xxxxxxxx>
Production BPF programs are increasing in number of instructions and states
to the point, where optimising verification process for them is necessary
to avoid running into instruction limit. Authors of those BPF programs
need to analyze verifier output, for example, collecting the most
frequent source code lines to understand which part of the program has
the biggest verification cost.
This patch introduces `--top-src-lines` flag in veristat.
`--top-src-lines=N` makes veristat output N the most popular sorce code
lines, parsed from verification log.
An example:
```
$ sudo ./veristat --log-size=1000000000 --top-src-lines=4 pyperf600.bpf.o
Processing 'pyperf600.bpf.o'...
Top source lines (on_event):
4697: (pyperf.h:0)
2334: (pyperf.h:326) event->stack[i] = *symbol_id;
2334: (pyperf.h:118) pidData->offsets.String_data);
1176: (pyperf.h:92) bpf_probe_read_user(&frame->f_back,
...
```
Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx>
I think this is a cool feature!
It's a bit of a shame that we don't collect information like this in
the verifier itself, where it would be simpler to do (e.g. associate a
counter with each instruction, or with each jump target).
Thanks for looking at this patch.
[...]
+static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
+{
+ int lines_cap = 1;
+ int lines_size = 0;
+ char **lines;
+ char *line = NULL;
+ char *state;
+ struct line_cnt *freq = NULL;
+ struct line_cnt *cur;
+ int unique_lines;
+ int err;
Note:
when compiling with clang 20.0.0git the following warning is reported:
veristat.c:957:14: error: variable 'err' is used uninitialized whenever 'for' loop exits because its condition is false [-Werror,-Wsometimes-uninitialized]
957 | for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
veristat.c:972:9: note: uninitialized use occurs here
972 | return err;
|
...
veristat.c:903:9: note: initialize the variable 'err' to silence this warning
903 | int err;
| ^
| = 0
yes, I've got a mail from CI regarding this, going to fix in the next
version.
Also, a nitpick: declarations should be sorted in a "reverse Christmas
tree" order (at-least that's what Andrii enforces :).
+ int i;
+
+ lines = calloc(lines_cap, sizeof(char *));
Nitpick: here and in a few places below use sizeof(*<array>), e.g.:
calloc(lines_cap, sizeof(*lines))
+ if (!lines)
+ return -ENOMEM;
+
+ while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
+ if (strncmp(line, "; ", 2))
+ continue;
+ line += 2;
+
+ if (lines_size == lines_cap) {
+ char **tmp;
+
+ lines_cap *= 2;
+ tmp = realloc(lines, lines_cap * sizeof(char *));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ lines = tmp;
+ }
+ lines[lines_size] = line;
+ lines_size++;
+ }
+
+ if (!lines_size)
+ goto cleanup;
+
+ qsort(lines, lines_size, sizeof(char *), str_cmp);
+
+ freq = calloc(lines_size, sizeof(struct line_cnt));
+ if (!freq) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ cur = freq;
+ cur->line = lines[0];
+ cur->cnt = 1;
+ for (i = 1; i < lines_size; ++i) {
+ if (strcmp(lines[i], cur->line)) {
+ cur++;
+ cur->line = lines[i];
+ cur->cnt = 0;
+ }
+ cur->cnt++;
+ }
+ unique_lines = cur - freq + 1;
+
+ qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
+
+ printf("Top source lines (%s):\n", prog_name);
+ for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
+ char *src_code;
+ char *src_line;
+
+ src_code = strtok_r(freq[i].line, "@", &state);
Does verifier guarantee presence of '@' for each source comment line?
It does not, if there is no @ character in the source line, I just print
a full source line.
`if (src_line)` should check for that.
+ src_line = strtok_r(NULL, "\0", &state);
+ if (src_line)
The '.line' string is null-terminated, can 'src_line' ever be NULL?
+ printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line + 1, src_code);
+ else
+ printf("%5d: %s\n", freq[i].cnt, src_code);
+ }
+
+cleanup:
+ free(freq);
+ free(lines);
+ return err;
+}
+
[...]