On Mon, Feb 3, 2025 at 8:41 AM Mykyta Yatsenko <mykyta.yatsenko5@xxxxxxxxx> wrote: > > From: Mykyta Yatsenko <yatsenko@xxxxxxxx> > > To better verify some complex BPF programs we'd like to preset global > variables. > This patch introduces CLI argument `--set-global-vars` to veristat, that > allows presetting values to global variables defined in BPF program. For > example: > > prog.c: > ``` > enum Enum { ELEMENT1 = 0, ELEMENT2 = 5 }; > const volatile __s64 a = 5; > const volatile __u8 b = 5; > const volatile enum Enum c = ELEMENT2; > const volatile bool d = false; > > char arr[4] = {0}; > > SEC("tp_btf/sched_switch") > int BPF_PROG(...) > { > bpf_printk("%c\n", arr[a]); > bpf_printk("%c\n", arr[b]); > bpf_printk("%c\n", arr[c]); > bpf_printk("%c\n", arr[d]); > return 0; > } > ``` > By default verification of the program fails: > ``` > ./veristat prog.bpf.o > ``` > By presetting global variables, we can make verification pass: > ``` > ./veristat wq.bpf.o --set-global-vars "a = 0; b = 1; c = 2; d = 3;" > ``` > > Signed-off-by: Mykyta Yatsenko <yatsenko@xxxxxxxx> > --- > tools/testing/selftests/bpf/veristat.c | 189 +++++++++++++++++++++++++ > 1 file changed, 189 insertions(+) > > diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c > index 06af5029885b..65bb8a773d23 100644 > --- a/tools/testing/selftests/bpf/veristat.c > +++ b/tools/testing/selftests/bpf/veristat.c > @@ -154,6 +154,11 @@ struct filter { > bool abs; > }; > > +struct var_preset { > + char *name; > + long long value; > +}; > + > static struct env { > char **filenames; > int filename_cnt; > @@ -195,6 +200,8 @@ static struct env { > int progs_processed; > int progs_skipped; > int top_src_lines; > + struct var_preset *presets; > + int npresets; > } env; > > static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) > @@ -246,12 +253,14 @@ static const struct argp_option opts[] = { > { "test-reg-invariants", 'r', NULL, 0, > "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, > { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, > + { "set-global-vars", 'g', "GLOBALS", 0, "Set global variables provided in the expression, for example \"var1 = 1; var2 = 2\"" }, nit: this is subjective, but I feel like -G instead of -g would be better here to be more noticeable but main point from me here would be to avoid parsing multiple values, it's better to allow repeated -G uses and treat each value as strictly single variable initialization. So instead of: ./veristat wq.bpf.o --set-global-vars "a = 0; b = 1; c = 2; d = 3;" we'll have: ./veristat wq.bpf.o -G "a = 0" -G "b = 1" -G "c = 2" -G "d = 3" A touch more verbose for many variables, but not significantly so. On the other hand, less parsing, and less arbitrary choices of what separator (;) to use. WDYT? pw-bot: cr > {}, > }; > > static int parse_stats(const char *stats_str, struct stat_specs *specs); > static int append_filter(struct filter **filters, int *cnt, const char *str); > static int append_filter_file(const char *path); > +static int parse_var_presets(char *expr, struct var_preset *presets, int capacity, int *size); > > static error_t parse_arg(int key, char *arg, struct argp_state *state) > { > @@ -363,6 +372,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) > return -ENOMEM; > env.filename_cnt++; > break; > + case 'g': { > + char *expr = strdup(arg); > + > + env.presets = calloc(64, sizeof(*env.presets)); > + if (parse_var_presets(expr, env.presets, 64, &env.npresets)) { > + fprintf(stderr, "Could not parse global variables preset: %s\n", arg); > + argp_usage(state); > + } > + free(expr); > + break; > + } > default: > return ARGP_ERR_UNKNOWN; > } > @@ -1292,6 +1312,169 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf > return 0; > }; > > +static int parse_var_presets(char *expr, struct var_preset *presets, int capacity, int *size) > +{ > + char *state; > + char *next; > + int i = 0; > + > + while ((next = strtok_r(i ? NULL : expr, ";", &state))) { > + char *eq_ptr = strchr(next, '='); > + char *name_ptr = next; > + char *name_end = eq_ptr - 1; > + char *val_ptr = eq_ptr + 1; > + > + if (!eq_ptr) > + continue; > + > + if (i >= capacity) { why artificially hard-coding maximum capacity? we have malloc() > + fprintf(stderr, "Too many global variable presets\n"); > + return -EINVAL; > + } > + while (isspace(*name_ptr)) > + ++name_ptr; > + while (isspace(*name_end)) > + --name_end; > + > + *(name_end + 1) = '\0'; > + presets[i].name = strdup(name_ptr); > + errno = 0; > + presets[i].value = strtoll(val_ptr, NULL, 10); > + if (errno == ERANGE) { > + errno = 0; > + presets[i].value = strtoull(val_ptr, NULL, 10); > + } > + if (errno) { > + fprintf(stderr, "Could not parse integer value %s\n", val_ptr); > + return -EINVAL; > + } > + ++i; > + } > + *size = i; > + return 0; > +} > + it would be nice to be able to specify enums both by name and by value, WDYT? > +static bool is_signed_type(const struct btf_type *type) > +{ > + if (btf_is_int(type)) > + return btf_int_encoding(type) & BTF_INT_SIGNED; enum can be signed as well, I think (but different way to specify that, through kflag) > + return true; > +} > + > +static const struct btf_type *var_base_type(const struct btf *btf, const struct btf_type *type) > +{ > + switch (btf_kind(type)) { > + case BTF_KIND_VAR: > + case BTF_KIND_TYPE_TAG: > + case BTF_KIND_CONST: > + case BTF_KIND_VOLATILE: > + case BTF_KIND_RESTRICT: > + case BTF_KIND_TYPEDEF: > + case BTF_KIND_DECL_TAG: > + return var_base_type(btf, btf__type_by_id(btf, type->type)); why recursion, just do a loop? and libbpf actually has "btf__resolve_type()", see if you can just use that? > + } > + return type; > +} > + [...]