Re: [PATCH v5 00/11] perf: Support multiple system call tables in the build

Ian Rogers <irogers@xxxxxxxxxx> · Mon, 17 Mar 2025 08:01:55 -0700

On Sat, Mar 15, 2025 at 4:02 PM Namhyung Kim <namhyung@xxxxxxxxxx> wrote:
>
> On Fri, Mar 14, 2025 at 05:48:12PM -0300, Arnaldo Carvalho de Melo wrote:
> > On Fri, Mar 14, 2025 at 02:26:41PM -0300, Arnaldo Carvalho de Melo wrote:
> > > it finds the pair, but then its sc->args has a bogus pointer... I'll see
> > > where this isn't being initialized...
> >
> > Breakpoint 4, trace__find_usable_bpf_prog_entry (trace=0x7fffffffa510, sc=0x1046f10) at builtin-trace.c:3874
> > 3874                  bool is_candidate = false;
> > (gdb) n
> > 3876                  if (pair == NULL || pair == sc ||
> > (gdb) p pair
> > $7 = (struct syscall *) 0x1083c50
> > (gdb) p pair->name
> > $8 = 0x81478e "accept4"
> > (gdb) n
> > 3877                      pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
> > (gdb) p i
> > $9 = 1
> > (gdb) n
> > 3876                  if (pair == NULL || pair == sc ||
> > (gdb) n
> > 3880                  printf("sc=%p\n", sc); fflush(stdout);
> > (gdb) n
> > sc=0x1046f10
> > 3881                  printf("sc->name=%p\n", sc->name); fflush(stdout);
> > (gdb) n
> > sc->name=0x6c66202c786c3830
> > 3882                  printf("sc->nr_args=%d, sc->args=%p\n", sc->nr_args, sc->args); fflush(stdout);
> > (gdb) p sc->nr_args
> > $10 = 1935635045
> > (gdb) p sc->args
> > $11 = (struct tep_format_field *) 0x257830203a6e656c
> > (gdb) p *sc
> > $12 = {e_machine = 540697702, id = 807761968, tp_format = 0x657075202c786c38, nr_args = 1935635045, args_size = 1634427759, bpf_prog = {sys_enter = 0x257830203a726464,
> >     sys_exit = 0x7075202c786c3830}, is_exit = 101, is_open = 101, nonexistent = 114, use_btf = 95, args = 0x257830203a6e656c,
> >   name = 0x6c66202c786c3830 <error: Cannot access memory at address 0x6c66202c786c3830>, fmt = 0x257830203a736761, arg_fmt = 0x786c3830}
> > (gdb)
> >
> > Ok, ran out of time, but if I simple avoid the second loop in:
> >
> > static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine)
> >
> >
> > I.e. the one that starts with:
> >
> >         /*
> >          * Now lets do a second pass looking for enabled syscalls without
> >          * an augmenter that have a signature that is a superset of another
> >          * syscall with an augmenter so that we can auto-reuse it.
> >
> > This:
> >
> > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> > index e0434f7dc67cb988..3664bb512c70cabf 100644
> > --- a/tools/perf/builtin-trace.c
> > +++ b/tools/perf/builtin-trace.c
> > @@ -3989,6 +3989,8 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
> >                         goto out;
> >         }
> >
> > +       return 0;
> > +
> >         /*
> >          * Now lets do a second pass looking for enabled syscalls without
> >          * an augmenter that have a signature that is a superset of another
> > ⬢ [acme@toolbox perf-tools-next]$
> >
> >
> > Then all works, we don't reuse any BPF program, but then that is an
> > heuristic anyway, that is tried becuase landlock_add_rule has a pointer
> > argument:
> >
> > root@number:~# perf trace -e landlock_add_rule perf test -w landlock
> >      0.000 ( 0.003 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_PATH_BENEATH, rule_attr: 0x7fff6f2bb550, flags: 45) = -1 EINVAL (Invalid argument)
> >      0.004 ( 0.001 ms): perf/71034 landlock_add_rule(ruleset_fd: 11, rule_type: LANDLOCK_RULE_NET_PORT, rule_attr: 0x7fff6f2bb540, flags: 45) = -1 EINVAL (Invalid argument)
> > root@number:~# perf test enum
> > 105: perf trace enum augmentation tests                              : Ok
> > root@number:~#
> >
> > So its some sort of syncronization on the various new tables, sorted by
> > name, etc that then when iterating over the syscalls ends up using a sc
> > that is not initialized.
>
> Right, I've realized that calling trace__syscall_info() can invalidate
> the existing sc since it calls trace__find_syscall() which reallocates
> and resorts the syscall table.  That's why it was ok when no filter was
> used since it'd allocate the whole table in the first pass.  Otherwise
> it looks for a pair syscall while holding the original sc but calling
> the function would invalidate the sc.
>
> What about this (on top of my earlier fix)?

LGTM.
Reviewed-by: Ian Rogers <irogers@xxxxxxxxxx>

Thanks,
Ian

> Thanks,
> Namhyung
>
>
> ---8<---
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index 49199d753b7cafbf..da0ddc713e6b35da 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -2506,10 +2506,12 @@ static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, i
>         };
>         struct syscall *sc, *tmp;
>
> -       sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
> -                    sizeof(struct syscall), syscall__cmp);
> -       if (sc)
> -               return sc;
> +       if (trace->syscalls.table) {
> +               sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size,
> +                            sizeof(struct syscall), syscall__cmp);
> +               if (sc)
> +                       return sc;
> +       }
>
>         tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1,
>                            sizeof(struct syscall));
> @@ -3855,6 +3857,10 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, i
>
>  static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
>  {
> +       int orig_id = sc->id;
> +       const char *orig_name = sc->name;
> +       int e_machine = sc->e_machine;
> +       struct tep_format_field *args = sc->args;
>         struct tep_format_field *field, *candidate_field;
>         /*
>          * We're only interested in syscalls that have a pointer:
> @@ -3866,18 +3872,19 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
>
>         return NULL;
>
> +       /* calling trace__syscall_info() may invalidate 'sc' */
>  try_to_find_pair:
> -       for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) {
> -               int id = syscalltbl__id_at_idx(sc->e_machine, i);
> -               struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id);
> +       for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) {
> +               int id = syscalltbl__id_at_idx(e_machine, i);
> +               struct syscall *pair = trace__syscall_info(trace, NULL, e_machine, id);
>                 struct bpf_program *pair_prog;
>                 bool is_candidate = false;
>
> -               if (pair == NULL || pair == sc ||
> +               if (pair == NULL || pair->id == orig_id ||
>                     pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented)
>                         continue;
>
> -               for (field = sc->args, candidate_field = pair->args;
> +               for (field = args, candidate_field = pair->args;
>                      field && candidate_field; field = field->next, candidate_field = candidate_field->next) {
>                         bool is_pointer = field->flags & TEP_FIELD_IS_POINTER,
>                              candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER;
> @@ -3944,7 +3951,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace
>                                 goto next_candidate;
>                 }
>
> -               pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name);
> +               pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, orig_name);
>                 return pair_prog;
>         next_candidate:
>                 continue;
> @@ -4041,6 +4048,11 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_m
>                 if (pair_prog == NULL)
>                         continue;
>
> +               /*
> +                * Get syscall info again as find usable entry above might
> +                * modify the syscall table and shuffle it.
> +                */
> +               sc = trace__syscall_info(trace, NULL, e_machine, key);
>                 sc->bpf_prog.sys_enter = pair_prog;
>
>                 /*
>