Hello Ian, Thanks for doing this. On Fri, Jan 31, 2025 at 11:15 PM Ian Rogers <irogers@xxxxxxxxxx> wrote: > > Identify struct syscall information in the syscalls table by a machine > type and syscall number, not just system call number. Having the > machine type means that 32-bit system calls can be differentiated from > 64-bit ones on a machine capable of both. Having a table for all > machine types and all system call numbers would be too large, so > maintain a sorted array of system calls as they are encountered. > > Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx> > --- > tools/perf/builtin-trace.c | 178 +++++++++++++++++++++++++------------ > 1 file changed, 119 insertions(+), 59 deletions(-) > > diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c > index ac97632f13dc..10d3ad0d21f7 100644 > --- a/tools/perf/builtin-trace.c > +++ b/tools/perf/builtin-trace.c > @@ -66,6 +66,7 @@ > #include "rb_resort.h" > #include "../perf.h" > #include "trace_augment.h" > +#include "dwarf-regs.h" > > #include <errno.h> > #include <inttypes.h> > @@ -86,6 +87,7 @@ > > #include <linux/ctype.h> > #include <perf/mmap.h> > +#include <tools/libc_compat.h> > > #ifdef HAVE_LIBTRACEEVENT > #include <event-parse.h> > @@ -143,7 +145,10 @@ struct trace { > struct perf_tool tool; > struct syscalltbl *sctbl; > struct { > + /** Sorted sycall numbers used by the trace. */ > struct syscall *table; > + /** Size of table. */ > + size_t table_size; > struct { > struct evsel *sys_enter, > *sys_exit, > @@ -1445,22 +1450,37 @@ static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) > return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias); > } > > -/* > - * is_exit: is this "exit" or "exit_group"? > - * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. > - * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc. > - * nonexistent: Just a hole in the syscall table, syscall id not allocated > +/** > + * struct syscall > */ > struct syscall { > + /** @e_machine: The ELF machine associated with the entry. */ > + int e_machine; > + /** @id: id value from the tracepoint, the system call number. */ > + int id; > struct tep_event *tp_format; > int nr_args; > + /** > + * @args_size: sum of the sizes of the syscall arguments, anything > + * after that is augmented stuff: pathname for openat, etc. > + */ > + > int args_size; > struct { > struct bpf_program *sys_enter, > *sys_exit; > } bpf_prog; > + /** @is_exit: is this "exit" or "exit_group"? */ > bool is_exit; > + /** > + * @is_open: is this "open" or "openat"? To associate the fd returned in > + * sys_exit with the pathname in sys_enter. > + */ > bool is_open; > + /** > + * @nonexistent: Name lookup failed. Just a hole in the syscall table, > + * syscall id not allocated. > + */ > bool nonexistent; > bool use_btf; > struct tep_format_field *args; > @@ -2066,22 +2086,21 @@ static int syscall__set_arg_fmts(struct syscall *sc) > return 0; > } > > -static int trace__read_syscall_info(struct trace *trace, int id) > +static int syscall__read_info(struct syscall *sc, struct trace *trace) > { > char tp_name[128]; > - struct syscall *sc; > - const char *name = syscalltbl__name(trace->sctbl, id); > + const char *name; > int err; > > - if (trace->syscalls.table == NULL) { > - trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); > - if (trace->syscalls.table == NULL) > - return -ENOMEM; > - } > - sc = trace->syscalls.table + id; > if (sc->nonexistent) > return -EEXIST; > > + if (sc->name) { > + /* Info already read. */ > + return 0; > + } > + > + name = syscalltbl__name(trace->sctbl, sc->id); > if (name == NULL) { > sc->nonexistent = true; > return -EEXIST; > @@ -2104,11 +2123,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) > */ > if (IS_ERR(sc->tp_format)) { > sc->nonexistent = true; > - return PTR_ERR(sc->tp_format); > + err = PTR_ERR(sc->tp_format); > + sc->tp_format = NULL; > + return err; > } > > - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? > - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) > + if (syscall__alloc_arg_fmts(sc, sc->tp_format->format.nr_fields)) Makes sense because IS_ERR(sc->tp_format) is checked a few lines above. However, nr_fields can be 7 with the __syscall_nr prefix. I sent a patch to fix this, but it's not included here... Never mind, it's off-topic. This patch looks good. Thanks, Howard > return -ENOMEM; > > sc->args = sc->tp_format->format.fields; > @@ -2397,13 +2417,67 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, > return printed; > } > > +static void syscall__init(struct syscall *sc, int e_machine, int id) > +{ > + memset(sc, 0, sizeof(*sc)); > + sc->e_machine = e_machine; > + sc->id = id; > +} > + > +static void syscall__exit(struct syscall *sc) > +{ > + if (!sc) > + return; > + > + zfree(&sc->arg_fmt); > +} > + > +static int syscall__cmp(const void *va, const void *vb) > +{ > + const struct syscall *a = va, *b = vb; > + > + if (a->e_machine != b->e_machine) > + return a->e_machine - b->e_machine; > + > + return a->id - b->id; > +} > + > +static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, int id) > +{ > + struct syscall key = { > + .e_machine = e_machine, > + .id = id, > + }; > + struct syscall *sc, *tmp; > + > + sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size, > + sizeof(struct syscall), syscall__cmp); > + if (sc) > + return sc; > + > + tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1, > + sizeof(struct syscall)); > + if (!tmp) > + return NULL; > + > + trace->syscalls.table = tmp; > + sc = &trace->syscalls.table[trace->syscalls.table_size++]; > + syscall__init(sc, e_machine, id); > + qsort(trace->syscalls.table, trace->syscalls.table_size, sizeof(struct syscall), > + syscall__cmp); > + sc = bsearch(&key, trace->syscalls.table, trace->syscalls.table_size, > + sizeof(struct syscall), syscall__cmp); > + return sc; > +} > + > typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel, > union perf_event *event, > struct perf_sample *sample); > > -static struct syscall *trace__syscall_info(struct trace *trace, > - struct evsel *evsel, int id) > +static struct syscall *trace__syscall_info(struct trace *trace, struct evsel *evsel, > + int e_machine, int id) > { > + struct syscall *sc; > int err = 0; > > if (id < 0) { > @@ -2428,28 +2502,20 @@ static struct syscall *trace__syscall_info(struct trace *trace, > > err = -EINVAL; > > - if (id > trace->sctbl->syscalls.max_id) { > - goto out_cant_read; > - } > - > - if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) && > - (err = trace__read_syscall_info(trace, id)) != 0) > - goto out_cant_read; > + sc = trace__find_syscall(trace, e_machine, id); > + if (sc) > + err = syscall__read_info(sc, trace); > > - if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) > - goto out_cant_read; > - > - return &trace->syscalls.table[id]; > - > -out_cant_read: > - if (verbose > 0) { > + if (err && verbose > 0) { > char sbuf[STRERR_BUFSIZE]; > - fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf))); > - if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL) > - fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); > + > + fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, > + str_error_r(-err, sbuf, sizeof(sbuf))); > + if (sc && sc->name) > + fprintf(trace->output, "(%s)", sc->name); > fputs(" information\n", trace->output); > } > - return NULL; > + return err ? NULL : sc; > } > > struct syscall_stats { > @@ -2596,14 +2662,6 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam > return NULL; > } > > -static void syscall__exit(struct syscall *sc) > -{ > - if (!sc) > - return; > - > - zfree(&sc->arg_fmt); > -} > - > static int trace__sys_enter(struct trace *trace, struct evsel *evsel, > union perf_event *event __maybe_unused, > struct perf_sample *sample) > @@ -2615,7 +2673,7 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel, > int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; > int augmented_args_size = 0; > void *augmented_args = NULL; > - struct syscall *sc = trace__syscall_info(trace, evsel, id); > + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id); > struct thread_trace *ttrace; > > if (sc == NULL) > @@ -2689,7 +2747,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, > struct thread_trace *ttrace; > struct thread *thread; > int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; > - struct syscall *sc = trace__syscall_info(trace, evsel, id); > + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id); > char msg[1024]; > void *args, *augmented_args = NULL; > int augmented_args_size; > @@ -2764,7 +2822,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, > struct thread *thread; > int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0; > int alignment = trace->args_alignment; > - struct syscall *sc = trace__syscall_info(trace, evsel, id); > + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id); > struct thread_trace *ttrace; > > if (sc == NULL) > @@ -3117,7 +3175,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, > > if (evsel == trace->syscalls.events.bpf_output) { > int id = perf_evsel__sc_tp_uint(evsel, id, sample); > - struct syscall *sc = trace__syscall_info(trace, evsel, id); > + struct syscall *sc = trace__syscall_info(trace, evsel, EM_HOST, id); > > if (sc) { > fprintf(trace->output, "%s(", sc->name); > @@ -3622,7 +3680,7 @@ static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, str > > static void trace__init_syscall_bpf_progs(struct trace *trace, int id) > { > - struct syscall *sc = trace__syscall_info(trace, NULL, id); > + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id); > > if (sc == NULL) > return; > @@ -3633,20 +3691,20 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id) > > static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id) > { > - struct syscall *sc = trace__syscall_info(trace, NULL, id); > + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id); > return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); > } > > static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) > { > - struct syscall *sc = trace__syscall_info(trace, NULL, id); > + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, id); > return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); > } > > static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array) > { > struct tep_format_field *field; > - struct syscall *sc = trace__syscall_info(trace, NULL, key); > + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key); > const struct btf_type *bt; > char *struct_offset, *tmp, name[32]; > bool can_augment = false; > @@ -3744,7 +3802,7 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace > try_to_find_pair: > for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { > int id = syscalltbl__id_at_idx(trace->sctbl, i); > - struct syscall *pair = trace__syscall_info(trace, NULL, id); > + struct syscall *pair = trace__syscall_info(trace, NULL, EM_HOST, id); > struct bpf_program *pair_prog; > bool is_candidate = false; > > @@ -3894,7 +3952,7 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) > */ > for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { > int key = syscalltbl__id_at_idx(trace->sctbl, i); > - struct syscall *sc = trace__syscall_info(trace, NULL, key); > + struct syscall *sc = trace__syscall_info(trace, NULL, EM_HOST, key); > struct bpf_program *pair_prog; > int prog_fd; > > @@ -4659,7 +4717,11 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, > pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0; > avg /= NSEC_PER_MSEC; > > - sc = &trace->syscalls.table[syscall_stats_entry->syscall]; > + sc = trace__syscall_info(trace, /*evsel=*/NULL, EM_HOST, > + syscall_stats_entry->syscall); > + if (!sc) > + continue; > + > printed += fprintf(fp, " %-15s", sc->name); > printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f", > n, stats->nr_failures, syscall_stats_entry->msecs, min, avg); > @@ -5067,12 +5129,10 @@ static int trace__config(const char *var, const char *value, void *arg) > > static void trace__exit(struct trace *trace) > { > - int i; > - > strlist__delete(trace->ev_qualifier); > zfree(&trace->ev_qualifier_ids.entries); > if (trace->syscalls.table) { > - for (i = 0; i <= trace->sctbl->syscalls.max_id; i++) > + for (size_t i = 0; i < trace->syscalls.table_size; i++) > syscall__exit(&trace->syscalls.table[i]); > zfree(&trace->syscalls.table); > } > -- > 2.48.1.362.g079036d154-goog >