Em Thu, Jun 16, 2022 at 10:22:14PM +0200, Jiri Olsa escreveu: > Some functions we use for bpf prologue generation are going to be > deprecated. This change reworks current code not to use them. > > We need to replace following functions/struct: > bpf_program__set_prep > bpf_program__nth_fd > struct bpf_prog_prep_result So this is a combination of those two patches, ok: Before: [root@quaco ~]# perf -v perf version 5.19.rc3.g3af6e6d06631 [root@quaco ~]# perf test bpf 40: LLVM search and compile : 40.1: Basic BPF llvm compile : Ok 40.3: Compile source for BPF prologue generation : Ok 40.4: Compile source for BPF relocation : Ok 42: BPF filter : 42.1: Basic BPF filtering : Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : Ok 63: Test libpfm4 support : 96: perf stat --bpf-counters test : Ok [root@quaco ~]# After: [acme@quaco perf-urgent]$ git log --oneline -5 44d22a24d0d49588 (HEAD -> perf/urgent) perf tools: Rework prologue generation code 3af6e6d0663196e7 (five/perf/urgent) perf stat: Enable ignore_missing_thread c3259c85b333af5e perf inject: Adjust output data offset for backward compatibility 8e6445010f7bffbb perf trace beauty: Fix generation of errno id->str table on ALT Linux 57a438eb2aef4f1f perf build-id: Fix caching files with a wrong build ID [acme@quaco perf-urgent]$ [root@quaco ~]# perf -v perf version 5.19.rc3.g44d22a24d0d4 [root@quaco ~]# perf test bpf 40: LLVM search and compile : 40.1: Basic BPF llvm compile : Ok 40.3: Compile source for BPF prologue generation : Ok 40.4: Compile source for BPF relocation : Ok 42: BPF filter : 42.1: Basic BPF filtering : Ok 42.2: BPF pinning : Ok 42.3: BPF prologue generation : Ok 63: Test libpfm4 support : 96: perf stat --bpf-counters test : Ok [root@quaco ~]# And 'perf test' with the augmented_raw_syscalls.c eBPF support also continues to work: [root@quaco ~]# perf trace -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,open*,connect* --max-events 20 0.000 systemd-oomd/948 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12 0.151 abrt-dump-jour/1338 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 31 0.096 abrt-dump-jour/1336 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 31 0.092 abrt-dump-jour/1337 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 31 9.024 isc-net-0000/1108 connect(fd: 512, uservaddr: { .family: INET6, port: 53, addr: 2406:8600:f03f:1f8::1003 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 9.728 isc-net-0000/1108 connect(fd: 513, uservaddr: { .family: INET6, port: 53, addr: 2406:2000:1d0::7961:686f:6f21 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 9.811 systemd-journa/712 openat(dfd: CWD, filename: "/proc/1107/comm", flags: RDONLY|CLOEXEC) ... 10.061 isc-net-0000/1108 connect(fd: 514, uservaddr: { .family: INET6, port: 53, addr: 2001:4998:1c0::7961:686f:6f21 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 9.811 systemd-journa/712 ... [continued]: openat()) = 66 10.401 systemd-journa/712 openat(dfd: CWD, filename: "/proc/1107/cmdline", flags: RDONLY|CLOEXEC|NOCTTY) = 66 10.438 isc-net-0000/1108 connect(fd: 513, uservaddr: { .family: INET6, port: 53, addr: 2001:4998:1b0::7961:686f:6f21 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable) 10.803 isc-net-0000/1108 connect(fd: 514, uservaddr: { .family: INET, port: 53, addr: 98.138.11.157 }, addrlen: 16) = 0 10.851 systemd-journa/712 openat(dfd: CWD, filename: "/proc/1107/status", flags: RDONLY|CLOEXEC|NOCTTY) = 66 11.165 systemd-journa/712 openat(dfd: CWD, filename: "/proc/1107/sessionid", flags: RDONLY|CLOEXEC) = 66 11.420 systemd-journa/712 openat(dfd: CWD, filename: "/proc/1107/loginuid", flags: RDONLY|CLOEXEC) = 66 11.631 systemd-journa/712 openat(dfd: CWD, filename: "/proc/1107/cgroup", flags: RDONLY|CLOEXEC) = 66 11.903 systemd-journa/712 openat(dfd: CWD, filename: "/run/systemd/units/log-extra-fields:named.service", flags: RDONLY|CLOEXEC) = -1 ENOENT (No such file or directory) 12.085 systemd-journa/712 openat(dfd: CWD, filename: "/run/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDWR|CLOEXEC|NONBLOCK) = -1 ENOENT (No such file or directory) 12.849 systemd-journa/712 openat(dfd: CWD, filename: "/run/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDWR|CLOEXEC|NONBLOCK) = -1 ENOENT (No such file or directory) 13.141 systemd-journa/712 openat(dfd: CWD, filename: "/run/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDWR|CLOEXEC|NONBLOCK) = -1 ENOENT (No such file or directory) [root@quaco ~]# [root@quaco ~]# grep -A2 SEC /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c SEC("!raw_syscalls:unaugmented") int syscall_unaugmented(struct syscall_enter_args *args) { -- * These will be tail_called from SEC("raw_syscalls:sys_enter"), so will find in * augmented_args_tmp what was read by that raw_syscalls:sys_enter and go * on from there, reading the first syscall arg as a string, i.e. open's -- SEC("!syscalls:sys_enter_connect") int sys_enter_connect(struct syscall_enter_args *args) { -- SEC("!syscalls:sys_enter_sendto") int sys_enter_sendto(struct syscall_enter_args *args) { -- SEC("!syscalls:sys_enter_open") int sys_enter_open(struct syscall_enter_args *args) { -- SEC("!syscalls:sys_enter_openat") int sys_enter_openat(struct syscall_enter_args *args) { -- SEC("!syscalls:sys_enter_rename") int sys_enter_rename(struct syscall_enter_args *args) { -- SEC("!syscalls:sys_enter_renameat") int sys_enter_renameat(struct syscall_enter_args *args) { -- SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { -- SEC("raw_syscalls:sys_exit") int sys_exit(struct syscall_exit_args *args) { [root@quaco ~]# Thanks, you can have my: Tested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Stronger than an: Acked-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> - Arnaldo > Currently we use bpf_program__set_prep to hook perf callback before > program is loaded and provide new instructions with the prologue. > > We replace this function/ality by taking instructions for specific > program, attaching prologue to them and load such new ebpf programs > with prologue using separate bpf_prog_load calls (outside libbpf > load machinery). > > Before we can take and use program instructions, we need libbpf to > actually load it. This way we get the final shape of its instructions > with all relocations and verifier adjustments). > > There's one glitch though.. perf kprobe program already assumes > generated prologue code with proper values in argument registers, > so loading such program directly will fail in the verifier. > > That's where the fallback pre-load handler fits in and prepends > the initialization code to the program. Once such program is loaded > we take its instructions, cut off the initialization code and prepend > the prologue. > > I know.. sorry ;-) > > To have access to the program when loading this patch adds support to > register 'fallback' section handler to take care of perf kprobe programs. > The fallback means that it handles any section definition besides the > ones that libbpf handles. > > The handler serves two purposes: > - allows perf programs to have special arguments in section name > - allows perf to use pre-load callback where we can attach init > code (zeroing all argument registers) to each perf program > > Suggested-by: Andrii Nakryiko <andrii@xxxxxxxxxx> > Signed-off-by: Jiri Olsa <jolsa@xxxxxxxxxx> > --- > tools/perf/util/bpf-loader.c | 204 ++++++++++++++++++++++++++++++----- > 1 file changed, 175 insertions(+), 29 deletions(-) > > diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c > index f8ad581ea247..6bd7c288e820 100644 > --- a/tools/perf/util/bpf-loader.c > +++ b/tools/perf/util/bpf-loader.c > @@ -9,6 +9,7 @@ > #include <linux/bpf.h> > #include <bpf/libbpf.h> > #include <bpf/bpf.h> > +#include <linux/filter.h> > #include <linux/err.h> > #include <linux/kernel.h> > #include <linux/string.h> > @@ -49,6 +50,7 @@ struct bpf_prog_priv { > struct bpf_insn *insns_buf; > int nr_types; > int *type_mapping; > + int *prologue_fds; > }; > > struct bpf_perf_object { > @@ -56,6 +58,11 @@ struct bpf_perf_object { > struct bpf_object *obj; > }; > > +struct bpf_preproc_result { > + struct bpf_insn *new_insn_ptr; > + int new_insn_cnt; > +}; > + > static LIST_HEAD(bpf_objects_list); > static struct hashmap *bpf_program_hash; > static struct hashmap *bpf_map_hash; > @@ -86,6 +93,7 @@ bpf_perf_object__next(struct bpf_perf_object *prev) > (perf_obj) = (tmp), (tmp) = bpf_perf_object__next(tmp)) > > static bool libbpf_initialized; > +static int libbpf_sec_handler; > > static int bpf_perf_object__add(struct bpf_object *obj) > { > @@ -99,12 +107,76 @@ static int bpf_perf_object__add(struct bpf_object *obj) > return perf_obj ? 0 : -ENOMEM; > } > > +static void *program_priv(const struct bpf_program *prog) > +{ > + void *priv; > + > + if (IS_ERR_OR_NULL(bpf_program_hash)) > + return NULL; > + if (!hashmap__find(bpf_program_hash, prog, &priv)) > + return NULL; > + return priv; > +} > + > +static struct bpf_insn prologue_init_insn[] = { > + BPF_MOV64_IMM(BPF_REG_2, 0), > + BPF_MOV64_IMM(BPF_REG_3, 0), > + BPF_MOV64_IMM(BPF_REG_4, 0), > + BPF_MOV64_IMM(BPF_REG_5, 0), > +}; > + > +static int libbpf_prog_prepare_load_fn(struct bpf_program *prog, > + struct bpf_prog_load_opts *opts __maybe_unused, > + long cookie __maybe_unused) > +{ > + size_t init_size_cnt = ARRAY_SIZE(prologue_init_insn); > + size_t orig_insn_cnt, insn_cnt, init_size, orig_size; > + struct bpf_prog_priv *priv = program_priv(prog); > + const struct bpf_insn *orig_insn; > + struct bpf_insn *insn; > + > + if (IS_ERR_OR_NULL(priv)) { > + pr_debug("bpf: failed to get private field\n"); > + return -BPF_LOADER_ERRNO__INTERNAL; > + } > + > + if (!priv->need_prologue) > + return 0; > + > + /* prepend initialization code to program instructions */ > + orig_insn = bpf_program__insns(prog); > + orig_insn_cnt = bpf_program__insn_cnt(prog); > + init_size = init_size_cnt * sizeof(*insn); > + orig_size = orig_insn_cnt * sizeof(*insn); > + > + insn_cnt = orig_insn_cnt + init_size_cnt; > + insn = malloc(insn_cnt * sizeof(*insn)); > + if (!insn) > + return -ENOMEM; > + > + memcpy(insn, prologue_init_insn, init_size); > + memcpy((char *) insn + init_size, orig_insn, orig_size); > + bpf_program__set_insns(prog, insn, insn_cnt); > + return 0; > +} > + > static int libbpf_init(void) > { > + LIBBPF_OPTS(libbpf_prog_handler_opts, handler_opts, > + .prog_prepare_load_fn = libbpf_prog_prepare_load_fn, > + ); > + > if (libbpf_initialized) > return 0; > > libbpf_set_print(libbpf_perf_print); > + libbpf_sec_handler = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_KPROBE, > + 0, &handler_opts); > + if (libbpf_sec_handler < 0) { > + pr_debug("bpf: failed to register libbpf section handler: %d\n", > + libbpf_sec_handler); > + return -BPF_LOADER_ERRNO__INTERNAL; > + } > libbpf_initialized = true; > return 0; > } > @@ -188,14 +260,31 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) > return obj; > } > > +static void close_prologue_programs(struct bpf_prog_priv *priv) > +{ > + struct perf_probe_event *pev; > + int i, fd; > + > + if (!priv->need_prologue) > + return; > + pev = &priv->pev; > + for (i = 0; i < pev->ntevs; i++) { > + fd = priv->prologue_fds[i]; > + if (fd != -1) > + close(fd); > + } > +} > + > static void > clear_prog_priv(const struct bpf_program *prog __maybe_unused, > void *_priv) > { > struct bpf_prog_priv *priv = _priv; > > + close_prologue_programs(priv); > cleanup_perf_probe_events(&priv->pev, 1); > zfree(&priv->insns_buf); > + zfree(&priv->prologue_fds); > zfree(&priv->type_mapping); > zfree(&priv->sys_name); > zfree(&priv->evt_name); > @@ -243,17 +332,6 @@ static bool ptr_equal(const void *key1, const void *key2, > return key1 == key2; > } > > -static void *program_priv(const struct bpf_program *prog) > -{ > - void *priv; > - > - if (IS_ERR_OR_NULL(bpf_program_hash)) > - return NULL; > - if (!hashmap__find(bpf_program_hash, prog, &priv)) > - return NULL; > - return priv; > -} > - > static int program_set_priv(struct bpf_program *prog, void *priv) > { > void *old_priv; > @@ -558,8 +636,8 @@ static int bpf__prepare_probe(void) > > static int > preproc_gen_prologue(struct bpf_program *prog, int n, > - struct bpf_insn *orig_insns, int orig_insns_cnt, > - struct bpf_prog_prep_result *res) > + const struct bpf_insn *orig_insns, int orig_insns_cnt, > + struct bpf_preproc_result *res) > { > struct bpf_prog_priv *priv = program_priv(prog); > struct probe_trace_event *tev; > @@ -607,7 +685,6 @@ preproc_gen_prologue(struct bpf_program *prog, int n, > > res->new_insn_ptr = buf; > res->new_insn_cnt = prologue_cnt + orig_insns_cnt; > - res->pfd = NULL; > return 0; > > errout: > @@ -715,7 +792,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) > struct bpf_prog_priv *priv = program_priv(prog); > struct perf_probe_event *pev; > bool need_prologue = false; > - int err, i; > + int i; > > if (IS_ERR_OR_NULL(priv)) { > pr_debug("Internal error when hook preprocessor\n"); > @@ -753,6 +830,13 @@ static int hook_load_preprocessor(struct bpf_program *prog) > return -ENOMEM; > } > > + priv->prologue_fds = malloc(sizeof(int) * pev->ntevs); > + if (!priv->prologue_fds) { > + pr_debug("Not enough memory: alloc prologue fds failed\n"); > + return -ENOMEM; > + } > + memset(priv->prologue_fds, -1, sizeof(int) * pev->ntevs); > + > priv->type_mapping = malloc(sizeof(int) * pev->ntevs); > if (!priv->type_mapping) { > pr_debug("Not enough memory: alloc type_mapping failed\n"); > @@ -761,13 +845,7 @@ static int hook_load_preprocessor(struct bpf_program *prog) > memset(priv->type_mapping, -1, > sizeof(int) * pev->ntevs); > > - err = map_prologue(pev, priv->type_mapping, &priv->nr_types); > - if (err) > - return err; > - > - err = bpf_program__set_prep(prog, priv->nr_types, > - preproc_gen_prologue); > - return err; > + return map_prologue(pev, priv->type_mapping, &priv->nr_types); > } > > int bpf__probe(struct bpf_object *obj) > @@ -874,6 +952,77 @@ int bpf__unprobe(struct bpf_object *obj) > return ret; > } > > +static int bpf_object__load_prologue(struct bpf_object *obj) > +{ > + int init_cnt = ARRAY_SIZE(prologue_init_insn); > + const struct bpf_insn *orig_insns; > + struct bpf_preproc_result res; > + struct perf_probe_event *pev; > + struct bpf_program *prog; > + int orig_insns_cnt; > + > + bpf_object__for_each_program(prog, obj) { > + struct bpf_prog_priv *priv = program_priv(prog); > + int err, i, fd; > + > + if (IS_ERR_OR_NULL(priv)) { > + pr_debug("bpf: failed to get private field\n"); > + return -BPF_LOADER_ERRNO__INTERNAL; > + } > + > + if (!priv->need_prologue) > + continue; > + > + /* > + * For each program that needs prologue we do following: > + * > + * - take its current instructions and use them > + * to generate the new code with prologue > + * - load new instructions with bpf_prog_load > + * and keep the fd in prologue_fds > + * - new fd will be used in bpf__foreach_event > + * to connect this program with perf evsel > + */ > + orig_insns = bpf_program__insns(prog); > + orig_insns_cnt = bpf_program__insn_cnt(prog); > + > + pev = &priv->pev; > + for (i = 0; i < pev->ntevs; i++) { > + /* > + * Skipping artificall prologue_init_insn instructions > + * (init_cnt), so the prologue can be generated instead > + * of them. > + */ > + err = preproc_gen_prologue(prog, i, > + orig_insns + init_cnt, > + orig_insns_cnt - init_cnt, > + &res); > + if (err) > + return err; > + > + fd = bpf_prog_load(bpf_program__get_type(prog), > + bpf_program__name(prog), "GPL", > + res.new_insn_ptr, > + res.new_insn_cnt, NULL); > + if (fd < 0) { > + char bf[128]; > + > + libbpf_strerror(-errno, bf, sizeof(bf)); > + pr_debug("bpf: load objects with prologue failed: err=%d: (%s)\n", > + -errno, bf); > + return -errno; > + } > + priv->prologue_fds[i] = fd; > + } > + /* > + * We no longer need the original program, > + * we can unload it. > + */ > + bpf_program__unload(prog); > + } > + return 0; > +} > + > int bpf__load(struct bpf_object *obj) > { > int err; > @@ -885,7 +1034,7 @@ int bpf__load(struct bpf_object *obj) > pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); > return err; > } > - return 0; > + return bpf_object__load_prologue(obj); > } > > int bpf__foreach_event(struct bpf_object *obj, > @@ -920,13 +1069,10 @@ int bpf__foreach_event(struct bpf_object *obj, > for (i = 0; i < pev->ntevs; i++) { > tev = &pev->tevs[i]; > > - if (priv->need_prologue) { > - int type = priv->type_mapping[i]; > - > - fd = bpf_program__nth_fd(prog, type); > - } else { > + if (priv->need_prologue) > + fd = priv->prologue_fds[i]; > + else > fd = bpf_program__fd(prog); > - } > > if (fd < 0) { > pr_debug("bpf: failed to get file descriptor\n"); > -- > 2.35.3 -- - Arnaldo