On 06/21/2019 06:55 AM, Andrii Nakryiko wrote: > Add ability to attach to kernel and user probes and retprobes. > Implementation depends on perf event support for kprobes/uprobes. > > Signed-off-by: Andrii Nakryiko <andriin@xxxxxx> > --- > tools/lib/bpf/libbpf.c | 207 +++++++++++++++++++++++++++++++++++++++ > tools/lib/bpf/libbpf.h | 8 ++ > tools/lib/bpf/libbpf.map | 2 + > 3 files changed, 217 insertions(+) > > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > index 2bb1fa008be3..d506772df350 100644 > --- a/tools/lib/bpf/libbpf.c > +++ b/tools/lib/bpf/libbpf.c > @@ -3969,6 +3969,213 @@ int bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) > return 0; > } > > +static int parse_uint(const char *buf) > +{ > + int ret; > + > + errno = 0; > + ret = (int)strtol(buf, NULL, 10); > + if (errno) { > + ret = -errno; > + pr_debug("failed to parse '%s' as unsigned int\n", buf); > + return ret; > + } > + if (ret < 0) { > + pr_debug("failed to parse '%s' as unsigned int\n", buf); > + return -EINVAL; > + } > + return ret; > +} > + > +static int parse_uint_from_file(const char* file) > +{ > + char buf[STRERR_BUFSIZE]; > + int fd, ret; > + > + fd = open(file, O_RDONLY); > + if (fd < 0) { > + ret = -errno; > + pr_debug("failed to open '%s': %s\n", file, > + libbpf_strerror_r(ret, buf, sizeof(buf))); > + return ret; > + } > + ret = read(fd, buf, sizeof(buf)); > + ret = ret < 0 ? 
-errno : ret; > + close(fd); > + if (ret < 0) { > + pr_debug("failed to read '%s': %s\n", file, > + libbpf_strerror_r(ret, buf, sizeof(buf))); > + return ret; > + } > + if (ret == 0 || ret >= sizeof(buf)) { > + buf[sizeof(buf) - 1] = 0; > + pr_debug("unexpected input from '%s': '%s'\n", file, buf); > + return -EINVAL; > + } > + return parse_uint(buf); > +} > + > +static int determine_kprobe_perf_type(void) > +{ > + const char *file = "/sys/bus/event_source/devices/kprobe/type"; > + return parse_uint_from_file(file); > +} > + > +static int determine_uprobe_perf_type(void) > +{ > + const char *file = "/sys/bus/event_source/devices/uprobe/type"; > + return parse_uint_from_file(file); > +} > + > +static int parse_config_from_file(const char *file) > +{ > + char buf[STRERR_BUFSIZE]; > + int fd, ret; > + > + fd = open(file, O_RDONLY); > + if (fd < 0) { > + ret = -errno; > + pr_debug("failed to open '%s': %s\n", file, > + libbpf_strerror_r(ret, buf, sizeof(buf))); > + return ret; > + } > + ret = read(fd, buf, sizeof(buf)); > + ret = ret < 0 ? 
-errno : ret; > + close(fd); > + if (ret < 0) { > + pr_debug("failed to read '%s': %s\n", file, > + libbpf_strerror_r(ret, buf, sizeof(buf))); > + return ret; > + } > + if (ret == 0 || ret >= sizeof(buf)) { > + buf[sizeof(buf) - 1] = 0; > + pr_debug("unexpected input from '%s': '%s'\n", file, buf); > + return -EINVAL; > + } > + if (strncmp(buf, "config:", 7)) { > + pr_debug("expected 'config:' prefix, found '%s'\n", buf); > + return -EINVAL; > + } > + return parse_uint(buf + 7); > +} > + > +static int determine_kprobe_retprobe_bit(void) > +{ > + const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe"; > + return parse_config_from_file(file); > +} > + > +static int determine_uprobe_retprobe_bit(void) > +{ > + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; > + return parse_config_from_file(file); > +} > + > +static int perf_event_open_probe(bool uprobe, bool retprobe, const char* name, > + uint64_t offset, int pid) > +{ > + struct perf_event_attr attr = {}; > + char errmsg[STRERR_BUFSIZE]; > + int type, pfd, err; > + > + type = uprobe ? determine_uprobe_perf_type() > + : determine_kprobe_perf_type(); > + if (type < 0) { > + pr_warning("failed to determine %s perf type: %s\n", > + uprobe ? "uprobe" : "kprobe", > + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); > + return type; > + } > + if (retprobe) { > + int bit = uprobe ? determine_uprobe_retprobe_bit() > + : determine_kprobe_retprobe_bit(); > + > + if (bit < 0) { > + pr_warning("failed to determine %s retprobe bit: %s\n", > + uprobe ? 
"uprobe" : "kprobe", > + libbpf_strerror_r(bit, errmsg, > + sizeof(errmsg))); > + return bit; > + } > + attr.config |= 1 << bit; > + } > + attr.size = sizeof(attr); > + attr.type = type; > + attr.config1 = (uint64_t)(void *)name; /* kprobe_func or uprobe_path */ > + attr.config2 = offset; /* kprobe_addr or probe_offset */ > + > + /* pid filter is meaningful only for uprobes */ > + pfd = syscall(__NR_perf_event_open, &attr, > + pid < 0 ? -1 : pid /* pid */, > + pid == -1 ? 0 : -1 /* cpu */, > + -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); > + if (pfd < 0) { > + err = -errno; > + pr_warning("%s perf_event_open() failed: %s\n", > + uprobe ? "uprobe" : "kprobe", > + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); > + return err; > + } > + return pfd; > +} > + > +int bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, > + const char *func_name) > +{ > + char errmsg[STRERR_BUFSIZE]; > + int pfd, err; > + > + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, > + 0 /* offset */, -1 /* pid */); > + if (pfd < 0) { > + pr_warning("program '%s': failed to create %s '%s' perf event: %s\n", > + bpf_program__title(prog, false), > + retprobe ? "kretprobe" : "kprobe", func_name, > + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); > + return pfd; > + } > + err = bpf_program__attach_perf_event(prog, pfd); > + if (err) { > + libbpf_perf_event_disable_and_close(pfd); > + pr_warning("program '%s': failed to attach to %s '%s': %s\n", > + bpf_program__title(prog, false), > + retprobe ? "kretprobe" : "kprobe", func_name, > + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); > + return err; > + } > + return pfd; > +} I do like that we facilitate usage by adding these APIs to libbpf, but my $0.02 would be that they should be designed slightly different. 
See it as a nit, but given it's exposed in libbpf.map and therefore immutable in the future, it's worth considering; right now with this set here you have: int bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, const char *func_name) int bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset) int bpf_program__attach_tracepoint(struct bpf_program *prog, const char *tp_category, const char *tp_name) int bpf_program__attach_raw_tracepoint(struct bpf_program *prog, const char *tp_name) int bpf_program__attach_perf_event(struct bpf_program *prog, int pfd) int libbpf_perf_event_disable_and_close(int pfd) So the idea is that all the bpf_program__attach_*() APIs return an fd that you can later on pass into libbpf_perf_event_disable_and_close(). I think there is a bit of a disconnect in that the bpf_program__attach_*() APIs try to do too many things at once. For example, the bpf_program__attach_raw_tracepoint() fd has nothing to do with perf, so passing it to libbpf_perf_event_disable_and_close() kind of works, but is hacky since there's no PERF_EVENT_IOC_DISABLE for it, so this would always error if a user cares to check the return code. In the kernel, we use an anon inode for this kind of object. Also, if a user tries to add more than one program to the same event, we need to create a new event fd every time. What this boils down to is that this should get a proper abstraction, e.g. as in struct libbpf_event which holds the event object. There should be helper functions like libbpf_event_create_{kprobe,uprobe,tracepoint,raw_tracepoint} returning such a struct libbpf_event object on success, and a single libbpf_event_destroy() that does the event-specific teardown. bpf_program__attach_event() can then take care of only attaching the program to it. Having an object for this is also more extensible than just an fd number. 
The nice thing is that this can also be completely internal to libbpf.c, as with struct bpf_program and other abstractions where we don't expose the internals in the public header. Thanks, Daniel