Re: [PATCH bpf-next v3] libbpf: introduce legacy kprobe events support

"Rafael David Tinoco" <rafaeldtinoco@xxxxxxxxx> · Sun, 18 Jul 2021 22:59:21 -0300

> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -10007,6 +10007,10 @@ struct bpf_link {
> >         char *pin_path;         /* NULL, if not pinned */
> >         int fd;                 /* hook FD, -1 if not applicable */
> >         bool disconnected;
> > +       struct {
> > +               char *name;
> > +               bool retprobe;
> > +       } legacy;
>
> we shouldn't extend common bpf_link with kprobe-specific parts. We
> used to have something like this (for other use cases):
>
> struct bpf_link_kprobe {
>     struct bpf_link link;
>     char *legacy_name;
>     bool is_retprobe;
> };

would this:

struct bpf_link {
    int (*detach)(struct bpf_link *link);
    int (*destroy)(struct bpf_link *link);
    char *pin_path;
    int fd;
    bool disconnected;
};

struct bpf_link_kprobe {
    char *legacy_name;
    bool is_retprobe;
    struct bpf_link *link;
};

be ok ?

> And then internally do container_of() to "cast" struct bpf_link to
> struct bpf_link_kprobe. External API should still operate on struct
> bpf_link everywhere.

and what about this:

static struct bpf_link*
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
                                const char *func_name,
                                struct bpf_program_attach_kprobe_opts *opts)
{
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link_kprobe *kplink;
	int pfd, err;
	bool legacy;

	legacy = determine_kprobe_legacy();
	if (!legacy) {
		pfd = perf_event_open_probe(false /* uprobe */,
		                            opts->retprobe,
		                            func_name,
		                            0 /* offset */,
		                            -1 /* pid */);
	} else {
		pfd = perf_event_open_kprobe_legacy(opts->retprobe,
		                                    func_name,
		                                    0 /* offset */,
		                                    -1 /* pid */);
	}
	if (pfd < 0) {
		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
		        prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
		        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	kplink = calloc(1, sizeof(struct bpf_link_kprobe));
	if (!kplink)
		return libbpf_err_ptr(-ENOMEM);
	kplink->link = bpf_program__attach_perf_event(prog, pfd);
	err = libbpf_get_error(link);
	if (err) {
		free(kplink);
		close(pfd);
		pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
		        prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
		        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(err);
	}
	if (legacy) {
		kplink->legacy_name = strdup(func_name);
		kplink->is_retprobe = opts->retprobe;
	}

	return kplink->link;
}

And use this 'kplink->link' pointer as the bpf_link pointer for all kprobe
functions. For the detachment we would have something like:

static int bpf_link__detach_perf_event(struct bpf_link *link) {
	struct bpf_link *const *plink;
	struct bpf_link_kprobe *kplink;
	int err;

	plink = (struct bpf_link *const *) link;
	kplink = container_of(plink, struct bpf_link_kprobe, link);
	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
	if (err)
		err = -errno;
	close(link->fd);
	if (kplink) {
		remove_kprobe_event_legacy(kplink->legacy_name, kplink->is_retprobe);
		free(kplink->legacy_name);
		free(kplink);
	}

	return libbpf_err(err);
}

for the bpf_link__detach_perf_event(): This would also clean the container at
the detachment. (Next comment talks about having this here versus having a
legacy kprobe detachment callback).

[snip]

> >  static int bpf_link__detach_perf_event(struct bpf_link *link)
> >  {
> >         int err;
> > @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> >                 err = -errno;
> >
> >         close(link->fd);
> > +
> > +       if (link->legacy.name) {
> > +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> > +               free(link->legacy.name);
> > +       }
>
> instead of this check in bpf_link__detach_perf_event, attach_kprobe
> should install its own bpf_link__detach_kprobe_legacy callback

attach_kprobe_opts() could pass a pointer to link->detach->callback through the
opts I suppose (or now, the kplink->link->detach->callback). This way the
default would still be bpf_link__detach_perf_event() but we could create a
function bpf_link__detach_perf_event_legacy_kprobe() with what was previously
showed (about kplink freeing). This is not needed with the version showed
before the [snip] though.

> > +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> > +                                       uint64_t offset, int pid)
>
> you are not using offset here, let's pass it into
> add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
> %s+123" in poke_kprobe_events? There are separate patches that are
> adding ability to attach kprobe at offset, so let's support that
> (internally) from the get go for legacy case as well.
>
> also, it's not generic perf_event_open, it's specifically kprobe, so
> let's call it with kprobe in the name (e.g., kprobe_open_legacy or
> something)

I'm calling it now perf_event_open_kprobe_legacy() and it calls:

static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset)
{
	return poke_kprobe_events(true, name, retprobe, offset);
}

and then we set {kprobes/kretprobes}/funcname_pid, also supporting offset:

static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) {
	int fd, ret = 0;
	char cmd[192] = {}, probename[128] = {}, probefunc[128] = {};
	const char *file = "/sys/kernel/debug/tracing/kprobe_events";

	if (retprobe)
		ret = snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, getpid());
	else
		ret = snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, getpid());
	if (offset)
		ret = snprintf(probefunc, sizeof(probefunc), "%s+%lu", name, offset);
	if (ret)
		return -EINVAL;
	if (add) {
		snprintf(cmd, sizeof(cmd), "%c:%s %s",
				 retprobe ? 'r' : 'p',
				 probename,
		         offset ? probefunc : name);
	} else {
		snprintf(cmd, sizeof(cmd), "-:%s", probename);
	}
	fd = open(file, O_WRONLY | O_APPEND, 0);
	if (!fd)
		return -errno;
	ret = write(fd, cmd, strlen(cmd));
	if (ret)
		ret = -errno;
	close(fd);

	return ret;
}

[snip]

> > +               pfd = perf_event_open_probe(false /* uprobe */,
> > +                                           retprobe, func_name,
> > +                                            0 /* offset */,
> > +                                           -1 /* pid */);
> > +       else
> > +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> > +                                           retprobe, func_name,
> > +                                            0 /* offset */,
> > +                                           -1 /* pid */);
> >         if (pfd < 0) {
> >                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
> >                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
>
> we can't use bpf_program__attach_perf_event as is now, because we need
> to allocate a different struct bpf_link_kprobe.

We could do the container encapsulation using heap in
bpf_program_attach_kprobe_opts() or attach_kprobe() like I'm showing here, no ?

...
	kplink = calloc(1, sizeof(struct bpf_link_kprobe));
	if (!kplink)
		return libbpf_err_ptr(-ENOMEM);
	kplink->link = bpf_program__attach_perf_event(prog, pfd);
...

and then free all this structure (bpf_link and its encapsulation at the
detachment, like said previously also). This way we don't have to change
bpf_program__attach_perf_event() which would continue to serve
bpf_program__attach_tracepoint() and bpf_program__attach_uprobe() unmodified.
This way, kprobe would have a container for all cases and uprobe and tracepoint
could have a container in the future if needed.

> Let's extract the
> PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
> and use it from both bpf_program__attach_perf_event and
> bpf_program__attach_kprobe. It's actually good because we can check
> silly errors (like prog_fd < 0) before we create perf_event FD now.

Okay, but I'm considering this orthogonal to what you said previously (on
changing bpf_program__attach_perf_event). UNLESS you really prefer me to do the
container allocation in bpf_program__attach_perf_event() but then we would have
to free the container in all detachments (kprobe, tracepoint and uprobe) as it
couldn't be placed in stack (or it would eventually be lost, no ?).