Re: [PATCH bpf-next v5 4/8] bpf: support attaching freplace programs to multiple attach points

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Sep 15, 2020 at 5:50 PM Toke Høiland-Jørgensen <toke@xxxxxxxxxx> wrote:
>
> From: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
>
> This enables support for attaching freplace programs to multiple attach
> points. It does this by amending the UAPI for bpf_link_Create with a target
> btf ID that can be used to supply the new attachment point along with the
> target program fd. The target must be compatible with the target that was
> supplied at program load time.
>
> The implementation reuses the checks that were factored out of
> check_attach_btf_id() to ensure compatibility between the BTF types of the
> old and new attachment. If these match, a new bpf_tracing_link will be
> created for the new attach target, allowing multiple attachments to
> co-exist simultaneously.
>
> The code could theoretically support multiple-attach of other types of
> tracing programs as well, but since I don't have a use case for any of
> those, there is no API support for doing so.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
> ---
>  include/linux/bpf.h            |    2 +
>  include/uapi/linux/bpf.h       |    2 +
>  kernel/bpf/syscall.c           |   92 ++++++++++++++++++++++++++++++++++------
>  kernel/bpf/verifier.c          |    9 ++++
>  tools/include/uapi/linux/bpf.h |    2 +
>  5 files changed, 94 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 939b37c78d55..360e8291e6bb 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -743,6 +743,8 @@ struct bpf_prog_aux {
>         struct mutex tgt_mutex; /* protects writing of tgt_* pointers below */
>         struct bpf_prog *tgt_prog;
>         struct bpf_trampoline *tgt_trampoline;
> +       enum bpf_prog_type tgt_prog_type;
> +       enum bpf_attach_type tgt_attach_type;
>         bool verifier_zext; /* Zero extensions has been inserted by verifier. */
>         bool offload_requested;
>         bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 7dd314176df7..46eaa3024dc3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -239,6 +239,7 @@ enum bpf_attach_type {
>         BPF_XDP_CPUMAP,
>         BPF_SK_LOOKUP,
>         BPF_XDP,
> +       BPF_TRACE_FREPLACE,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> @@ -633,6 +634,7 @@ union bpf_attr {
>                 __u32           flags;          /* extra flags */
>                 __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
>                 __u32           iter_info_len;  /* iter_info length */
> +               __u32           target_btf_id;  /* btf_id of target to attach to */

iter_info + iter_info_len are mutually exclusive with target_btf_id,
they should be inside a union with each other

>         } link_create;
>
>         struct { /* struct used by BPF_LINK_UPDATE command */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index fc2bca2d9f05..429afa820c6f 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -4,6 +4,7 @@
>  #include <linux/bpf.h>
>  #include <linux/bpf_trace.h>
>  #include <linux/bpf_lirc.h>
> +#include <linux/bpf_verifier.h>
>  #include <linux/btf.h>
>  #include <linux/syscalls.h>
>  #include <linux/slab.h>
> @@ -2555,12 +2556,18 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
>         .fill_link_info = bpf_tracing_link_fill_link_info,
>  };
>
> -static int bpf_tracing_prog_attach(struct bpf_prog *prog)
> +static int bpf_tracing_prog_attach(struct bpf_prog *prog,
> +                                  int tgt_prog_fd,
> +                                  u32 btf_id)
>  {
>         struct bpf_link_primer link_primer;
>         struct bpf_prog *tgt_prog = NULL;
>         struct bpf_tracing_link *link;
> +       struct btf_func_model fmodel;
> +       bool new_trampoline = false;
>         struct bpf_trampoline *tr;
> +       long addr;
> +       u64 key;
>         int err;
>
>         switch (prog->type) {
> @@ -2588,6 +2595,24 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
>                 err = -EINVAL;
>                 goto out_put_prog;
>         }
> +       if (tgt_prog_fd) {
> +               /* For now we only allow new targets for BPF_PROG_TYPE_EXT */
> +               if (prog->type != BPF_PROG_TYPE_EXT || !btf_id) {
> +                       err = -EINVAL;
> +                       goto out_put_prog;
> +               }
> +
> +               tgt_prog = bpf_prog_get(tgt_prog_fd);
> +               if (IS_ERR(tgt_prog)) {
> +                       err = PTR_ERR(tgt_prog);
> +                       goto out_put_prog;
> +               }
> +
> +               key = ((u64)tgt_prog->aux->id) << 32 | btf_id;
> +       } else if (btf_id) {
> +               err = -EINVAL;
> +               goto out_put_prog;
> +       }

These changes are unnecessarily tangled with this if/else and
double-checking of btf_id. btf_id is required iff tgt_prog_fd is
specified, right? So just check that explicitly:

if (!!btf_id != !!targ_prog_fd)
  ...

bpf_iter code uses ^ instead of !=, but does same checks


if (targ_prog_fd) {
  if (prog->type != BPF_PROG_TYPE_EXT)
    ...
} /* no else here */

More straightforward, IMO.

>
>         link = kzalloc(sizeof(*link), GFP_USER);
>         if (!link) {

here you are leaking tgt_prog

> @@ -2600,33 +2625,65 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
>
>         mutex_lock(&prog->aux->tgt_mutex);
>
> -       if (!prog->aux->tgt_trampoline) {
> -               err = -ENOENT;
> -               goto out_unlock;
> +       if (!prog->aux->tgt_trampoline ||
> +           (tgt_prog && prog->aux->tgt_trampoline->key != key)) {
> +               new_trampoline = true;
> +       } else {
> +               if (tgt_prog) {
> +                       /* re-using ref to tgt_prog, don't take another */
> +                       bpf_prog_put(tgt_prog);

this is just another complication to keep in mind, I propose to drop
it and handle at the very end, see below

> +               }
> +               tr = prog->aux->tgt_trampoline;
> +               tgt_prog = prog->aux->tgt_prog;
> +       }

this also seems too convoluted with this new_trampoline flag. Tell me
where I'm missing something.

0. get the *incorrectly* missing trampoline case (i.e., we already
attached to it and we don't specify explicit target) right out of the
way, without having nested checks:

if (!prog->aux->tgt_trampoline && !tgt_prog)
  err = -ENOENT, goto;

1. Now we know that everything has to work out. new_trampoline
detection at the end is very trivial as well, so there is no need to
have this extra new_trampoline flag and split new trampoline handling
into two ifs.

if (!prog->aux->tgt_trampoline || key != tgt_trampoline->key) {
  /* gotta be a new trampoline */
  bpf_check_attach_target()
  tr = bpf_trampoline_get()
} else {
  /* reuse trampoline */
  tr = ...;
  tgt_prog = prog->aux->tgt_prog; /* we'll deal with refcount later */
}

> +
> +       if (new_trampoline) {
> +               if (!tgt_prog) {
> +                       err = -ENOENT;
> +                       goto out_unlock;
> +               }
> +
> +               err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
> +                                             &fmodel, &addr, NULL, NULL);
> +               if (err) {
> +                       bpf_prog_put(tgt_prog);
> +                       goto out_unlock;
> +               }
> +
> +               tr = bpf_trampoline_get(key, (void *)addr, &fmodel);
> +               if (IS_ERR(tr)) {
> +                       err = PTR_ERR(tr);
> +                       bpf_prog_put(tgt_prog);
> +                       goto out_unlock;
> +               }
>         }

this got rolled into a check above

> -       tr = prog->aux->tgt_trampoline;
> -       tgt_prog = prog->aux->tgt_prog;
>
>         err = bpf_link_prime(&link->link, &link_primer);
>         if (err) {
> -               goto out_unlock;
> +               goto out_put_tgt;
>         }
>
>         err = bpf_trampoline_link_prog(prog, tr);
>         if (err) {
>                 bpf_link_cleanup(&link_primer);
>                 link = NULL;
> -               goto out_unlock;
> +               goto out_put_tgt;
>         }
>
>         link->tgt_prog = tgt_prog;
>         link->trampoline = tr;
> -
> -       prog->aux->tgt_prog = NULL;
> -       prog->aux->tgt_trampoline = NULL;
> +       if (!new_trampoline) {
> +               prog->aux->tgt_trampoline = NULL;
> +               prog->aux->tgt_prog = NULL;
> +       }

this is just:

if (tr == prog->aux->tgt_trampoline) {
  /* drop ref held by prog itself, we've already got tgt_prog ref from
syscall */
  bpf_prog_put(prog->aux->tgt_prog);
  prog->aux->tgt_prog = NULL;
  prog->aux->tgt_trampoline = NULL;
}

>         mutex_unlock(&prog->aux->tgt_mutex);
>
>         return bpf_link_settle(&link_primer);
> +out_put_tgt:

see above, in some cases you are not putting tgt_prog where you
should. If you initialize tr to NULL, then you can do this
unconditionally on any error (no need to have different goto labels):

if (tgt_prog_fd && tgt_prog)
    bpf_prog_put(tgt_prog);
if (tr && tr != prog->aux->tgt_trampoline)
    bpf_trampoline_put(tr);


> +       if (new_trampoline) {
> +               bpf_prog_put(tgt_prog);
> +               bpf_trampoline_put(tr);
> +       }
>  out_unlock:
>         mutex_unlock(&prog->aux->tgt_mutex);
>         kfree(link);
> @@ -2744,7 +2801,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
>                         tp_name = prog->aux->attach_func_name;
>                         break;
>                 }
> -               return bpf_tracing_prog_attach(prog);
> +               return bpf_tracing_prog_attach(prog, 0, 0);
>         case BPF_PROG_TYPE_RAW_TRACEPOINT:
>         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
>                 if (strncpy_from_user(buf,
> @@ -2864,6 +2921,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
>         case BPF_CGROUP_GETSOCKOPT:
>         case BPF_CGROUP_SETSOCKOPT:
>                 return BPF_PROG_TYPE_CGROUP_SOCKOPT;
> +       case BPF_TRACE_FREPLACE:
> +               return BPF_PROG_TYPE_EXT;
>         case BPF_TRACE_ITER:
>                 return BPF_PROG_TYPE_TRACING;
>         case BPF_SK_LOOKUP:
> @@ -3924,10 +3983,16 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
>             prog->expected_attach_type == BPF_TRACE_ITER)
>                 return bpf_iter_link_attach(attr, prog);
>
> +       if (attr->link_create.attach_type == BPF_TRACE_FREPLACE &&
> +           !prog->expected_attach_type)
> +               return bpf_tracing_prog_attach(prog,
> +                                              attr->link_create.target_fd,
> +                                              attr->link_create.target_btf_id);

Hm.. so you added a "fake" BPF_TRACE_FREPLACE attach_type, which is
not really set with BPF_PROG_TYPE_EXT and is only specified for the
LINK_CREATE command. Are you just trying to satisfy the link_create
flow of going from attach_type to program type? If that's the only
reason, I think we can adjust link_create code to handle this more
flexibly.

I need to think a bit more whether we want BPF_TRACE_FREPLACE at all,
but if we do, whether we should make it an expected_attach_type for
BPF_PROG_TYPE_EXT then...

> +
>         return -EINVAL;
>  }
>
> -#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
> +#define BPF_LINK_CREATE_LAST_FIELD link_create.target_btf_id
>  static int link_create(union bpf_attr *attr)
>  {
>         enum bpf_prog_type ptype;
> @@ -3961,6 +4026,7 @@ static int link_create(union bpf_attr *attr)
>                 ret = cgroup_bpf_link_attach(attr, prog);
>                 break;
>         case BPF_PROG_TYPE_TRACING:
> +       case BPF_PROG_TYPE_EXT:
>                 ret = tracing_bpf_link_attach(attr, prog);
>                 break;
>         case BPF_PROG_TYPE_FLOW_DISSECTOR:
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 02f704367014..2dd5e2ad7f31 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -11202,6 +11202,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
>                 if (!btf_type_is_func_proto(t))
>                         return -EINVAL;
>
> +               if ((prog->aux->tgt_prog_type &&
> +                    prog->aux->tgt_prog_type != tgt_prog->type) ||
> +                   (prog->aux->tgt_attach_type &&
> +                    prog->aux->tgt_attach_type != tgt_prog->expected_attach_type))
> +                       return -EINVAL;
> +
>                 if (tgt_prog && conservative)
>                         t = NULL;
>
> @@ -11300,6 +11306,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
>                 return ret;
>
>         if (tgt_prog) {
> +               prog->aux->tgt_prog_type = tgt_prog->type;
> +               prog->aux->tgt_attach_type = tgt_prog->expected_attach_type;
> +
>                 if (prog->type == BPF_PROG_TYPE_EXT) {
>                         env->ops = bpf_verifier_ops[tgt_prog->type];
>                         prog->expected_attach_type =
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 7dd314176df7..46eaa3024dc3 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -239,6 +239,7 @@ enum bpf_attach_type {
>         BPF_XDP_CPUMAP,
>         BPF_SK_LOOKUP,
>         BPF_XDP,
> +       BPF_TRACE_FREPLACE,
>         __MAX_BPF_ATTACH_TYPE
>  };
>
> @@ -633,6 +634,7 @@ union bpf_attr {
>                 __u32           flags;          /* extra flags */
>                 __aligned_u64   iter_info;      /* extra bpf_iter_link_info */
>                 __u32           iter_info_len;  /* iter_info length */
> +               __u32           target_btf_id;  /* btf_id of target to attach to */
>         } link_create;
>
>         struct { /* struct used by BPF_LINK_UPDATE command */
>




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux