Re: [PATCH RESEND bpf-next v3 3/9] bpf: wrap prog->aux->linked_prog in a bpf_tracing_link

On Fri, Sep 11, 2020 at 3:00 AM Toke Høiland-Jørgensen <toke@xxxxxxxxxx> wrote:
>
> From: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
>
> The bpf_tracing_link structure is a convenient data structure to contain
> the reference to a linked program; in preparation for supporting multiple
> attachments for the same freplace program, move the linked_prog in
> prog->aux into a bpf_tracing_link wrapper.
>
> With this change, it is no longer possible to attach the same tracing
> program multiple times (detaching in-between), since the reference from the
> tracing program to the target disappears on the first attach. However,
> since the next patch will let the caller supply an attach target, that will
> also make it possible to attach to the same place multiple times.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx>
> ---
>  include/linux/bpf.h     |   21 +++++++++---
>  kernel/bpf/btf.c        |   13 +++++---
>  kernel/bpf/core.c       |    5 +--
>  kernel/bpf/syscall.c    |   81 +++++++++++++++++++++++++++++++++++++----------
>  kernel/bpf/trampoline.c |   12 ++-----
>  kernel/bpf/verifier.c   |   13 +++++---
>  6 files changed, 102 insertions(+), 43 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 7f19c3216370..722c60f1c1fc 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -26,6 +26,7 @@ struct bpf_verifier_log;
>  struct perf_event;
>  struct bpf_prog;
>  struct bpf_prog_aux;
> +struct bpf_tracing_link;
>  struct bpf_map;
>  struct sock;
>  struct seq_file;
> @@ -614,8 +615,8 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
>  }
>  #ifdef CONFIG_BPF_JIT
>  struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
> -int bpf_trampoline_link_prog(struct bpf_prog *prog);
> -int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
> +int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
> +int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
>  int bpf_trampoline_get(u64 key, void *addr,
>                        struct btf_func_model *fmodel,
>                        struct bpf_trampoline **trampoline);
> @@ -667,11 +668,13 @@ static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
>  {
>         return NULL;
>  }
> -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
> +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
> +                                          struct bpf_trampoline *tr)
>  {
>         return -ENOTSUPP;
>  }
> -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
> +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog,
> +                                            struct bpf_trampoline *tr)
>  {
>         return -ENOTSUPP;
>  }
> @@ -740,14 +743,13 @@ struct bpf_prog_aux {
>         u32 max_rdonly_access;
>         u32 max_rdwr_access;
>         const struct bpf_ctx_arg_aux *ctx_arg_info;
> -       struct bpf_prog *linked_prog;
> +       struct bpf_tracing_link *tgt_link;
>         bool verifier_zext; /* Zero extensions has been inserted by verifier. */
>         bool offload_requested;
>         bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
>         bool func_proto_unreliable;
>         bool sleepable;
>         enum bpf_tramp_prog_type trampoline_prog_type;
> -       struct bpf_trampoline *trampoline;
>         struct hlist_node tramp_hlist;
>         /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
>         const struct btf_type *attach_func_proto;
> @@ -827,6 +829,13 @@ struct bpf_link {
>         struct work_struct work;
>  };
>
> +struct bpf_tracing_link {
> +       struct bpf_link link;
> +       enum bpf_attach_type attach_type;
> +       struct bpf_trampoline *trampoline;
> +       struct bpf_prog *tgt_prog;
> +};
> +
>  struct bpf_link_ops {
>         void (*release)(struct bpf_link *link);
>         void (*dealloc)(struct bpf_link *link);
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 2ace56c99c36..e10f13f8251c 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3706,10 +3706,10 @@ struct btf *btf_parse_vmlinux(void)
>
>  struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
>  {
> -       struct bpf_prog *tgt_prog = prog->aux->linked_prog;
> +       struct bpf_tracing_link *tgt_link = prog->aux->tgt_link;
>
> -       if (tgt_prog) {
> -               return tgt_prog->aux->btf;
> +       if (tgt_link && tgt_link->tgt_prog) {
> +               return tgt_link->tgt_prog->aux->btf;
>         } else {
>                 return btf_vmlinux;
>         }
> @@ -3733,14 +3733,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
>                     struct bpf_insn_access_aux *info)
>  {
>         const struct btf_type *t = prog->aux->attach_func_proto;
> -       struct bpf_prog *tgt_prog = prog->aux->linked_prog;
>         struct btf *btf = bpf_prog_get_target_btf(prog);
>         const char *tname = prog->aux->attach_func_name;
>         struct bpf_verifier_log *log = info->log;
> +       struct bpf_prog *tgt_prog = NULL;
>         const struct btf_param *args;
>         u32 nr_args, arg;
>         int i, ret;
>
> +       if (prog->aux->tgt_link)
> +               tgt_prog = prog->aux->tgt_link->tgt_prog;
> +
>         if (off % 8) {
>                 bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
>                         tname, off);
> @@ -4572,7 +4575,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
>                 return -EFAULT;
>         }
>         if (prog_type == BPF_PROG_TYPE_EXT)
> -               prog_type = prog->aux->linked_prog->type;
> +               prog_type = prog->aux->tgt_link->tgt_prog->type;
>
>         t = btf_type_by_id(btf, t->type);
>         if (!t || !btf_type_is_func_proto(t)) {
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index ed0b3578867c..54c125cec218 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2130,7 +2130,6 @@ static void bpf_prog_free_deferred(struct work_struct *work)
>         if (aux->prog->has_callchain_buf)
>                 put_callchain_buffers();
>  #endif
> -       bpf_trampoline_put(aux->trampoline);
>         for (i = 0; i < aux->func_cnt; i++)
>                 bpf_jit_free(aux->func[i]);
>         if (aux->func_cnt) {
> @@ -2146,8 +2145,8 @@ void bpf_prog_free(struct bpf_prog *fp)
>  {
>         struct bpf_prog_aux *aux = fp->aux;
>
> -       if (aux->linked_prog)
> -               bpf_prog_put(aux->linked_prog);
> +       if (aux->tgt_link)
> +               bpf_link_put(&aux->tgt_link->link);

Until the link is primed, you shouldn't bpf_link_put() it. At this
stage the link itself is just a piece of memory that needs to be
kfree()'d, and your circular dependency problem doesn't exist anymore.
You will have to put the trampoline and target prog manually here,
though (you have a similar problem below as well, so you might just
add a small helper for this). But I think that's simpler than relying
on an artificial "defunct" state of a not-yet-activated bpf_link,
which is what you do with the dance around link->prog = NULL.
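
Something along these lines is what I have in mind, as a rough sketch
(the helper name and placement are made up, it just mirrors the fields
this patch adds to struct bpf_tracing_link):

/* hypothetical helper: tear down a bpf_tracing_link that was never
 * primed/activated; it's plain memory holding raw references, so drop
 * those references and kfree() it instead of bpf_link_put()
 */
static void bpf_tracing_link_free_unprimed(struct bpf_tracing_link *link)
{
	if (link->trampoline)
		bpf_trampoline_put(link->trampoline);
	if (link->tgt_prog)
		bpf_prog_put(link->tgt_prog);
	kfree(link);
}

And then bpf_prog_free() above would do, instead of bpf_link_put():

	if (aux->tgt_link)
		bpf_tracing_link_free_unprimed(aux->tgt_link);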

>         INIT_WORK(&aux->work, bpf_prog_free_deferred);
>         schedule_work(&aux->work);
>  }
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 4108ef3b828b..2d238aa8962e 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2095,10 +2095,13 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
>  /* last field in 'union bpf_attr' used by this command */
>  #define        BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
>
> +static struct bpf_tracing_link *bpf_tracing_link_create(struct bpf_prog *prog,
> +                                                       struct bpf_prog *tgt_prog);
> +
>  static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>  {
>         enum bpf_prog_type type = attr->prog_type;
> -       struct bpf_prog *prog;
> +       struct bpf_prog *prog, *tgt_prog = NULL;
>         int err;
>         char license[128];
>         bool is_gpl;
> @@ -2154,14 +2157,27 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
>         prog->expected_attach_type = attr->expected_attach_type;
>         prog->aux->attach_btf_id = attr->attach_btf_id;
>         if (attr->attach_prog_fd) {
> -               struct bpf_prog *tgt_prog;
> -
>                 tgt_prog = bpf_prog_get(attr->attach_prog_fd);
>                 if (IS_ERR(tgt_prog)) {
>                         err = PTR_ERR(tgt_prog);
>                         goto free_prog_nouncharge;
>                 }
> -               prog->aux->linked_prog = tgt_prog;
> +       }
> +
> +       if (tgt_prog || prog->aux->attach_btf_id) {
> +               struct bpf_tracing_link *link;
> +
> +               link = bpf_tracing_link_create(prog, tgt_prog);
> +               if (IS_ERR(link)) {
> +                       err = PTR_ERR(link);
> +                       if (tgt_prog)
> +                               bpf_prog_put(tgt_prog);
> +                       goto free_prog_nouncharge;
> +               }
> +               prog->aux->tgt_link = link;
> +
> +               /* avoid circular ref - will be set on link activation */
> +               link->link.prog = NULL;

See above. I think this might work, but it's sort of an entirely new
pattern for links, and I'd like to avoid having to think about defunct
-> active -> defunct bpf_link state transitions. All you have to do is
clean up the not-yet-activated link appropriately; beyond that, no
extra manipulation of link internals is needed.
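
I.e., roughly (sketch only, reusing the hypothetical helper from
above): keep the link fully initialized here and let the free path
handle it if the prog never gets attached:

		prog->aux->tgt_link = link;
		/* no link->link.prog = NULL dance; if this prog is never
		 * attached, bpf_prog_free() tears the link down via the
		 * unprimed-link helper sketched above
		 */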

>         }
>
>         prog->aux->offload_requested = !!attr->prog_ifindex;
> @@ -2495,14 +2511,21 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
>         return link;
>  }
>
> -struct bpf_tracing_link {
> -       struct bpf_link link;
> -       enum bpf_attach_type attach_type;
> -};
> -
>  static void bpf_tracing_link_release(struct bpf_link *link)
>  {
> -       WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
> +       struct bpf_tracing_link *tr_link =
> +               container_of(link, struct bpf_tracing_link, link);
> +
> +       if (tr_link->trampoline) {
> +               if (link->id)

this link->id check won't be necessary, as bpf_tracing_link_release
would be called only on fully constructed and activated link.
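
So the release callback can do the unlink unconditionally; keeping the
rest of your logic, something like:

	if (tr_link->trampoline) {
		/* release only runs for a settled link, so no link->id check */
		WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
							tr_link->trampoline));
		bpf_trampoline_put(tr_link->trampoline);
	}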

> +                       WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
> +                                                               tr_link->trampoline));
> +
> +               bpf_trampoline_put(tr_link->trampoline);
> +       }
> +
> +       if (tr_link->tgt_prog)
> +               bpf_prog_put(tr_link->tgt_prog);
>  }
>
>  static void bpf_tracing_link_dealloc(struct bpf_link *link)
> @@ -2542,10 +2565,27 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
>         .fill_link_info = bpf_tracing_link_fill_link_info,
>  };
>
> +static struct bpf_tracing_link *bpf_tracing_link_create(struct bpf_prog *prog,
> +                                                       struct bpf_prog *tgt_prog)
> +{
> +       struct bpf_tracing_link *link;
> +
> +       link = kzalloc(sizeof(*link), GFP_USER);
> +       if (!link)
> +               return ERR_PTR(-ENOMEM);
> +
> +       bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
> +                     &bpf_tracing_link_lops, prog);
> +       link->attach_type = prog->expected_attach_type;
> +       link->tgt_prog = tgt_prog;
> +
> +       return link;
> +}
> +
>  static int bpf_tracing_prog_attach(struct bpf_prog *prog)
>  {
> +       struct bpf_tracing_link *link, *olink;
>         struct bpf_link_primer link_primer;
> -       struct bpf_tracing_link *link;
>         int err;
>
>         switch (prog->type) {
> @@ -2574,14 +2614,16 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
>                 goto out_put_prog;
>         }
>
> -       link = kzalloc(sizeof(*link), GFP_USER);
> +       link = READ_ONCE(prog->aux->tgt_link);
>         if (!link) {
> -               err = -ENOMEM;
> +               err = -ENOENT;
> +               goto out_put_prog;
> +       }
> +       olink = cmpxchg(&prog->aux->tgt_link, link, NULL);
> +       if (olink != link) {
> +               err = -ENOENT;
>                 goto out_put_prog;
>         }

Wouldn't single xchg to NULL be sufficient to achieve the same?
READ_ONCE + cmpxchg seems unnecessary to me.
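
Something like this is what I mean (sketch):

	/* atomically take ownership of the pre-allocated link; fails if an
	 * earlier attach already consumed it
	 */
	link = xchg(&prog->aux->tgt_link, NULL);
	if (!link) {
		err = -ENOENT;
		goto out_put_prog;
	}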

> -       bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
> -                     &bpf_tracing_link_lops, prog);
> -       link->attach_type = prog->expected_attach_type;
>
>         err = bpf_link_prime(&link->link, &link_primer);
>         if (err) {

If priming errors out, you need to put the target prog and the
trampoline; kfree(link) won't do it (and calling bpf_link_cleanup() is
not correct before priming). See above as well.

BTW, one interesting side effect of all this is that if your initial
attach fails, you won't be able to try again, because
prog->aux->tgt_link is gone. If that's a problem, we'll need to
introduce locking and copy that link, try to attach, and only clear
out prog->aux->tgt_link if we succeeded. Just bringing this up, as it
might not be obvious (or I might be wrong :).
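
Very roughly, the retry-friendly shape would look something like this
(pure sketch: the lock is hypothetical and the link-copy and priming
details are elided):

	mutex_lock(&prog->aux->tgt_link_lock);	/* hypothetical lock */
	link = prog->aux->tgt_link;
	if (!link) {
		err = -ENOENT;
		goto out_unlock;
	}
	/* ... prime and attach using a copy of the link's target info ... */
	err = bpf_trampoline_link_prog(prog, link->trampoline);
	if (err)
		goto out_unlock;	/* tgt_link kept, attach can be retried */
	prog->aux->tgt_link = NULL;	/* consumed only on success */
out_unlock:
	mutex_unlock(&prog->aux->tgt_link_lock);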

> @@ -2589,12 +2631,17 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
>                 goto out_put_prog;
>         }
>
> -       err = bpf_trampoline_link_prog(prog);
> +       err = bpf_trampoline_link_prog(prog, link->trampoline);
>         if (err) {
>                 bpf_link_cleanup(&link_primer);
>                 goto out_put_prog;
>         }
>
> +       /* at this point the link is no longer referenced from struct bpf_prog,
> +        * so we can populate this without introducing a circular reference.
> +        */
> +       link->link.prog = prog;

this won't be necessary either, as per above discussion on circular dependencies

> +
>         return bpf_link_settle(&link_primer);
>  out_put_prog:
>         bpf_prog_put(prog);

[...]



