Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx> writes: > On Fri, Sep 11, 2020 at 3:00 AM Toke Høiland-Jørgensen <toke@xxxxxxxxxx> wrote: >> >> From: Toke Høiland-Jørgensen <toke@xxxxxxxxxx> >> >> The bpf_tracing_link structure is a convenient data structure to contain >> the reference to a linked program; in preparation for supporting multiple >> attachments for the same freplace program, move the linked_prog in >> prog->aux into a bpf_tracing_link wrapper. >> >> With this change, it is no longer possible to attach the same tracing >> program multiple times (detaching in-between), since the reference from the >> tracing program to the target disappears on the first attach. However, >> since the next patch will let the caller supply an attach target, that will >> also make it possible to attach to the same place multiple times. >> >> Signed-off-by: Toke Høiland-Jørgensen <toke@xxxxxxxxxx> >> --- >> include/linux/bpf.h | 21 +++++++++--- >> kernel/bpf/btf.c | 13 +++++--- >> kernel/bpf/core.c | 5 +-- >> kernel/bpf/syscall.c | 81 +++++++++++++++++++++++++++++++++++++---------- >> kernel/bpf/trampoline.c | 12 ++----- >> kernel/bpf/verifier.c | 13 +++++--- >> 6 files changed, 102 insertions(+), 43 deletions(-) >> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >> index 7f19c3216370..722c60f1c1fc 100644 >> --- a/include/linux/bpf.h >> +++ b/include/linux/bpf.h >> @@ -26,6 +26,7 @@ struct bpf_verifier_log; >> struct perf_event; >> struct bpf_prog; >> struct bpf_prog_aux; >> +struct bpf_tracing_link; >> struct bpf_map; >> struct sock; >> struct seq_file; >> @@ -614,8 +615,8 @@ static __always_inline unsigned int bpf_dispatcher_nop_func( >> } >> #ifdef CONFIG_BPF_JIT >> struct bpf_trampoline *bpf_trampoline_lookup(u64 key); >> -int bpf_trampoline_link_prog(struct bpf_prog *prog); >> -int bpf_trampoline_unlink_prog(struct bpf_prog *prog); >> +int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); >> +int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); >> int bpf_trampoline_get(u64 key, void *addr, >> struct btf_func_model *fmodel, >> struct bpf_trampoline **trampoline); >> @@ -667,11 +668,13 @@ static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) >> { >> return NULL; >> } >> -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) >> +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, >> + struct bpf_trampoline *tr) >> { >> return -ENOTSUPP; >> } >> -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) >> +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, >> + struct bpf_trampoline *tr) >> { >> return -ENOTSUPP; >> } >> @@ -740,14 +743,13 @@ struct bpf_prog_aux { >> u32 max_rdonly_access; >> u32 max_rdwr_access; >> const struct bpf_ctx_arg_aux *ctx_arg_info; >> - struct bpf_prog *linked_prog; >> + struct bpf_tracing_link *tgt_link; >> bool verifier_zext; /* Zero extensions has been inserted by verifier. */ >> bool offload_requested; >> bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ >> bool func_proto_unreliable; >> bool sleepable; >> enum bpf_tramp_prog_type trampoline_prog_type; >> - struct bpf_trampoline *trampoline; >> struct hlist_node tramp_hlist; >> /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ >> const struct btf_type *attach_func_proto; >> @@ -827,6 +829,13 @@ struct bpf_link { >> struct work_struct work; >> }; >> >> +struct bpf_tracing_link { >> + struct bpf_link link; >> + enum bpf_attach_type attach_type; >> + struct bpf_trampoline *trampoline; >> + struct bpf_prog *tgt_prog; >> +}; >> + >> struct bpf_link_ops { >> void (*release)(struct bpf_link *link); >> void (*dealloc)(struct bpf_link *link); >> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c >> index 2ace56c99c36..e10f13f8251c 100644 >> --- a/kernel/bpf/btf.c >> +++ b/kernel/bpf/btf.c >> @@ -3706,10 +3706,10 @@ struct btf *btf_parse_vmlinux(void) >> >> struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog) >> { >> - struct bpf_prog *tgt_prog = prog->aux->linked_prog; >> + struct bpf_tracing_link *tgt_link = prog->aux->tgt_link; >> >> - if (tgt_prog) { >> - return tgt_prog->aux->btf; >> + if (tgt_link && tgt_link->tgt_prog) { >> + return tgt_link->tgt_prog->aux->btf; >> } else { >> return btf_vmlinux; >> } >> @@ -3733,14 +3733,17 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, >> struct bpf_insn_access_aux *info) >> { >> const struct btf_type *t = prog->aux->attach_func_proto; >> - struct bpf_prog *tgt_prog = prog->aux->linked_prog; >> struct btf *btf = bpf_prog_get_target_btf(prog); >> const char *tname = prog->aux->attach_func_name; >> struct bpf_verifier_log *log = info->log; >> + struct bpf_prog *tgt_prog = NULL; >> const struct btf_param *args; >> u32 nr_args, arg; >> int i, ret; >> >> + if (prog->aux->tgt_link) >> + tgt_prog = prog->aux->tgt_link->tgt_prog; >> + >> if (off % 8) { >> bpf_log(log, "func '%s' offset %d is not multiple of 8\n", >> tname, off); >> @@ -4572,7 +4575,7 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog, >> return -EFAULT; >> } >> if (prog_type == BPF_PROG_TYPE_EXT) >> - prog_type = prog->aux->linked_prog->type; >> + prog_type = prog->aux->tgt_link->tgt_prog->type; >> >> t = btf_type_by_id(btf, t->type); >> if (!t || !btf_type_is_func_proto(t)) { >> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c >> index ed0b3578867c..54c125cec218 100644 >> --- a/kernel/bpf/core.c >> +++ b/kernel/bpf/core.c >> @@ -2130,7 +2130,6 @@ static void bpf_prog_free_deferred(struct work_struct *work) >> if (aux->prog->has_callchain_buf) >> put_callchain_buffers(); >> #endif >> - bpf_trampoline_put(aux->trampoline); >> for (i = 0; i < aux->func_cnt; i++) >> bpf_jit_free(aux->func[i]); >> if (aux->func_cnt) { >> @@ -2146,8 +2145,8 @@ void bpf_prog_free(struct bpf_prog *fp) >> { >> struct bpf_prog_aux *aux = fp->aux; >> >> - if (aux->linked_prog) >> - bpf_prog_put(aux->linked_prog); >> + if (aux->tgt_link) >> + bpf_link_put(&aux->tgt_link->link); > > Until the link is primed, you shouldn't bpf_link_put() it. At this > stage the link itself is just a piece of memory that needs to be > kfree()'d. And your circular dependency problem doesn't exist anymore. > You'll have to put a trampoline and target prog manually here, though > (but you have a similar problem below as well, so might just have a > small helper to do this). But I think it's simpler that relying on an > artificial "defunct" state of not-yet-activated bpf_link, which you do > with the dance around link->prog = NULL. Yeah, makes sense. I initially figured that would be 'breaking the abstraction' of bpf_link, but I ended up having to do that anyway, so you're right I might as well treat it as a piece of memory here. [...] >> @@ -2574,14 +2614,16 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) >> goto out_put_prog; >> } >> >> - link = kzalloc(sizeof(*link), GFP_USER); >> + link = READ_ONCE(prog->aux->tgt_link); >> if (!link) { >> - err = -ENOMEM; >> + err = -ENOENT; >> + goto out_put_prog; >> + } >> + olink = cmpxchg(&prog->aux->tgt_link, link, NULL); >> + if (olink != link) { >> + err = -ENOENT; >> goto out_put_prog; >> } > > Wouldn't single xchg to NULL be sufficient to achieve the same? > READ_ONCE + cmpxchg seems unnecessary to me. It would, but in the next patch I'm introducing a check on the contents of the link before cmpxchg'ing it, so figured it was easier to just use the same pattern here. >> - bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, >> - &bpf_tracing_link_lops, prog); >> - link->attach_type = prog->expected_attach_type; >> >> err = bpf_link_prime(&link->link, &link_primer); >> if (err) { > > if priming errors out, you need to put target prog and trampoline, > kfree(link) won't do it (and calling bpf_link_cleanup() is not correct > before priming). See above as well. Ah yes, good catch! > BTW, one interesting side effect of all this is that if your initial > attach failed, you won't be able to try again, because > prog->aux->tgt_link is gone. If that's the problem, we'll need to > introduce locking and copy that link, try to attach, then clear out > prog->aug->tgt_link only if we succeeded. Just bringing this up, as it > might not be obvious (or I might be wrong :). Yeah, did think about that. From a purist PoV you're right that a "destructive attempt" is not ideal; but we already agreed that clearing out the link on attach was an acceptable change in behaviour. And I figured that a failure in link_prim() or trampoline_link_prog() would be quite rare, so not something we'd want to expend a lot of effort ensuring was really atomic... -Toke