On Tue, Apr 19, 2022 at 12:00:48PM -0700, Stanislav Fomichev wrote:
> +static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
> +{
> +	enum bpf_cgroup_storage_type stype;
> +
> +	for_each_cgroup_storage_type(stype)
> +		bpf_cgroup_storage_unlink(storages[stype]);
> +}
> +
>  /* Called when bpf_cgroup_link is auto-detached from dying cgroup.
>   * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
>   * doesn't free link memory, which will eventually be done by bpf_link's
> @@ -166,6 +256,16 @@ static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
>  	link->cgroup = NULL;
>  }
>  
> +static void bpf_cgroup_lsm_shim_release(struct bpf_prog *prog,
> +					enum cgroup_bpf_attach_type atype)
> +{
> +	if (prog->aux->cgroup_atype < CGROUP_LSM_START ||
> +	    prog->aux->cgroup_atype > CGROUP_LSM_END)
> +		return;
> +
> +	bpf_trampoline_unlink_cgroup_shim(prog);
> +}
> +
>  /**
>   * cgroup_bpf_release() - put references of all bpf programs and
>   *                        release all cgroup bpf data
> @@ -190,10 +290,18 @@ static void cgroup_bpf_release(struct work_struct *work)
>  
>  		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
>  			hlist_del(&pl->node);
> -			if (pl->prog)
> +			if (pl->prog) {
> +				if (atype == BPF_LSM_CGROUP)
> +					bpf_cgroup_lsm_shim_release(pl->prog,
> +								    atype);
>  				bpf_prog_put(pl->prog);
> -			if (pl->link)
> +			}
> +			if (pl->link) {
> +				if (atype == BPF_LSM_CGROUP)
> +					bpf_cgroup_lsm_shim_release(pl->link->link.prog,
> +								    atype);
>  				bpf_cgroup_link_auto_detach(pl->link);
> +			}
>  			kfree(pl);
>  			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
>  		}
> @@ -506,6 +614,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	struct bpf_prog *old_prog = NULL;
>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
>  	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> +	struct bpf_attach_target_info tgt_info = {};
>  	enum cgroup_bpf_attach_type atype;
>  	struct bpf_prog_list *pl;
>  	struct hlist_head *progs;
> @@ -522,9 +631,35 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  		/* replace_prog implies BPF_F_REPLACE, and vice versa */
>  		return -EINVAL;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (type == BPF_LSM_CGROUP) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
> +
> +		if (replace_prog) {
> +			/* Reusing shim from the original program.
> +			 */
> +			if (replace_prog->aux->attach_btf_id !=
> +			    p->aux->attach_btf_id)
> +				return -EINVAL;
> +
> +			atype = replace_prog->aux->cgroup_atype;
> +		} else {
> +			err = bpf_check_attach_target(NULL, p, NULL,
> +						      p->aux->attach_btf_id,
> +						      &tgt_info);
> +			if (err)
> +				return -EINVAL;
> +
> +			atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
> +			if (atype < 0)
> +				return atype;
> +		}
> +
> +		p->aux->cgroup_atype = atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];
>  
> @@ -580,13 +715,26 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	if (err)
>  		goto cleanup;
>  
> +	bpf_cgroup_storages_link(new_storage, cgrp, type);

After looking back and forth between this patch 3 and the next patch 4, I
can't quite think this through quickly, so it may be faster to ask :)  I have
questions on the ordering between update_effective_progs(),
bpf_cgroup_storages_link(), and bpf_trampoline_link_cgroup_shim().  Why does
bpf_cgroup_storages_link() have to be moved up and done before
bpf_trampoline_link_cgroup_shim()?

> +
> +	if (type == BPF_LSM_CGROUP && !old_prog) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
> +
> +		err = bpf_trampoline_link_cgroup_shim(p, &tgt_info);

update_effective_progs() was done a few lines above, so the effective[atype]
array already has the new prog.  If bpf_trampoline_link_cgroup_shim() fails to
add the shim_prog to the trampoline here, the new prog is still left in
effective[atype].  There is no shim_prog to execute effective[], but there may
be other places that access effective[], e.g. __cgroup_bpf_query(), although I
think to_cgroup_bpf_attach_type() is not handling BPF_LSM_CGROUP now.  More on
__cgroup_bpf_query() later.

How about doing bpf_trampoline_link_cgroup_shim() just before
activate_effective_progs()?
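Completely untested sketch, just to make the ordering I have in mind concrete
(the cleanup_shim label is made up and the exact placement would depend on how
patch 4 wires things up):

	/* Link the shim first, so a failure cannot leave the new
	 * prog published in effective[atype] without a shim to run it.
	 */
	if (type == BPF_LSM_CGROUP && !old_prog) {
		err = bpf_trampoline_link_cgroup_shim(p, &tgt_info);
		if (err)
			goto cleanup;
	}

	err = update_effective_progs(cgrp, atype);
	if (err)
		goto cleanup_shim;

	if (old_prog)
		bpf_prog_put(old_prog);
	else
		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
	bpf_cgroup_storages_link(new_storage, cgrp, type);
	return 0;

cleanup_shim:
	if (type == BPF_LSM_CGROUP && !old_prog)
		bpf_trampoline_unlink_cgroup_shim(p);
cleanup:
	...

Or, to do it literally just before activate_effective_progs(): link the shim
only after compute_effective_progs() has succeeded, so nothing can fail after
the new prog becomes visible.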
Have you thought about what is needed to support __cgroup_bpf_query()?
bpf_attach_type and cgroup_bpf_attach_type are no longer a 1:1 relationship.
Loop through cgroup_lsm_atype_usecnt[] and output them all under
BPF_LSM_CGROUP?

Same goes for local_storage.  All lsm-cgrp programs attaching to different
attach_btf_ids share one local_storage because the key is only cgroup-id and
attach_type.  Is it enough to start with that first, and the key could be
extended later with a new map_flag?  This is related to the API.
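On the __cgroup_bpf_query() side, something along these lines is what I am
picturing (untested sketch; it walks the CGROUP_LSM_* atype range from this
patch rather than cgroup_lsm_atype_usecnt[] itself, but the idea is the same):

	/* BPF_LSM_CGROUP fans out to many cgroup_bpf_attach_type
	 * slots, so it cannot go through a single
	 * to_cgroup_bpf_attach_type() lookup.
	 */
	if (type == BPF_LSM_CGROUP) {
		for (atype = CGROUP_LSM_START; atype <= CGROUP_LSM_END; atype++) {
			progs = &cgrp->bpf.progs[atype];
			/* copy the prog ids (or effective[atype]) into
			 * uattr, all reported under BPF_LSM_CGROUP
			 */
		}
	} else {
		atype = to_cgroup_bpf_attach_type(type);
		if (atype < 0)
			return -EINVAL;
	}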
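For reference, this is the whole key today (include/uapi/linux/bpf.h), so all
BPF_LSM_CGROUP programs on a given cgroup land on the same entry regardless of
which attach_btf_id they hook:

	struct bpf_cgroup_storage_key {
		__u64	cgroup_inode_id;	/* cgroup inode id */
		__u32	attach_type;		/* program attach type */
	};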
> +		if (err)
> +			goto cleanup_trampoline;
> +	}
> +
>  	if (old_prog)
>  		bpf_prog_put(old_prog);
>  	else
>  		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
> -	bpf_cgroup_storages_link(new_storage, cgrp, type);
> +
>  	return 0;
>  
> +cleanup_trampoline:
> +	bpf_cgroup_storages_unlink(new_storage);
> +
>  cleanup:
>  	if (old_prog) {
>  		pl->prog = old_prog;
> @@ -678,9 +826,13 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
>  	struct hlist_head *progs;
>  	bool found = false;
>  
> -	atype = to_cgroup_bpf_attach_type(link->type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (link->type == BPF_LSM_CGROUP) {
> +		atype = link->link.prog->aux->cgroup_atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(link->type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];
>  
> @@ -696,6 +848,9 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
>  	if (!found)
>  		return -ENOENT;
>  
> +	if (link->type == BPF_LSM_CGROUP)
> +		new_prog->aux->cgroup_atype = atype;
> +
>  	old_prog = xchg(&link->link.prog, new_prog);
>  	replace_effective_prog(cgrp, atype, link);
>  	bpf_prog_put(old_prog);
> @@ -779,9 +934,15 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>  	u32 flags;
>  	int err;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (type == BPF_LSM_CGROUP) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
> +
> +		atype = p->aux->cgroup_atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];
>  	flags = cgrp->bpf.flags[atype];
> @@ -803,6 +964,10 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>  	if (err)
>  		goto cleanup;
>  
> +	if (type == BPF_LSM_CGROUP)
> +		bpf_cgroup_lsm_shim_release(prog ? : link->link.prog,
> +					    atype);
> +
>  	/* now can actually delete it from this cgroup list */
>  	hlist_del(&pl->node);
> [ ... ]
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 0c4fd194e801..c76dfa4ea2d9 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -11,6 +11,8 @@
>  #include <linux/rcupdate_wait.h>
>  #include <linux/module.h>
>  #include <linux/static_call.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf_lsm.h>
>  
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -485,6 +487,149 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
>  	return err;
>  }
>  
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> +static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog,
> +					  bpf_func_t bpf_func)
> +{
> +	struct bpf_prog *p;
> +
> +	p = bpf_prog_alloc(1, 0);
> +	if (!p)
> +		return NULL;
> +
> +	p->jited = false;
> +	p->bpf_func = bpf_func;
> +
> +	p->aux->cgroup_atype = prog->aux->cgroup_atype;
> +	p->aux->attach_func_proto = prog->aux->attach_func_proto;
> +	p->aux->attach_btf_id = prog->aux->attach_btf_id;
> +	p->aux->attach_btf = prog->aux->attach_btf;
> +	btf_get(p->aux->attach_btf);
> +	p->type = BPF_PROG_TYPE_LSM;
> +	p->expected_attach_type = BPF_LSM_MAC;
> +	bpf_prog_inc(p);
> +
> +	return p;
> +}
> +
> +static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr,
> +					 bpf_func_t bpf_func)
> +{
> +	const struct bpf_prog_aux *aux;
> +	int kind;
> +
> +	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> +		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
> +			struct bpf_prog *p = aux->prog;
> +
> +			if (p->bpf_func == bpf_func)
> +				return p;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> +				    struct bpf_attach_target_info *tgt_info)
> +{
> +	struct bpf_prog *shim_prog = NULL;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return err;
> +
> +	tr = bpf_trampoline_get(key, tgt_info);
> +	if (!tr)
> +		return -ENOMEM;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* Reusing existing shim attached by the other program.
> +		 */
> +		bpf_prog_inc(shim_prog);
> +		mutex_unlock(&tr->mutex);
> +		return 0;
> +	}
> +
> +	/* Allocate and install new shim.
> +	 */
> +
> +	shim_prog = cgroup_shim_alloc(prog, bpf_func);
> +	if (!shim_prog) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	err = __bpf_trampoline_link_prog(shim_prog, tr);
> +	if (err)
> +		goto out;
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	return 0;
> +out:
> +	if (shim_prog)
> +		bpf_prog_put(shim_prog);
> +
> +	mutex_unlock(&tr->mutex);
> +	return err;
> +}
> +
> +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
> +{
> +	struct bpf_prog *shim_prog;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return;
> +
> +	tr = bpf_trampoline_lookup(key);
> +	if (!tr)
> +		return;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* We use shim_prog refcnt for tracking whether to
> +		 * remove the shim program from the trampoline.
> +		 * Trampoline's mutex is held while refcnt is
> +		 * added/subtracted so we don't need to care about
> +		 * potential races.
> +		 */
> +
> +		if (atomic64_read(&shim_prog->aux->refcnt) == 1)
> +			WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
> +
> +		bpf_prog_put(shim_prog);
> +	}
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
> +
> +	if (shim_prog)
> +		bpf_trampoline_put(tr);
> +}
> +#endif
> +