On Fri, Jun 10, 2022 at 09:57:56AM -0700, Stanislav Fomichev wrote: > Allow attaching to lsm hooks in the cgroup context. > > Attaching to per-cgroup LSM works exactly like attaching > to other per-cgroup hooks. New BPF_LSM_CGROUP is added > to trigger new mode; the actual lsm hook we attach to is > signaled via existing attach_btf_id. > > For the hooks that have 'struct socket' or 'struct sock' as its first > argument, we use the cgroup associated with that socket. For the rest, > we use 'current' cgroup (this is all on default hierarchy == v2 only). > Note that for some hooks that work on 'struct sock' we still > take the cgroup from 'current' because some of them work on the socket > that hasn't been properly initialized yet. > > Behind the scenes, we allocate a shim program that is attached > to the trampoline and runs cgroup effective BPF programs array. > This shim has some rudimentary ref counting and can be shared > between several programs attaching to the same per-cgroup lsm hook. nit. The 'per-cgroup' may be read as each cgroup has its own shim. It will be useful to rephrase it a little. > > Note that this patch bloats cgroup size because we add 211 > cgroup_bpf_attach_type(s) for simplicity sake. This will be > addressed in the subsequent patch. > > Also note that we only add non-sleepable flavor for now. To enable > sleepable use-cases, bpf_prog_run_array_cg has to grab trace rcu, > shim programs have to be freed via trace rcu, cgroup_bpf.effective > should be also trace-rcu-managed + maybe some other changes that > I'm not aware of. > > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx> [ ... ] > @@ -1840,10 +1850,8 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, > emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); > /* arg3: lea rdx, [rbp - run_ctx_off] */ > EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); > - if (emit_call(&prog, > - p->aux->sleepable ? 
__bpf_prog_exit_sleepable : > - __bpf_prog_exit, prog)) > - return -EINVAL; > + if (emit_call(&prog, exit, prog)) > + return -EINVAL; > > *pprog = prog; > return 0; > diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h > index 5d268e76d8e6..b99f8c3e37ea 100644 > --- a/include/linux/bpf-cgroup-defs.h > +++ b/include/linux/bpf-cgroup-defs.h > @@ -10,6 +10,12 @@ > > struct bpf_prog_array; > > +#ifdef CONFIG_BPF_LSM > +#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */ > +#else > +#define CGROUP_LSM_NUM 0 > +#endif > + > enum cgroup_bpf_attach_type { > CGROUP_BPF_ATTACH_TYPE_INVALID = -1, > CGROUP_INET_INGRESS = 0, > @@ -35,6 +41,8 @@ enum cgroup_bpf_attach_type { > CGROUP_INET4_GETSOCKNAME, > CGROUP_INET6_GETSOCKNAME, > CGROUP_INET_SOCK_RELEASE, > + CGROUP_LSM_START, > + CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, Likely a dumb question, just in case, presumably everything should be fine when CGROUP_LSM_NUM is 0 ? > MAX_CGROUP_BPF_ATTACH_TYPE > }; > [ ... 
] > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c > index 4adb4f3ecb7f..b0314889a409 100644 > --- a/kernel/bpf/cgroup.c > +++ b/kernel/bpf/cgroup.c > @@ -14,6 +14,8 @@ > #include <linux/string.h> > #include <linux/bpf.h> > #include <linux/bpf-cgroup.h> > +#include <linux/bpf_lsm.h> > +#include <linux/bpf_verifier.h> > #include <net/sock.h> > #include <net/bpf_sk_storage.h> > > @@ -61,6 +63,88 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, > return run_ctx.retval; > } > > +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, > + const struct bpf_insn *insn) > +{ > + const struct bpf_prog *shim_prog; > + struct sock *sk; > + struct cgroup *cgrp; > + int ret = 0; > + u64 *args; > + > + args = (u64 *)ctx; > + sk = (void *)(unsigned long)args[0]; > + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ > + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); > + > + cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); > + if (likely(cgrp)) > + ret = bpf_prog_run_array_cg(&cgrp->bpf, > + shim_prog->aux->cgroup_atype, > + ctx, bpf_prog_run, 0, NULL); > + return ret; > +} > + > +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, > + const struct bpf_insn *insn) > +{ > + const struct bpf_prog *shim_prog; > + struct socket *sock; > + struct cgroup *cgrp; > + int ret = 0; > + u64 *args; > + > + args = (u64 *)ctx; > + sock = (void *)(unsigned long)args[0]; > + /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ > + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); > + > + cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data); > + if (likely(cgrp)) > + ret = bpf_prog_run_array_cg(&cgrp->bpf, > + shim_prog->aux->cgroup_atype, > + ctx, bpf_prog_run, 0, NULL); > + return ret; > +} > + > +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, > + const struct bpf_insn *insn) > +{ > + const struct bpf_prog *shim_prog; > + struct cgroup *cgrp; > + int ret = 0; > + > + 
/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ > + shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); > + > + rcu_read_lock(); nit. Not needed. Maybe add a comment that it has already been acquired in __bpf_prog_enter_lsm_cgroup(). For sleepable in the future, rcu_read_lock() cannot be done here either. > + cgrp = task_dfl_cgroup(current); > + if (likely(cgrp)) > + ret = bpf_prog_run_array_cg(&cgrp->bpf, > + shim_prog->aux->cgroup_atype, > + ctx, bpf_prog_run, 0, NULL); > + rcu_read_unlock(); > + return ret; > +} > + > +#ifdef CONFIG_BPF_LSM > +static enum cgroup_bpf_attach_type > +bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) > +{ > + if (attach_type != BPF_LSM_CGROUP) > + return to_cgroup_bpf_attach_type(attach_type); > + return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id); > +} > +#else > +static enum cgroup_bpf_attach_type > +bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) > +{ > + if (attach_type != BPF_LSM_CGROUP) > + return to_cgroup_bpf_attach_type(attach_type); > + return -EOPNOTSUPP; > +} > +#endif /* CONFIG_BPF_LSM */ > + > void cgroup_bpf_offline(struct cgroup *cgrp) > { > cgroup_get(cgrp); > @@ -163,10 +247,20 @@ static void cgroup_bpf_release(struct work_struct *work) > > hlist_for_each_entry_safe(pl, pltmp, progs, node) { > hlist_del(&pl->node); > - if (pl->prog) > + if (pl->prog) { > +#ifdef CONFIG_BPF_LSM This #ifdef should not be needed, as it is not needed in __cgroup_bpf_attach() below. > + if (pl->prog->expected_attach_type == BPF_LSM_CGROUP) > + bpf_trampoline_unlink_cgroup_shim(pl->prog); > +#endif > bpf_prog_put(pl->prog); > - if (pl->link) > + } > + if (pl->link) { > +#ifdef CONFIG_BPF_LSM Same here.
> + if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP) > + bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog); > +#endif > bpf_cgroup_link_auto_detach(pl->link); > + } > kfree(pl); > static_branch_dec(&cgroup_bpf_enabled_key[atype]); > } > @@ -479,6 +573,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, > struct bpf_prog *old_prog = NULL; > struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; > struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; > + struct bpf_prog *new_prog = prog ? : link->link.prog; > enum cgroup_bpf_attach_type atype; > struct bpf_prog_list *pl; > struct hlist_head *progs; > @@ -495,7 +590,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, > /* replace_prog implies BPF_F_REPLACE, and vice versa */ > return -EINVAL; > > - atype = to_cgroup_bpf_attach_type(type); > + atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id); > if (atype < 0) > return -EINVAL; > > @@ -549,17 +644,30 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, > bpf_cgroup_storages_assign(pl->storage, storage); > cgrp->bpf.flags[atype] = saved_flags; > > + if (type == BPF_LSM_CGROUP) { > + err = bpf_trampoline_link_cgroup_shim(new_prog, atype); > + if (err) > + goto cleanup; > + } > + > err = update_effective_progs(cgrp, atype); > if (err) > - goto cleanup; > + goto cleanup_trampoline; > > - if (old_prog) > + if (old_prog) { > + if (type == BPF_LSM_CGROUP) > + bpf_trampoline_unlink_cgroup_shim(old_prog); > bpf_prog_put(old_prog); > - else > + } else { > static_branch_inc(&cgroup_bpf_enabled_key[atype]); > + } > bpf_cgroup_storages_link(new_storage, cgrp, type); > return 0; > > +cleanup_trampoline: > + if (type == BPF_LSM_CGROUP) > + bpf_trampoline_unlink_cgroup_shim(new_prog); > + > cleanup: > if (old_prog) { > pl->prog = old_prog; > @@ -651,7 +759,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp, > struct hlist_head *progs; > bool found = false; > > - atype = 
to_cgroup_bpf_attach_type(link->type); > + atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id); > if (atype < 0) > return -EINVAL; > > @@ -803,9 +911,15 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, > struct bpf_prog *old_prog; > struct bpf_prog_list *pl; > struct hlist_head *progs; > + u32 attach_btf_id = 0; > u32 flags; > > - atype = to_cgroup_bpf_attach_type(type); > + if (prog) > + attach_btf_id = prog->aux->attach_btf_id; > + if (link) > + attach_btf_id = link->link.prog->aux->attach_btf_id; > + > + atype = bpf_cgroup_atype_find(type, attach_btf_id); > if (atype < 0) > return -EINVAL; > > @@ -839,8 +953,11 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, > if (hlist_empty(progs)) > /* last program was detached, reset flags to zero */ > cgrp->bpf.flags[atype] = 0; > - if (old_prog) > + if (old_prog) { > + if (type == BPF_LSM_CGROUP) > + bpf_trampoline_unlink_cgroup_shim(old_prog); I think the same bpf_trampoline_unlink_cgroup_shim() needs to be done in bpf_cgroup_link_release()? It should be done just after WARN_ON(__cgroup_bpf_detach()). [ ... ] > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index aeb31137b2ed..a237be4f8bb3 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -3416,6 +3416,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) > return BPF_PROG_TYPE_SK_LOOKUP; > case BPF_XDP: > return BPF_PROG_TYPE_XDP; > + case BPF_LSM_CGROUP: > + return BPF_PROG_TYPE_LSM; > default: > return BPF_PROG_TYPE_UNSPEC; > } > @@ -3469,6 +3471,11 @@ static int bpf_prog_attach(const union bpf_attr *attr) > case BPF_PROG_TYPE_CGROUP_SOCKOPT: > case BPF_PROG_TYPE_CGROUP_SYSCTL: > case BPF_PROG_TYPE_SOCK_OPS: > + case BPF_PROG_TYPE_LSM: > + if (ptype == BPF_PROG_TYPE_LSM && > + prog->expected_attach_type != BPF_LSM_CGROUP) Check this in bpf_prog_attach_check_attach_type() where other expected_attach_type are enforced. 
> +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog) > +{ > + struct bpf_shim_tramp_link *shim_link = NULL; > + struct bpf_trampoline *tr; > + bpf_func_t bpf_func; > + u64 key; > + > + key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf, > + prog->aux->attach_btf_id); > + > + bpf_lsm_find_cgroup_shim(prog, &bpf_func); > + tr = bpf_trampoline_lookup(key); > + if (!tr) nit. !tr should not be possible? If it indeed cannot happen, maybe add a WARN_ON_ONCE() or remove this check. > + return; > + > + mutex_lock(&tr->mutex); > + shim_link = cgroup_shim_find(tr, bpf_func); > + mutex_unlock(&tr->mutex); > + > + if (shim_link) > + bpf_link_put(&shim_link->link.link); > + > + bpf_trampoline_put(tr); /* bpf_trampoline_lookup above */ > +} > +#endif [ ... ] > diff --git a/tools/include/linux/btf_ids.h b/tools/include/linux/btf_ids.h > index 57890b357f85..2345b502b439 100644 > --- a/tools/include/linux/btf_ids.h > +++ b/tools/include/linux/btf_ids.h > @@ -172,7 +172,9 @@ extern struct btf_id_set name; > BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP_TW, tcp_timewait_sock) \ > BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock) \ > BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock) \ > - BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) > + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock) \ > + BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock) \ The existing tools' btf_ids.h looks more outdated than the kernel's btf_ids.h. unix_sock is missing, which is added back here. mptcp_sock is also missing but not added. I assume the later test needs unix_sock here? Others lgtm.