On Fri, Apr 29, 2022 at 02:15:33PM -0700, Stanislav Fomichev wrote:
> diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
> index 064eccba641d..a0e68ef5dfb1 100644
> --- a/kernel/bpf/bpf_lsm.c
> +++ b/kernel/bpf/bpf_lsm.c
> @@ -16,6 +16,7 @@
>  #include <linux/bpf_local_storage.h>
>  #include <linux/btf_ids.h>
>  #include <linux/ima.h>
> +#include <linux/bpf-cgroup.h>
>
>  /* For every LSM hook that allows attachment of BPF programs, declare a nop
>   * function where a BPF program can be attached.
> @@ -35,6 +36,66 @@ BTF_SET_START(bpf_lsm_hooks)
>  #undef LSM_HOOK
>  BTF_SET_END(bpf_lsm_hooks)
>
> +/* List of LSM hooks that should operate on 'current' cgroup regardless
> + * of function signature.
> + */
> +BTF_SET_START(bpf_lsm_current_hooks)
> +/* operate on freshly allocated sk without any cgroup association */
> +BTF_ID(func, bpf_lsm_sk_alloc_security)
> +BTF_ID(func, bpf_lsm_sk_free_security)
> +BTF_SET_END(bpf_lsm_current_hooks)
> +
> +int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> +			     bpf_func_t *bpf_func)
> +{
> +	const struct btf_type *first_arg_type;
> +	const struct btf_type *sock_type;
> +	const struct btf_type *sk_type;
> +	const struct btf *btf_vmlinux;
> +	const struct btf_param *args;
> +	s32 type_id;
> +
> +	if (!prog->aux->attach_func_proto ||
> +	    !btf_type_is_func_proto(prog->aux->attach_func_proto))

Also, remove these two tests from here (attach time); they should have
already been checked at load time.

> +		return -EINVAL;
> +
> +	if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
> +	    btf_id_set_contains(&bpf_lsm_current_hooks,
> +				prog->aux->attach_btf_id)) {
> +		*bpf_func = __cgroup_bpf_run_lsm_current;
> +		return 0;
> +	}
> +
> +	args = btf_params(prog->aux->attach_func_proto);
> +
> +	btf_vmlinux = bpf_get_btf_vmlinux();
> +
> +	type_id = btf_find_by_name_kind(btf_vmlinux, "socket", BTF_KIND_STRUCT);
> +	if (type_id < 0)
> +		return -EINVAL;
> +	sock_type = btf_type_by_id(btf_vmlinux, type_id);
> +
> +	type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
> +	if (type_id < 0)
> +		return -EINVAL;
> +	sk_type = btf_type_by_id(btf_vmlinux, type_id);

Although any practical kconfig should have CONFIG_NET, I am not sure
BTF has "socket" and "sock" in some unusual kconfig setup.  If it does
not, this will return an error too early for the other hooks.  Maybe
put these lookups under "#ifdef CONFIG_NET"?

While adding CONFIG_NET, it will be easier to just use btf_sock_ids[].
"socket" needs to be added to btf_sock_ids[].  Take a look at
btf_ids.h and filter.c.

> +
> +	first_arg_type = btf_type_resolve_ptr(btf_vmlinux, args[0].type, NULL);
> +	if (first_arg_type == sock_type)
> +		*bpf_func = __cgroup_bpf_run_lsm_socket;
> +	else if (first_arg_type == sk_type)
> +		*bpf_func = __cgroup_bpf_run_lsm_sock;
> +	else
> +		*bpf_func = __cgroup_bpf_run_lsm_current;
> +
> +	return 0;
> +}
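An untested sketch of what I have in mind for the lookup+compare part
above; it assumes a new BTF_SOCK_TYPE_SOCKET entry for "socket" is
added to btf_sock_ids[] next to the existing BTF_SOCK_TYPE_SOCK:

#ifdef CONFIG_NET
	args = btf_params(prog->aux->attach_func_proto);
	btf_vmlinux = bpf_get_btf_vmlinux();

	/* btf_sock_ids[] is resolved at build time by resolve_btfids,
	 * so the runtime btf_find_by_name_kind() lookups and their
	 * early -EINVAL returns go away.
	 */
	first_arg_type = btf_type_resolve_ptr(btf_vmlinux, args[0].type, NULL);
	if (first_arg_type == btf_type_by_id(btf_vmlinux,
					     btf_sock_ids[BTF_SOCK_TYPE_SOCKET]))
		*bpf_func = __cgroup_bpf_run_lsm_socket;
	else if (first_arg_type == btf_type_by_id(btf_vmlinux,
						  btf_sock_ids[BTF_SOCK_TYPE_SOCK]))
		*bpf_func = __cgroup_bpf_run_lsm_sock;
	else
#endif
		*bpf_func = __cgroup_bpf_run_lsm_current;

	return 0;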
[ ... ]

> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 134785ab487c..9cc38454e402 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -14,6 +14,9 @@
>  #include <linux/string.h>
>  #include <linux/bpf.h>
>  #include <linux/bpf-cgroup.h>
> +#include <linux/btf_ids.h>
> +#include <linux/bpf_lsm.h>
> +#include <linux/bpf_verifier.h>
>  #include <net/sock.h>
>  #include <net/bpf_sk_storage.h>
>
> @@ -61,6 +64,85 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
>  	return run_ctx.retval;
>  }
>
> +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
> +				       const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct sock *sk;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +	u64 *regs;
> +
> +	regs = (u64 *)ctx;
> +	sk = (void *)(unsigned long)regs[BPF_REG_0];
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
> +					 const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct socket *sock;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +	u64 *regs;
> +
> +	regs = (u64 *)ctx;
> +	sock = (void *)(unsigned long)regs[BPF_REG_0];
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
> +					  const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct cgroup *cgrp;
> +	int ret = 0;

From lsm_hook_defs.h, some hooks have default return values that are
not 0.  Is it OK to always return 0 in cases like when the cgroup
array is empty?

> +
> +	if (unlikely(!current))
> +		return 0;
> +
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	rcu_read_lock();
> +	cgrp = task_dfl_cgroup(current);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	rcu_read_unlock();
> +	return ret;
> +}

[ ... ]

> @@ -479,6 +572,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	struct bpf_prog *old_prog = NULL;
>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
>  	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> +	struct bpf_attach_target_info tgt_info = {};
>  	enum cgroup_bpf_attach_type atype;
>  	struct bpf_prog_list *pl;
>  	struct hlist_head *progs;
> @@ -495,9 +589,34 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  		/* replace_prog implies BPF_F_REPLACE, and vice versa */
>  		return -EINVAL;
>
> -	atype = to_cgroup_bpf_attach_type(type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (type == BPF_LSM_CGROUP) {
> +		struct bpf_prog *p = prog ? : link->link.prog;

nit: This "prog ? ...." has been done multiple times (in new and
existing code) in this function.  Maybe do it once at the very
beginning.
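Untested, and "new_prog" is just an illustrative name, but e.g. at the
top of __cgroup_bpf_attach():

	struct bpf_prog *new_prog = prog ? : link->link.prog;

Then the new BPF_LSM_CGROUP case and the existing spots below could
all reuse it.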
> +
> +		if (replace_prog) {
> +			/* Reusing shim from the original program. */
> +			if (replace_prog->aux->attach_btf_id !=
> +			    p->aux->attach_btf_id)
> +				return -EINVAL;
> +
> +			atype = replace_prog->aux->cgroup_atype;
> +		} else {
> +			err = bpf_check_attach_target(NULL, p, NULL,
> +						      p->aux->attach_btf_id,
> +						      &tgt_info);
> +			if (err)
> +				return -EINVAL;

This should be "return err;" instead.

> +
> +			atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
> +			if (atype < 0)
> +				return atype;
> +		}
> +
> +		p->aux->cgroup_atype = atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>
>  	progs = &cgrp->bpf.progs[atype];

[ ... ]

> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 0c4fd194e801..77dfa517c47c 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -11,6 +11,8 @@
>  #include <linux/rcupdate_wait.h>
>  #include <linux/module.h>
>  #include <linux/static_call.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf_lsm.h>
>
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -485,6 +487,147 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
>  	return err;
>  }
>
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> +static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog,
> +					  bpf_func_t bpf_func)
> +{
> +	struct bpf_prog *p;
> +
> +	p = bpf_prog_alloc(1, 0);
> +	if (!p)
> +		return NULL;
> +
> +	p->jited = false;
> +	p->bpf_func = bpf_func;
> +
> +	p->aux->cgroup_atype = prog->aux->cgroup_atype;
> +	p->aux->attach_func_proto = prog->aux->attach_func_proto;
> +	p->aux->attach_btf_id = prog->aux->attach_btf_id;
> +	p->aux->attach_btf = prog->aux->attach_btf;
> +	btf_get(p->aux->attach_btf);
> +	p->type = BPF_PROG_TYPE_LSM;
> +	p->expected_attach_type = BPF_LSM_MAC;
> +	bpf_prog_inc(p);
> +
> +	return p;
> +}
> +
> +static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr,
> +					 bpf_func_t bpf_func)
> +{
> +	const struct bpf_prog_aux *aux;
> +	int kind;
> +
> +	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> +		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
> +			struct bpf_prog *p = aux->prog;
> +
> +			if (p->bpf_func == bpf_func)
> +				return p;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> +				    struct bpf_attach_target_info *tgt_info)
> +{
> +	struct bpf_prog *shim_prog = NULL;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return err;
> +
> +	tr = bpf_trampoline_get(key, tgt_info);
> +	if (!tr)
> +		return -ENOMEM;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* Reusing existing shim attached by the other program. */
> +		bpf_prog_inc(shim_prog);
> +		mutex_unlock(&tr->mutex);
> +		return 0;
> +	}
> +
> +	/* Allocate and install new shim. */
> +
> +	shim_prog = cgroup_shim_alloc(prog, bpf_func);
> +	if (!shim_prog) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	err = __bpf_trampoline_link_prog(shim_prog, tr);
> +	if (err)
> +		goto out;
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	return 0;
> +out:
> +	if (shim_prog)
> +		bpf_prog_put(shim_prog);
> +

bpf_trampoline_get() was done earlier.  Does it need to call
bpf_trampoline_put(tr) in the error cases?  More on the tr refcnt
later.
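i.e. something like this for the "out:" path (untested), unlocking
before the put since bpf_trampoline_put() may free the tr:

out:
	mutex_unlock(&tr->mutex);

	if (shim_prog)
		bpf_prog_put(shim_prog);

	/* pairs with the bpf_trampoline_get() at the top */
	bpf_trampoline_put(tr);
	return err;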
> +	mutex_unlock(&tr->mutex);
> +	return err;
> +}
> +
> +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
> +{
> +	struct bpf_prog *shim_prog;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return;
> +
> +	tr = bpf_trampoline_lookup(key);
> +	if (!tr)
> +		return;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* We use shim_prog refcnt for tracking whether to
> +		 * remove the shim program from the trampoline.
> +		 * Trampoline's mutex is held while refcnt is
> +		 * added/subtracted so we don't need to care about
> +		 * potential races.
> +		 */
> +
> +		if (atomic64_read(&shim_prog->aux->refcnt) == 1)
> +			WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
> +
> +		bpf_prog_put(shim_prog);
> +	}
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
> +
> +	if (shim_prog)
> +		bpf_trampoline_put(tr);

While looking at bpf_trampoline_{link,unlink}_cgroup_shim() again:
which prog (the cgroup lsm progs or the shim_prog) actually owns the
tr?  It should be the shim_prog?

How about storing tr in "shim_prog->aux->dst_trampoline = tr;"?  Then
the earlier bpf_prog_put(shim_prog) in this function would take care
of the bpf_trampoline_put(shim_prog->aux->dst_trampoline), instead of
each cgroup lsm prog owning a refcnt on the tr, and this individual
bpf_trampoline_put(tr) here could also go away.

> +}
> +#endif
> +
>  struct bpf_trampoline *bpf_trampoline_get(u64 key,
>  					  struct bpf_attach_target_info *tgt_info)
>  {
> @@ -609,6 +752,24 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
>  	rcu_read_unlock();
>  }
>
> +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog)
> +	__acquires(RCU)
> +{
> +	/* Runtime stats are exported via actual BPF_LSM_CGROUP
> +	 * programs, not the shims.
> +	 */
> +	rcu_read_lock();
> +	migrate_disable();
> +	return NO_START_TIME;
> +}
> +
> +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start)
> +	__releases(RCU)
> +{
> +	migrate_enable();
> +	rcu_read_unlock();
> +}
> +
>  u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
>  {
>  	rcu_read_lock_trace();
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 813f6ee80419..99703d96c579 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -14455,6 +14455,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
>  		fallthrough;
>  	case BPF_MODIFY_RETURN:
>  	case BPF_LSM_MAC:
> +	case BPF_LSM_CGROUP:
>  	case BPF_TRACE_FENTRY:
>  	case BPF_TRACE_FEXIT:
>  		if (!btf_type_is_func(t)) {
> @@ -14633,6 +14634,33 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
>  	return 0;
>  }
>
> +static int check_used_maps(struct bpf_verifier_env *env)
> +{
> +	int i;
> +
> +	for (i = 0; i < env->used_map_cnt; i++) {
> +		struct bpf_map *map = env->used_maps[i];
> +
> +		switch (env->prog->expected_attach_type) {
> +		case BPF_LSM_CGROUP:
> +			if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
> +			    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
> +				break;
> +
> +			if (map->key_size != sizeof(__u64)) {

Following up on my very last comment in v5: I think we should not
limit the key here and should allow using both cgroup_inode_id and
attach_type (i.e. struct bpf_cgroup_storage_key) as the key, to avoid
the inconsistent surprise of some keys not being allowed for a
specific attach_type.  This check_used_maps() function could then also
be avoided.
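For reference, the existing key struct in the uapi bpf.h:

	struct bpf_cgroup_storage_key {
		__u64	cgroup_inode_id;	/* cgroup inode id */
		__u32	attach_type;		/* program attach type */
	};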
> +				verbose(env, "only global cgroup local storage is supported for BPF_LSM_CGROUP\n");
> +				return -EINVAL;
> +			}
> +
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  struct btf *bpf_get_btf_vmlinux(void)
>  {
>  	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
> @@ -14854,6 +14882,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
>  		convert_pseudo_ld_imm64(env);
>  	}
>
> +	ret = check_used_maps(env);
> +	if (ret < 0)
> +		goto err_release_maps;
> +
>  	adjust_btf_func(env);
>
>  err_release_maps:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 444fe6f1cf35..112e396bbe65 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -998,6 +998,7 @@ enum bpf_attach_type {
>  	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
>  	BPF_PERF_EVENT,
>  	BPF_TRACE_KPROBE_MULTI,
> +	BPF_LSM_CGROUP,
>  	__MAX_BPF_ATTACH_TYPE
>  };
>
> --
> 2.36.0.464.gb9c8b46e94-goog
>