Re: [PATCH bpf-next v6 03/10] bpf: per-cgroup lsm flavor

Martin KaFai Lau <kafai@xxxxxx> · Fri, 6 May 2022 16:02:44 -0700

On Fri, Apr 29, 2022 at 02:15:33PM -0700, Stanislav Fomichev wrote:
> diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
> index 064eccba641d..a0e68ef5dfb1 100644
> --- a/kernel/bpf/bpf_lsm.c
> +++ b/kernel/bpf/bpf_lsm.c
> @@ -16,6 +16,7 @@
>  #include <linux/bpf_local_storage.h>
>  #include <linux/btf_ids.h>
>  #include <linux/ima.h>
> +#include <linux/bpf-cgroup.h>
>  
>  /* For every LSM hook that allows attachment of BPF programs, declare a nop
>   * function where a BPF program can be attached.
> @@ -35,6 +36,66 @@ BTF_SET_START(bpf_lsm_hooks)
>  #undef LSM_HOOK
>  BTF_SET_END(bpf_lsm_hooks)
>  
> +/* List of LSM hooks that should operate on 'current' cgroup regardless
> + * of function signature.
> + */
> +BTF_SET_START(bpf_lsm_current_hooks)
> +/* operate on freshly allocated sk without any cgroup association */
> +BTF_ID(func, bpf_lsm_sk_alloc_security)
> +BTF_ID(func, bpf_lsm_sk_free_security)
> +BTF_SET_END(bpf_lsm_current_hooks)
> +
> +int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> +			     bpf_func_t *bpf_func)
> +{
> +	const struct btf_type *first_arg_type;
> +	const struct btf_type *sock_type;
> +	const struct btf_type *sk_type;
> +	const struct btf *btf_vmlinux;
> +	const struct btf_param *args;
> +	s32 type_id;
> +
> +	if (!prog->aux->attach_func_proto ||
> +	    !btf_type_is_func_proto(prog->aux->attach_func_proto))
Also remove these tests during the attach time.  These should have
already been checked during the load time.

> +		return -EINVAL;
> +
> +	if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
> +	    btf_id_set_contains(&bpf_lsm_current_hooks,
> +				prog->aux->attach_btf_id)) {
> +		*bpf_func = __cgroup_bpf_run_lsm_current;
> +		return 0;
> +	}
> +
> +	args = btf_params(prog->aux->attach_func_proto);
> +
> +	btf_vmlinux = bpf_get_btf_vmlinux();
> +
> +	type_id = btf_find_by_name_kind(btf_vmlinux, "socket", BTF_KIND_STRUCT);
> +	if (type_id < 0)
> +		return -EINVAL;
> +	sock_type = btf_type_by_id(btf_vmlinux, type_id);
> +
> +	type_id = btf_find_by_name_kind(btf_vmlinux, "sock", BTF_KIND_STRUCT);
> +	if (type_id < 0)
> +		return -EINVAL;
> +	sk_type = btf_type_by_id(btf_vmlinux, type_id);
Although practical kconfig should have CONFIG_NET, not sure btf has
"socket" and "sock" in some unusual kconfig setup.  If it does not, it will
return error too early for other hooks.

May be put them under "#ifdef CONFIG_NET"?  While adding CONFIG_NET,
it will be easier to just use the btf_sock_ids[].  "socket" needs to be
added to btf_sock_ids[].  Take a look at btf_ids.h and filter.c.

> +
> +	first_arg_type = btf_type_resolve_ptr(btf_vmlinux, args[0].type, NULL);
> +	if (first_arg_type == sock_type)
> +		*bpf_func = __cgroup_bpf_run_lsm_socket;
> +	else if (first_arg_type == sk_type)
> +		*bpf_func = __cgroup_bpf_run_lsm_sock;
> +	else
> +		*bpf_func = __cgroup_bpf_run_lsm_current;
> +
> +	return 0;
> +}

[ ... ]

> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 134785ab487c..9cc38454e402 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -14,6 +14,9 @@
>  #include <linux/string.h>
>  #include <linux/bpf.h>
>  #include <linux/bpf-cgroup.h>
> +#include <linux/btf_ids.h>
> +#include <linux/bpf_lsm.h>
> +#include <linux/bpf_verifier.h>
>  #include <net/sock.h>
>  #include <net/bpf_sk_storage.h>
>  
> @@ -61,6 +64,85 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
>  	return run_ctx.retval;
>  }
>  
> +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
> +				       const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct sock *sk;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +	u64 *regs;
> +
> +	regs = (u64 *)ctx;
> +	sk = (void *)(unsigned long)regs[BPF_REG_0];
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
> +					 const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct socket *sock;
> +	struct cgroup *cgrp;
> +	int ret = 0;
> +	u64 *regs;
> +
> +	regs = (u64 *)ctx;
> +	sock = (void *)(unsigned long)regs[BPF_REG_0];
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	return ret;
> +}
> +
> +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
> +					  const struct bpf_insn *insn)
> +{
> +	const struct bpf_prog *shim_prog;
> +	struct cgroup *cgrp;
> +	int ret = 0;
>From lsm_hook_defs.h, there are some default return values that are not 0.
Is it ok to always return 0 in cases like the cgroup array is empty ?

> +
> +	if (unlikely(!current))
> +		return 0;
> +
> +	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> +	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> +
> +	rcu_read_lock();
> +	cgrp = task_dfl_cgroup(current);
> +	if (likely(cgrp))
> +		ret = bpf_prog_run_array_cg(&cgrp->bpf,
> +					    shim_prog->aux->cgroup_atype,
> +					    ctx, bpf_prog_run, 0, NULL);
> +	rcu_read_unlock();
> +	return ret;
> +}

[ ... ]

> @@ -479,6 +572,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	struct bpf_prog *old_prog = NULL;
>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
>  	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> +	struct bpf_attach_target_info tgt_info = {};
>  	enum cgroup_bpf_attach_type atype;
>  	struct bpf_prog_list *pl;
>  	struct hlist_head *progs;
> @@ -495,9 +589,34 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  		/* replace_prog implies BPF_F_REPLACE, and vice versa */
>  		return -EINVAL;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (type == BPF_LSM_CGROUP) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
nit. This "prog ? ...." has been done multiple times (new and existing codes)
in this function.  may be do it once at the very beginning.

> +
> +		if (replace_prog) {
> +			/* Reusing shim from the original program. */
> +			if (replace_prog->aux->attach_btf_id !=
> +			    p->aux->attach_btf_id)
> +				return -EINVAL;
> +
> +			atype = replace_prog->aux->cgroup_atype;
> +		} else {
> +			err = bpf_check_attach_target(NULL, p, NULL,
> +						      p->aux->attach_btf_id,
> +						      &tgt_info);
> +			if (err)
> +				return -EINVAL;
return err;

> +
> +			atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
> +			if (atype < 0)
> +				return atype;
> +		}
> +
> +		p->aux->cgroup_atype = atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];

[ ... ]

> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 0c4fd194e801..77dfa517c47c 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -11,6 +11,8 @@
>  #include <linux/rcupdate_wait.h>
>  #include <linux/module.h>
>  #include <linux/static_call.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf_lsm.h>
>  
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -485,6 +487,147 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
>  	return err;
>  }
>  
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> +static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog,
> +					  bpf_func_t bpf_func)
> +{
> +	struct bpf_prog *p;
> +
> +	p = bpf_prog_alloc(1, 0);
> +	if (!p)
> +		return NULL;
> +
> +	p->jited = false;
> +	p->bpf_func = bpf_func;
> +
> +	p->aux->cgroup_atype = prog->aux->cgroup_atype;
> +	p->aux->attach_func_proto = prog->aux->attach_func_proto;
> +	p->aux->attach_btf_id = prog->aux->attach_btf_id;
> +	p->aux->attach_btf = prog->aux->attach_btf;
> +	btf_get(p->aux->attach_btf);
> +	p->type = BPF_PROG_TYPE_LSM;
> +	p->expected_attach_type = BPF_LSM_MAC;
> +	bpf_prog_inc(p);
> +
> +	return p;
> +}
> +
> +static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr,
> +					 bpf_func_t bpf_func)
> +{
> +	const struct bpf_prog_aux *aux;
> +	int kind;
> +
> +	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> +		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
> +			struct bpf_prog *p = aux->prog;
> +
> +			if (p->bpf_func == bpf_func)
> +				return p;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> +				    struct bpf_attach_target_info *tgt_info)
> +{
> +	struct bpf_prog *shim_prog = NULL;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return err;
> +
> +	tr = bpf_trampoline_get(key, tgt_info);
> +	if (!tr)
> +		return  -ENOMEM;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* Reusing existing shim attached by the other program. */
> +		bpf_prog_inc(shim_prog);
> +		mutex_unlock(&tr->mutex);
> +		return 0;
> +	}
> +
> +	/* Allocate and install new shim. */
> +
> +	shim_prog = cgroup_shim_alloc(prog, bpf_func);
> +	if (!shim_prog) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	err = __bpf_trampoline_link_prog(shim_prog, tr);
> +	if (err)
> +		goto out;
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	return 0;
> +out:
> +	if (shim_prog)
> +		bpf_prog_put(shim_prog);
> +
bpf_trampoline_get() was done earlier.
Does it need to call bpf_trampoline_put(tr) in error cases ?
More on tr refcnt later.

> +	mutex_unlock(&tr->mutex);
> +	return err;
> +}
> +
> +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
> +{
> +	struct bpf_prog *shim_prog;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return;
> +
> +	tr = bpf_trampoline_lookup(key);
> +	if (!tr)
> +		return;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* We use shim_prog refcnt for tracking whether to
> +		 * remove the shim program from the trampoline.
> +		 * Trampoline's mutex is held while refcnt is
> +		 * added/subtracted so we don't need to care about
> +		 * potential races.
> +		 */
> +
> +		if (atomic64_read(&shim_prog->aux->refcnt) == 1)
> +			WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
> +
> +		bpf_prog_put(shim_prog);
> +	}
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
> +
> +	if (shim_prog)
> +		bpf_trampoline_put(tr);
While looking at the bpf_trampoline_{link,unlink}_cgroup_shim() again,
which prog (cgroup lsm progs or shim_prog) actually owns the tr.
It should be the shim_prog ?

How about storing tr in "shim_prog->aux->dst_trampoline = tr;"
Then the earlier bpf_prog_put(shim_prog) in this function will take care
of the bpf_trampoline_put(shim_prog->aux->dst_trampoline)
instead of each cgroup lsm prog owns a refcnt of the tr
and then this individual bpf_trampoline_put(tr) here can also
go away.

> +}
> +#endif
> +
>  struct bpf_trampoline *bpf_trampoline_get(u64 key,
>  					  struct bpf_attach_target_info *tgt_info)
>  {
> @@ -609,6 +752,24 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
>  	rcu_read_unlock();
>  }
>  
> +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog)
> +	__acquires(RCU)
> +{
> +	/* Runtime stats are exported via actual BPF_LSM_CGROUP
> +	 * programs, not the shims.
> +	 */
> +	rcu_read_lock();
> +	migrate_disable();
> +	return NO_START_TIME;
> +}
> +
> +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start)
> +	__releases(RCU)
> +{
> +	migrate_enable();
> +	rcu_read_unlock();
> +}
> +
>  u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
>  {
>  	rcu_read_lock_trace();
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 813f6ee80419..99703d96c579 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -14455,6 +14455,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
>  		fallthrough;
>  	case BPF_MODIFY_RETURN:
>  	case BPF_LSM_MAC:
> +	case BPF_LSM_CGROUP:
>  	case BPF_TRACE_FENTRY:
>  	case BPF_TRACE_FEXIT:
>  		if (!btf_type_is_func(t)) {
> @@ -14633,6 +14634,33 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
>  	return 0;
>  }
>  
> +static int check_used_maps(struct bpf_verifier_env *env)
> +{
> +	int i;
> +
> +	for (i = 0; i < env->used_map_cnt; i++) {
> +		struct bpf_map *map = env->used_maps[i];
> +
> +		switch (env->prog->expected_attach_type) {
> +		case BPF_LSM_CGROUP:
> +			if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
> +			    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
> +				break;
> +
> +			if (map->key_size != sizeof(__u64)) {
Follow on my very last comment in v5.  I think we should not
limit the bpf_cgroup_storage_key and should allow using both
cgroup_inode_id and attach_type as the key to avoid
inconsistent surprise (some keys are not allowed in a specific
attach_type), so this check_used_maps() function can also be avoided.

> +				verbose(env, "only global cgroup local storage is supported for BPF_LSM_CGROUP\n");
> +				return -EINVAL;
> +			}
> +
> +			break;
> +		default:
> +			break;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>  struct btf *bpf_get_btf_vmlinux(void)
>  {
>  	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
> @@ -14854,6 +14882,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
>  		convert_pseudo_ld_imm64(env);
>  	}
>  
> +	ret = check_used_maps(env);
> +	if (ret < 0)
> +		goto err_release_maps;
> +
>  	adjust_btf_func(env);
>  
>  err_release_maps:
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 444fe6f1cf35..112e396bbe65 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -998,6 +998,7 @@ enum bpf_attach_type {
>  	BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
>  	BPF_PERF_EVENT,
>  	BPF_TRACE_KPROBE_MULTI,
> +	BPF_LSM_CGROUP,
>  	__MAX_BPF_ATTACH_TYPE
>  };
>  
> -- 
> 2.36.0.464.gb9c8b46e94-goog
>