On Tue, Apr 19, 2022 at 12:00:48PM -0700, Stanislav Fomichev wrote:
> +static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
> +{
> +	enum bpf_cgroup_storage_type stype;
> +
> +	for_each_cgroup_storage_type(stype)
> +		bpf_cgroup_storage_unlink(storages[stype]);
> +}
> +
>  /* Called when bpf_cgroup_link is auto-detached from dying cgroup.
>   * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
>   * doesn't free link memory, which will eventually be done by bpf_link's
> @@ -166,6 +256,16 @@ static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
>  	link->cgroup = NULL;
>  }
>  
> +static void bpf_cgroup_lsm_shim_release(struct bpf_prog *prog,
> +					enum cgroup_bpf_attach_type atype)
> +{
> +	if (prog->aux->cgroup_atype < CGROUP_LSM_START ||
> +	    prog->aux->cgroup_atype > CGROUP_LSM_END)
> +		return;
> +
> +	bpf_trampoline_unlink_cgroup_shim(prog);
> +}
> +
>  /**
>   * cgroup_bpf_release() - put references of all bpf programs and
>   *                        release all cgroup bpf data
> @@ -190,10 +290,18 @@ static void cgroup_bpf_release(struct work_struct *work)
>  
>  		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
>  			hlist_del(&pl->node);
> -			if (pl->prog)
> +			if (pl->prog) {
> +				if (atype == BPF_LSM_CGROUP)
> +					bpf_cgroup_lsm_shim_release(pl->prog,
> +								    atype);
>  				bpf_prog_put(pl->prog);
> -			if (pl->link)
> +			}
> +			if (pl->link) {
> +				if (atype == BPF_LSM_CGROUP)
> +					bpf_cgroup_lsm_shim_release(pl->link->link.prog,
> +								    atype);
>  				bpf_cgroup_link_auto_detach(pl->link);
> +			}
>  			kfree(pl);
>  			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
>  		}
> @@ -506,6 +614,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	struct bpf_prog *old_prog = NULL;
>  	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
>  	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
> +	struct bpf_attach_target_info tgt_info = {};
>  	enum cgroup_bpf_attach_type atype;
>  	struct bpf_prog_list *pl;
>  	struct hlist_head *progs;
> @@ -522,9 +631,35 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  		/* replace_prog implies BPF_F_REPLACE, and vice versa */
>  		return -EINVAL;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (type == BPF_LSM_CGROUP) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
> +
> +		if (replace_prog) {
> +			/* Reusing shim from the original program.
> +			 */
> +			if (replace_prog->aux->attach_btf_id !=
> +			    p->aux->attach_btf_id)
> +				return -EINVAL;
> +
> +			atype = replace_prog->aux->cgroup_atype;
> +		} else {
> +			err = bpf_check_attach_target(NULL, p, NULL,
> +						      p->aux->attach_btf_id,
> +						      &tgt_info);
> +			if (err)
> +				return -EINVAL;
> +
> +			atype = bpf_lsm_attach_type_get(p->aux->attach_btf_id);
> +			if (atype < 0)
> +				return atype;
> +		}
> +
> +		p->aux->cgroup_atype = atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];
>  
> @@ -580,13 +715,26 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
>  	if (err)
>  		goto cleanup;
>  
> +	bpf_cgroup_storages_link(new_storage, cgrp, type);

After looking back and forth between this patch 3 and the next patch 4, I
can't quite think this through quickly, so it may be faster to ask :)  I have
questions on the ordering between update_effective_progs(),
bpf_cgroup_storages_link(), and bpf_trampoline_link_cgroup_shim().  Why does
bpf_cgroup_storages_link() have to be moved up and done before
bpf_trampoline_link_cgroup_shim()?

> +
> +	if (type == BPF_LSM_CGROUP && !old_prog) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
> +
> +		err = bpf_trampoline_link_cgroup_shim(p, &tgt_info);

update_effective_progs() was done a few lines above, so the effective[atype]
array already has the new prog.  If bpf_trampoline_link_cgroup_shim() fails to
add the shim_prog to the trampoline here, the new prog is still left in
effective[atype].  There is no shim_prog to execute effective[], but there may
be other places that access effective[], e.g. __cgroup_bpf_query(), although I
think to_cgroup_bpf_attach_type() is not handling BPF_LSM_CGROUP now.  More on
__cgroup_bpf_query() later.

How about doing bpf_trampoline_link_cgroup_shim() just before
activate_effective_progs()?
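Completely untested sketch, just to make the ordering I have in mind concrete
(the cleanup_shim label is made up and the exact placement would depend on how
patch 4 wires things up):

	/* Link the shim first, so a failure cannot leave the new
	 * prog published in effective[atype] without a shim to run it.
	 */
	if (type == BPF_LSM_CGROUP && !old_prog) {
		err = bpf_trampoline_link_cgroup_shim(p, &tgt_info);
		if (err)
			goto cleanup;
	}

	err = update_effective_progs(cgrp, atype);
	if (err)
		goto cleanup_shim;

	if (old_prog)
		bpf_prog_put(old_prog);
	else
		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
	bpf_cgroup_storages_link(new_storage, cgrp, type);
	return 0;

cleanup_shim:
	if (type == BPF_LSM_CGROUP && !old_prog)
		bpf_trampoline_unlink_cgroup_shim(p);
cleanup:
	...

Or, to do it literally just before activate_effective_progs(): link the shim
only after compute_effective_progs() has succeeded, so nothing can fail after
the new prog becomes visible.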
Have you thought about what is needed to support __cgroup_bpf_query()?
bpf_attach_type and cgroup_bpf_attach_type are no longer a 1:1 relationship.
Loop through cgroup_lsm_atype_usecnt[] and output them all under
BPF_LSM_CGROUP?

Same goes for local_storage.  All lsm-cgrp programs attaching to different
attach_btf_ids share one local_storage because the key is only cgroup-id and
attach_type.  Is it enough to start with that first, and the key could be
extended later with a new map_flag?  This is related to the API.
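On the __cgroup_bpf_query() side, something along these lines is what I am
picturing (untested sketch; it walks the CGROUP_LSM_* atype range from this
patch rather than cgroup_lsm_atype_usecnt[] itself, but the idea is the same):

	/* BPF_LSM_CGROUP fans out to many cgroup_bpf_attach_type
	 * slots, so it cannot go through a single
	 * to_cgroup_bpf_attach_type() lookup.
	 */
	if (type == BPF_LSM_CGROUP) {
		for (atype = CGROUP_LSM_START; atype <= CGROUP_LSM_END; atype++) {
			progs = &cgrp->bpf.progs[atype];
			/* copy the prog ids (or effective[atype]) into
			 * uattr, all reported under BPF_LSM_CGROUP
			 */
		}
	} else {
		atype = to_cgroup_bpf_attach_type(type);
		if (atype < 0)
			return -EINVAL;
	}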
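For reference, this is the whole key today (include/uapi/linux/bpf.h), so all
BPF_LSM_CGROUP programs on a given cgroup land on the same entry regardless of
which attach_btf_id they hook:

	struct bpf_cgroup_storage_key {
		__u64	cgroup_inode_id;	/* cgroup inode id */
		__u32	attach_type;		/* program attach type */
	};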
> +		if (err)
> +			goto cleanup_trampoline;
> +	}
> +
>  	if (old_prog)
>  		bpf_prog_put(old_prog);
>  	else
>  		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
> -	bpf_cgroup_storages_link(new_storage, cgrp, type);
> +
>  	return 0;
>  
> +cleanup_trampoline:
> +	bpf_cgroup_storages_unlink(new_storage);
> +
>  cleanup:
>  	if (old_prog) {
>  		pl->prog = old_prog;
> @@ -678,9 +826,13 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
>  	struct hlist_head *progs;
>  	bool found = false;
>  
> -	atype = to_cgroup_bpf_attach_type(link->type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (link->type == BPF_LSM_CGROUP) {
> +		atype = link->link.prog->aux->cgroup_atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(link->type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];
>  
> @@ -696,6 +848,9 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
>  	if (!found)
>  		return -ENOENT;
>  
> +	if (link->type == BPF_LSM_CGROUP)
> +		new_prog->aux->cgroup_atype = atype;
> +
>  	old_prog = xchg(&link->link.prog, new_prog);
>  	replace_effective_prog(cgrp, atype, link);
>  	bpf_prog_put(old_prog);
> @@ -779,9 +934,15 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>  	u32 flags;
>  	int err;
>  
> -	atype = to_cgroup_bpf_attach_type(type);
> -	if (atype < 0)
> -		return -EINVAL;
> +	if (type == BPF_LSM_CGROUP) {
> +		struct bpf_prog *p = prog ? : link->link.prog;
> +
> +		atype = p->aux->cgroup_atype;
> +	} else {
> +		atype = to_cgroup_bpf_attach_type(type);
> +		if (atype < 0)
> +			return -EINVAL;
> +	}
>  
>  	progs = &cgrp->bpf.progs[atype];
>  	flags = cgrp->bpf.flags[atype];
> @@ -803,6 +964,10 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
>  	if (err)
>  		goto cleanup;
>  
> +	if (type == BPF_LSM_CGROUP)
> +		bpf_cgroup_lsm_shim_release(prog ? : link->link.prog,
> +					    atype);
> +
>  	/* now can actually delete it from this cgroup list */
>  	hlist_del(&pl->node);
> [ ... ]
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 0c4fd194e801..c76dfa4ea2d9 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -11,6 +11,8 @@
>  #include <linux/rcupdate_wait.h>
>  #include <linux/module.h>
>  #include <linux/static_call.h>
> +#include <linux/bpf_verifier.h>
> +#include <linux/bpf_lsm.h>
>  
>  /* dummy _ops. The verifier will operate on target program's ops. */
>  const struct bpf_verifier_ops bpf_extension_verifier_ops = {
> @@ -485,6 +487,149 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
>  	return err;
>  }
>  
> +#if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
> +static struct bpf_prog *cgroup_shim_alloc(const struct bpf_prog *prog,
> +					  bpf_func_t bpf_func)
> +{
> +	struct bpf_prog *p;
> +
> +	p = bpf_prog_alloc(1, 0);
> +	if (!p)
> +		return NULL;
> +
> +	p->jited = false;
> +	p->bpf_func = bpf_func;
> +
> +	p->aux->cgroup_atype = prog->aux->cgroup_atype;
> +	p->aux->attach_func_proto = prog->aux->attach_func_proto;
> +	p->aux->attach_btf_id = prog->aux->attach_btf_id;
> +	p->aux->attach_btf = prog->aux->attach_btf;
> +	btf_get(p->aux->attach_btf);
> +	p->type = BPF_PROG_TYPE_LSM;
> +	p->expected_attach_type = BPF_LSM_MAC;
> +	bpf_prog_inc(p);
> +
> +	return p;
> +}
> +
> +static struct bpf_prog *cgroup_shim_find(struct bpf_trampoline *tr,
> +					 bpf_func_t bpf_func)
> +{
> +	const struct bpf_prog_aux *aux;
> +	int kind;
> +
> +	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
> +		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) {
> +			struct bpf_prog *p = aux->prog;
> +
> +			if (p->bpf_func == bpf_func)
> +				return p;
> +		}
> +	}
> +
> +	return NULL;
> +}
> +
> +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> +				    struct bpf_attach_target_info *tgt_info)
> +{
> +	struct bpf_prog *shim_prog = NULL;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return err;
> +
> +	tr = bpf_trampoline_get(key, tgt_info);
> +	if (!tr)
> +		return -ENOMEM;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* Reusing existing shim attached by the other program.
> +		 */
> +		bpf_prog_inc(shim_prog);
> +		mutex_unlock(&tr->mutex);
> +		return 0;
> +	}
> +
> +	/* Allocate and install new shim.
> +	 */
> +
> +	shim_prog = cgroup_shim_alloc(prog, bpf_func);
> +	if (!shim_prog) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +
> +	err = __bpf_trampoline_link_prog(shim_prog, tr);
> +	if (err)
> +		goto out;
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	return 0;
> +out:
> +	if (shim_prog)
> +		bpf_prog_put(shim_prog);
> +
> +	mutex_unlock(&tr->mutex);
> +	return err;
> +}
> +
> +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
> +{
> +	struct bpf_prog *shim_prog;
> +	struct bpf_trampoline *tr;
> +	bpf_func_t bpf_func;
> +	u64 key;
> +	int err;
> +
> +	key = bpf_trampoline_compute_key(NULL, prog->aux->attach_btf,
> +					 prog->aux->attach_btf_id);
> +
> +	err = bpf_lsm_find_cgroup_shim(prog, &bpf_func);
> +	if (err)
> +		return;
> +
> +	tr = bpf_trampoline_lookup(key);
> +	if (!tr)
> +		return;
> +
> +	mutex_lock(&tr->mutex);
> +
> +	shim_prog = cgroup_shim_find(tr, bpf_func);
> +	if (shim_prog) {
> +		/* We use shim_prog refcnt for tracking whether to
> +		 * remove the shim program from the trampoline.
> +		 * Trampoline's mutex is held while refcnt is
> +		 * added/subtracted so we don't need to care about
> +		 * potential races.
> +		 */
> +
> +		if (atomic64_read(&shim_prog->aux->refcnt) == 1)
> +			WARN_ON_ONCE(__bpf_trampoline_unlink_prog(shim_prog, tr));
> +
> +		bpf_prog_put(shim_prog);
> +	}
> +
> +	mutex_unlock(&tr->mutex);
> +
> +	bpf_trampoline_put(tr); /* bpf_trampoline_lookup */
> +
> +	if (shim_prog)
> +		bpf_trampoline_put(tr);
> +}
> +#endif
> +