On Wed, May 18, 2022 at 03:55:24PM -0700, Stanislav Fomichev wrote:
> Previous patch adds 1:1 mapping between all 211 LSM hooks
> and bpf_cgroup program array. Instead of reserving a slot per
> possible hook, reserve 10 slots per cgroup for lsm programs.
> Those slots are dynamically allocated on demand and reclaimed.
>
> struct cgroup_bpf {
> 	struct bpf_prog_array *    effective[33];        /*     0   264 */
> 	/* --- cacheline 4 boundary (256 bytes) was 8 bytes ago --- */
> 	struct hlist_head          progs[33];            /*   264   264 */
> 	/* --- cacheline 8 boundary (512 bytes) was 16 bytes ago --- */
> 	u8                         flags[33];            /*   528    33 */
>
> 	/* XXX 7 bytes hole, try to pack */
>
> 	struct list_head           storages;             /*   568    16 */
> 	/* --- cacheline 9 boundary (576 bytes) was 8 bytes ago --- */
> 	struct bpf_prog_array *    inactive;             /*   584     8 */
> 	struct percpu_ref          refcnt;               /*   592    16 */
> 	struct work_struct         release_work;         /*   608    72 */
>
> 	/* size: 680, cachelines: 11, members: 7 */
> 	/* sum members: 673, holes: 1, sum holes: 7 */
> 	/* last cacheline: 40 bytes */
> };
>
> Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
> ---
>  include/linux/bpf-cgroup-defs.h |   3 +-
>  include/linux/bpf_lsm.h         |   6 --
>  kernel/bpf/bpf_lsm.c            |   5 --
>  kernel/bpf/cgroup.c             | 135 +++++++++++++++++++++++++++++---
>  4 files changed, 125 insertions(+), 24 deletions(-)
>
> diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h
> index d5a70a35dace..359d3f16abea 100644
> --- a/include/linux/bpf-cgroup-defs.h
> +++ b/include/linux/bpf-cgroup-defs.h
> @@ -10,7 +10,8 @@
>
>  struct bpf_prog_array;
>
> -#define CGROUP_LSM_NUM 211 /* will be addressed in the next patch */
> +/* Maximum number of concurrently attachable per-cgroup LSM hooks. */
> +#define CGROUP_LSM_NUM 10
>
>  enum cgroup_bpf_attach_type {
>  	CGROUP_BPF_ATTACH_TYPE_INVALID = -1,
> diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> index 7f0e59f5f9be..613de44aa429 100644
> --- a/include/linux/bpf_lsm.h
> +++ b/include/linux/bpf_lsm.h
> @@ -43,7 +43,6 @@ extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
>  void bpf_inode_storage_free(struct inode *inode);
>
>  int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
> -int bpf_lsm_hook_idx(u32 btf_id);
>
>  #else /* !CONFIG_BPF_LSM */
>
> @@ -74,11 +73,6 @@ static inline int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
>  	return -ENOENT;
>  }
>
> -static inline int bpf_lsm_hook_idx(u32 btf_id)
> -{
> -	return -EINVAL;
> -}
> -
>  #endif /* CONFIG_BPF_LSM */
>
>  #endif /* _LINUX_BPF_LSM_H */
> diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
> index 654c23577ad3..96503c3e7a71 100644
> --- a/kernel/bpf/bpf_lsm.c
> +++ b/kernel/bpf/bpf_lsm.c
> @@ -71,11 +71,6 @@ int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
>  	return 0;
>  }
>
> -int bpf_lsm_hook_idx(u32 btf_id)
> -{
> -	return btf_id_set_index(&bpf_lsm_hooks, btf_id);
> -}
> -
>  int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
>  			const struct bpf_prog *prog)
>  {
> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> index 2c356a38f4cf..a959cdd22870 100644
> --- a/kernel/bpf/cgroup.c
> +++ b/kernel/bpf/cgroup.c
> @@ -132,15 +132,110 @@ unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
>  }
>
>  #ifdef CONFIG_BPF_LSM
> +struct list_head unused_bpf_lsm_atypes;
> +struct list_head used_bpf_lsm_atypes;
> +
> +struct bpf_lsm_attach_type {
> +	int index;
> +	u32 btf_id;
> +	int usecnt;
> +	struct list_head atypes;
> +	struct rcu_head rcu_head;
> +};
> +
> +static int __init bpf_lsm_attach_type_init(void)
> +{
> +	struct bpf_lsm_attach_type *atype;
> +	int i;
> +
> +	INIT_LIST_HEAD_RCU(&unused_bpf_lsm_atypes);
> +	INIT_LIST_HEAD_RCU(&used_bpf_lsm_atypes);
> +
> +	for (i = 0; i < CGROUP_LSM_NUM; i++) {
> +		atype = kzalloc(sizeof(*atype), GFP_KERNEL);
> +		if (!atype)
> +			continue;
> +
> +		atype->index = i;
> +		list_add_tail_rcu(&atype->atypes, &unused_bpf_lsm_atypes);
> +	}
> +
> +	return 0;
> +}
> +late_initcall(bpf_lsm_attach_type_init);
> +
>  static enum cgroup_bpf_attach_type bpf_lsm_attach_type_get(u32 attach_btf_id)
>  {
> -	return CGROUP_LSM_START + bpf_lsm_hook_idx(attach_btf_id);
> +	struct bpf_lsm_attach_type *atype;
> +
> +	lockdep_assert_held(&cgroup_mutex);
> +
> +	list_for_each_entry_rcu(atype, &used_bpf_lsm_atypes, atypes) {
> +		if (atype->btf_id != attach_btf_id)
> +			continue;
> +
> +		atype->usecnt++;
> +		return CGROUP_LSM_START + atype->index;
> +	}
> +
> +	atype = list_first_or_null_rcu(&unused_bpf_lsm_atypes, struct bpf_lsm_attach_type, atypes);
> +	if (!atype)
> +		return -E2BIG;
> +
> +	list_del_rcu(&atype->atypes);
> +	atype->btf_id = attach_btf_id;
> +	atype->usecnt = 1;
> +	list_add_tail_rcu(&atype->atypes, &used_bpf_lsm_atypes);
> +
> +	return CGROUP_LSM_START + atype->index;
> +}
> +
> +static void bpf_lsm_attach_type_reclaim(struct rcu_head *head)
> +{
> +	struct bpf_lsm_attach_type *atype =
> +		container_of(head, struct bpf_lsm_attach_type, rcu_head);
> +
> +	atype->btf_id = 0;
> +	atype->usecnt = 0;
> +	list_add_tail_rcu(&atype->atypes, &unused_bpf_lsm_atypes);

hmm... the cgroup_mutex is not held here while the unused_bpf_lsm_atypes
list is being changed? Since this runs from an rcu callback, a mutex
cannot be taken here anyway, so a spinlock would be needed.

> +}
> +
> +static void bpf_lsm_attach_type_put(u32 attach_btf_id)
> +{
> +	struct bpf_lsm_attach_type *atype;
> +
> +	lockdep_assert_held(&cgroup_mutex);
> +
> +	list_for_each_entry_rcu(atype, &used_bpf_lsm_atypes, atypes) {
> +		if (atype->btf_id != attach_btf_id)
> +			continue;
> +
> +		if (--atype->usecnt <= 0) {
> +			list_del_rcu(&atype->atypes);
> +			WARN_ON_ONCE(atype->usecnt < 0);
> +
> +			/* call_rcu here prevents atype reuse within
> +			 * the same rcu grace period.
> +			 * shim programs use __bpf_prog_enter_lsm_cgroup
> +			 * which starts RCU read section.

It is a bit unclear to me why there is no need to assign
'shim_prog->aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID' here
before the reclaim, and why shim_prog->bpf_func does not need to check
shim_prog->aux->cgroup_atype before using it. A few words of comment
here to explain this would be very useful.

> +			 */
> +			call_rcu(&atype->rcu_head, bpf_lsm_attach_type_reclaim);

How about doing this bpf_lsm_attach_type_put() in
bpf_prog_free_deferred() instead, and only for the shim_prog, not the
cgroup-lsm prog? The shim_prog is the only one that needs a
cgroup_atype, so the cgroup_atype can then naturally be reused when the
shim_prog is destroyed. bpf_prog_free_deferred() has already gone
through an rcu grace period (__bpf_prog_put_rcu) and it can block, so
cgroup_mutex can be used there. The need for the rcu_head here should
go away as well, and the v6 array approach could be reconsidered.

The cgroup-lsm progs do not necessarily need to hold a usecnt on the
cgroup_atype; their aux->cgroup_atype can stay
CGROUP_BPF_ATTACH_TYPE_INVALID. My understanding is that setting
replace_prog->aux->cgroup_atype during attach is an optimization; it
can always search again.
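To make the suggestion concrete, the shape I have in mind is roughly the
following (a completely untested sketch; bpf_lsm_attach_type_put_locked()
is a made-up wrapper that would live in kernel/bpf/cgroup.c, since
core.c cannot take cgroup_mutex directly, and aux->cgroup_atype is the
field added earlier in this series):

/* kernel/bpf/cgroup.c: wrapper that takes cgroup_mutex itself, callable
 * from any context that can block.
 */
void bpf_lsm_attach_type_put_locked(u32 attach_btf_id)
{
	mutex_lock(&cgroup_mutex);
	bpf_lsm_attach_type_put(attach_btf_id);
	mutex_unlock(&cgroup_mutex);
}

/* kernel/bpf/core.c: bpf_prog_free_deferred() already runs after the
 * rcu grace period from __bpf_prog_put_rcu(), so no extra rcu_head
 * machinery is needed to delay the slot reuse.
 */
static void bpf_prog_free_deferred(struct work_struct *work)
{
	struct bpf_prog_aux *aux = container_of(work, struct bpf_prog_aux, work);

#ifdef CONFIG_BPF_LSM
	/* Only the shim_prog would hold a usecnt on its cgroup_atype;
	 * the cgroup-lsm progs keep CGROUP_BPF_ATTACH_TYPE_INVALID, so
	 * this is a nop for them.
	 */
	if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID)
		bpf_lsm_attach_type_put_locked(aux->attach_btf_id);
#endif

	/* ... existing freeing logic unchanged ... */
}

With the put moved there, bpf_lsm_attach_type_reclaim() and the
call_rcu() above can be dropped, and both atype lists only ever change
under cgroup_mutex.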