On Thu, May 19, 2022 at 6:01 PM Yonghong Song <yhs@xxxxxx> wrote:
>
>
>
> On 5/18/22 3:55 PM, Stanislav Fomichev wrote:
> > Allow attaching to lsm hooks in the cgroup context.
> >
> > Attaching to per-cgroup LSM works exactly like attaching
> > to other per-cgroup hooks. New BPF_LSM_CGROUP is added
> > to trigger new mode; the actual lsm hook we attach to is
> > signaled via existing attach_btf_id.
> >
> > For the hooks that have 'struct socket' or 'struct sock' as its first
> > argument, we use the cgroup associated with that socket. For the rest,
> > we use 'current' cgroup (this is all on default hierarchy == v2 only).
> > Note that for some hooks that work on 'struct sock' we still
> > take the cgroup from 'current' because some of them work on the socket
> > that hasn't been properly initialized yet.
> >
> > Behind the scenes, we allocate a shim program that is attached
> > to the trampoline and runs cgroup effective BPF programs array.
> > This shim has some rudimentary ref counting and can be shared
> > between several programs attaching to the same per-cgroup lsm hook.
> >
> > Note that this patch bloats cgroup size because we add 211
> > cgroup_bpf_attach_type(s) for simplicity sake. This will be
> > addressed in the subsequent patch.
> >
> > Also note that we only add non-sleepable flavor for now. To enable
> > sleepable use-cases, bpf_prog_run_array_cg has to grab trace rcu,
> > shim programs have to be freed via trace rcu, cgroup_bpf.effective
> > should be also trace-rcu-managed + maybe some other changes that
> > I'm not aware of.
> >
> > Signed-off-by: Stanislav Fomichev <sdf@xxxxxxxxxx>
> > ---
> >   arch/x86/net/bpf_jit_comp.c     |  24 +++--
> >   include/linux/bpf-cgroup-defs.h |   6 ++
> >   include/linux/bpf-cgroup.h      |   7 ++
> >   include/linux/bpf.h             |  25 +++++
> >   include/linux/bpf_lsm.h         |  14 +++
> >   include/linux/btf_ids.h         |   3 +-
> >   include/uapi/linux/bpf.h        |   1 +
> >   kernel/bpf/bpf_lsm.c            |  50 +++++++++
> >   kernel/bpf/btf.c                |  11 ++
> >   kernel/bpf/cgroup.c             | 181 ++++++++++++++++++++++++++++---
> >   kernel/bpf/core.c               |   2 +
> >   kernel/bpf/syscall.c            |  10 ++
> >   kernel/bpf/trampoline.c         | 184 ++++++++++++++++++++++++++++++++
> >   kernel/bpf/verifier.c           |  28 +++++
> >   tools/include/linux/btf_ids.h   |   4 +-
> >   tools/include/uapi/linux/bpf.h  |   1 +
> >   16 files changed, 527 insertions(+), 24 deletions(-)
>
> A few nits below.
> >
> > diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> > index a2b6d197c226..5cdebf4312da 100644
> > --- a/arch/x86/net/bpf_jit_comp.c
> > +++ b/arch/x86/net/bpf_jit_comp.c
> > @@ -1765,6 +1765,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> >                            struct bpf_tramp_link *l, int stack_size,
> >                            int run_ctx_off, bool save_ret)
> >   {
> > +     void (*exit)(struct bpf_prog *prog, u64 start,
> > +                  struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_exit;
> > +     u64 (*enter)(struct bpf_prog *prog,
> > +                  struct bpf_tramp_run_ctx *run_ctx) = __bpf_prog_enter;
> >       u8 *prog = *pprog;
> >       u8 *jmp_insn;
> >       int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
> [...]
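
As an aside for anyone skimming the thread: conceptually, the shim
program's bpf_func is just a small dispatcher that recovers a cgroup
from the hook's first argument (or from 'current') and then runs that
cgroup's effective program array for the hook's attach type. A rough
sketch, modeled on the __cgroup_bpf_run_lsm_sock() hunk further down;
the function name here is made up and the details are simplified, so
treat it as an illustration rather than the patch code:

  /* Illustration only: what the generated shim conceptually does for a
   * hook whose first argument is a 'struct sock *'.
   */
  static unsigned int lsm_cgroup_shim_run(const void *args,
                                          const struct bpf_prog *shim_prog)
  {
          /* first trampoline argument is the hook's 'struct sock *' */
          struct sock *sk = (void *)(unsigned long)((u64 *)args)[0];
          struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

          if (!cgrp)
                  return 0;

          /* run the cgroup-effective array for this hook's attach type */
          return bpf_prog_run_array_cg(&cgrp->bpf,
                                       shim_prog->aux->cgroup_atype,
                                       args, bpf_prog_run, 0, NULL);
  }

The per-hook attach type lives in shim_prog->aux->cgroup_atype, which
is what lets a single shared shim serve every program attached to the
same LSM hook across different cgroups.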
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index ea3674a415f9..70cf1dad91df 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -768,6 +768,10 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_tramp_
> >   u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx);
> >   void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> >                                          struct bpf_tramp_run_ctx *run_ctx);
> > +u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
> > +                                        struct bpf_tramp_run_ctx *run_ctx);
> > +void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
> > +                                        struct bpf_tramp_run_ctx *run_ctx);
> >   void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
> >   void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
> >
> > @@ -1035,6 +1039,7 @@ struct bpf_prog_aux {
> >       u64 load_time; /* ns since boottime */
> >       u32 verified_insns;
> >       struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
> > +     int cgroup_atype; /* enum cgroup_bpf_attach_type */
>
> Move cgroup_atype right after verified_insns to fill the existing gap?

Good idea!

> >       char name[BPF_OBJ_NAME_LEN];
> >   #ifdef CONFIG_SECURITY
> >       void *security;
> > @@ -1107,6 +1112,12 @@ struct bpf_tramp_link {
> >       u64 cookie;
> >   };
> >
> > +struct bpf_shim_tramp_link {
> > +     struct bpf_tramp_link tramp_link;
> > +     struct bpf_trampoline *tr;
> > +     atomic64_t refcnt;
> > +};
> > +
> >   struct bpf_tracing_link {
> >       struct bpf_tramp_link link;
> >       enum bpf_attach_type attach_type;
> > @@ -1185,6 +1196,9 @@ struct bpf_dummy_ops {
> >   int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
> >                               union bpf_attr __user *uattr);
> >   #endif
> > +int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> > +                                   struct bpf_attach_target_info *tgt_info);
> > +void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog);
> >   #else
> >   static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
> >   {
> > @@ -1208,6 +1222,14 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
> >   {
> >       return -EINVAL;
> >   }
> > +static inline int bpf_trampoline_link_cgroup_shim(struct bpf_prog *prog,
> > +                                                  struct bpf_attach_target_info *tgt_info)
> > +{
> > +     return -EOPNOTSUPP;
> > +}
> > +static inline void bpf_trampoline_unlink_cgroup_shim(struct bpf_prog *prog)
> > +{
> > +}
> >   #endif
> >
> >   struct bpf_array {
> > @@ -2250,6 +2272,8 @@ extern const struct bpf_func_proto bpf_loop_proto;
> >   extern const struct bpf_func_proto bpf_strncmp_proto;
> >   extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
> >   extern const struct bpf_func_proto bpf_kptr_xchg_proto;
> > +extern const struct bpf_func_proto bpf_set_retval_proto;
> > +extern const struct bpf_func_proto bpf_get_retval_proto;
> >
> >   const struct bpf_func_proto *tracing_prog_func_proto(
> >       enum bpf_func_id func_id, const struct bpf_prog *prog);
> > @@ -2366,6 +2390,7 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len);
> >
> >   struct btf_id_set;
> >   bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
> > +int btf_id_set_index(const struct btf_id_set *set, u32 id);
> >
> >   #define MAX_BPRINTF_VARARGS 12
> >
> > diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
> > index 479c101546ad..7f0e59f5f9be 100644
> > --- a/include/linux/bpf_lsm.h
> > +++ b/include/linux/bpf_lsm.h
> > @@ -42,6 +42,9 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto;
> >   extern const struct bpf_func_proto bpf_inode_storage_delete_proto;
> >   void bpf_inode_storage_free(struct inode *inode);
> >
> > +int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func);
> > +int bpf_lsm_hook_idx(u32 btf_id);
> > +
> >   #else /* !CONFIG_BPF_LSM */
> >
> >   static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
> > @@ -65,6 +68,17 @@ static inline void bpf_inode_storage_free(struct inode *inode)
> >   {
> >   }
> >
> > +static inline int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> > +                                           bpf_func_t *bpf_func)
> > +{
> > +     return -ENOENT;
> > +}
> > +
> > +static inline int bpf_lsm_hook_idx(u32 btf_id)
> > +{
> > +     return -EINVAL;
> > +}
> > +
> >   #endif /* CONFIG_BPF_LSM */
> >
> >   #endif /* _LINUX_BPF_LSM_H */
> > diff --git a/include/linux/btf_ids.h b/include/linux/btf_ids.h
> > index bc5d9cc34e4c..857cc37094da 100644
> > --- a/include/linux/btf_ids.h
> > +++ b/include/linux/btf_ids.h
> > @@ -178,7 +178,8 @@ extern struct btf_id_set name;
> >       BTF_SOCK_TYPE(BTF_SOCK_TYPE_TCP6, tcp6_sock)                    \
> >       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP, udp_sock)                      \
> >       BTF_SOCK_TYPE(BTF_SOCK_TYPE_UDP6, udp6_sock)                    \
> > -     BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)
> > +     BTF_SOCK_TYPE(BTF_SOCK_TYPE_UNIX, unix_sock)                    \
> > +     BTF_SOCK_TYPE(BTF_SOCK_TYPE_SOCKET, socket)
> >
> >   enum {
> >   #define BTF_SOCK_TYPE(name, str) name,
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 0210f85131b3..b9d2d6de63a7 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -998,6 +998,7 @@ enum bpf_attach_type {
> >       BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
> >       BPF_PERF_EVENT,
> >       BPF_TRACE_KPROBE_MULTI,
> > +     BPF_LSM_CGROUP,
> >       __MAX_BPF_ATTACH_TYPE
> >   };
> >
> > diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
> > index c1351df9f7ee..654c23577ad3 100644
> > --- a/kernel/bpf/bpf_lsm.c
> > +++ b/kernel/bpf/bpf_lsm.c
> > @@ -16,6 +16,7 @@
> >   #include <linux/bpf_local_storage.h>
> >   #include <linux/btf_ids.h>
> >   #include <linux/ima.h>
> > +#include <linux/bpf-cgroup.h>
> >
> >   /* For every LSM hook that allows attachment of BPF programs, declare a nop
> >    * function where a BPF program can be attached.
> > @@ -35,6 +36,46 @@ BTF_SET_START(bpf_lsm_hooks)
> >   #undef LSM_HOOK
> >   BTF_SET_END(bpf_lsm_hooks)
> >
> > +/* List of LSM hooks that should operate on 'current' cgroup regardless
> > + * of function signature.
> > + */
> > +BTF_SET_START(bpf_lsm_current_hooks)
> > +/* operate on freshly allocated sk without any cgroup association */
> > +BTF_ID(func, bpf_lsm_sk_alloc_security)
> > +BTF_ID(func, bpf_lsm_sk_free_security)
> > +BTF_SET_END(bpf_lsm_current_hooks)
> > +
> > +int bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog,
> > +                            bpf_func_t *bpf_func)
> > +{
> > +     const struct btf_param *args;
> > +
> > +     if (btf_type_vlen(prog->aux->attach_func_proto) < 1 ||
> > +         btf_id_set_contains(&bpf_lsm_current_hooks,
> > +                             prog->aux->attach_btf_id)) {
> > +             *bpf_func = __cgroup_bpf_run_lsm_current;
> > +             return 0;
> > +     }
> > +
> > +     args = btf_params(prog->aux->attach_func_proto);
> > +
> > +#ifdef CONFIG_NET
> > +     if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCKET])
> > +             *bpf_func = __cgroup_bpf_run_lsm_socket;
> > +     else if (args[0].type == btf_sock_ids[BTF_SOCK_TYPE_SOCK])
> > +             *bpf_func = __cgroup_bpf_run_lsm_sock;
> > +     else
> > +#endif
> > +             *bpf_func = __cgroup_bpf_run_lsm_current;
> > +
> > +     return 0;
>
> This function always return 0, change the return type to void?

Oh, good catch. Over time we've removed all error cases from it; will
convert it to void.

> > +}
> > +
> > +int bpf_lsm_hook_idx(u32 btf_id)
> > +{
> > +     return btf_id_set_index(&bpf_lsm_hooks, btf_id);
> > +}
> > +
> >   int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
> >                           const struct bpf_prog *prog)
> >   {
> > @@ -158,6 +199,15 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
> >               return prog->aux->sleepable ? &bpf_ima_file_hash_proto : NULL;
> >       case BPF_FUNC_get_attach_cookie:
> >               return bpf_prog_has_trampoline(prog) ? &bpf_get_attach_cookie_proto : NULL;
> > +     case BPF_FUNC_get_local_storage:
> > +             return prog->expected_attach_type == BPF_LSM_CGROUP ?
> > +                     &bpf_get_local_storage_proto : NULL;
> > +     case BPF_FUNC_set_retval:
> > +             return prog->expected_attach_type == BPF_LSM_CGROUP ?
> > +                     &bpf_set_retval_proto : NULL;
> > +     case BPF_FUNC_get_retval:
> > +             return prog->expected_attach_type == BPF_LSM_CGROUP ?
> > +                     &bpf_get_retval_proto : NULL;
> >       default:
> >               return tracing_prog_func_proto(func_id, prog);
> >       }
> > diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> > index 2f0b0440131c..a90f04a8a8ee 100644
> > --- a/kernel/bpf/btf.c
> > +++ b/kernel/bpf/btf.c
> > @@ -5248,6 +5248,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
> >
> >       if (arg == nr_args) {
> >               switch (prog->expected_attach_type) {
> > +             case BPF_LSM_CGROUP:
> >               case BPF_LSM_MAC:
> >               case BPF_TRACE_FEXIT:
> >                       /* When LSM programs are attached to void LSM hooks
> > @@ -6726,6 +6727,16 @@ static int btf_id_cmp_func(const void *a, const void *b)
> >       return *pa - *pb;
> >   }
> >
> > +int btf_id_set_index(const struct btf_id_set *set, u32 id)
> > +{
> > +     const u32 *p;
> > +
> > +     p = bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func);
> > +     if (!p)
> > +             return -1;
> > +     return p - set->ids;
> > +}
> > +
> >   bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
> >   {
> >       return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
> > diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
> > index 134785ab487c..2c356a38f4cf 100644
> > --- a/kernel/bpf/cgroup.c
> > +++ b/kernel/bpf/cgroup.c
> > @@ -14,6 +14,9 @@
> >   #include <linux/string.h>
> >   #include <linux/bpf.h>
> >   #include <linux/bpf-cgroup.h>
> > +#include <linux/btf_ids.h>
> > +#include <linux/bpf_lsm.h>
> > +#include <linux/bpf_verifier.h>
> >   #include <net/sock.h>
> >   #include <net/bpf_sk_storage.h>
> >
> > @@ -61,6 +64,85 @@ bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
> >       return run_ctx.retval;
> >   }
> >
> > +unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
> > +                                      const struct bpf_insn *insn)
> > +{
> > +     const struct bpf_prog *shim_prog;
> > +     struct sock *sk;
> > +     struct cgroup *cgrp;
> > +     int ret = 0;
> > +     u64 *regs;
> > +
> > +     regs = (u64 *)ctx;
> > +     sk = (void *)(unsigned long)regs[BPF_REG_0];
>
> Maybe just my own opinion. Using BPF_REG_0 as index is a little bit
> confusing. Maybe just use '0' to indicate the first parameters.
> Maybe change 'regs' to 'params' is also a better choice?
> In reality, trampline just passed an array of parameters to
> the program. The same for a few places below.

Sure, let's rename it and use 0. I'll do args instead of params, maybe?

> > +     /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> > +     shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
>
> I didn't experiment, but why container_of won't work?

There is a type check in container_of that doesn't seem to work for
flex arrays:

kernel/bpf/cgroup.c:78:14: error: static_assert failed due to requirement '__builtin_types_compatible_p(const struct bpf_insn, struct bpf_insn []"
        shim_prog = container_of(insn, struct bpf_prog, insnsi);
                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./include/linux/container_of.h:19:2: note: expanded from macro 'container_of'
        static_assert(__same_type(*(ptr), ((type *)0)->member) ||      \
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:77:34: note: expanded from macro 'static_assert'
#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:78:41: note: expanded from macro '__static_assert'
#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
                                        ^              ~~~~
1 error generated.

Am I doing it wrong?

> > +
> > +     cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
> > +     if (likely(cgrp))
> > +             ret = bpf_prog_run_array_cg(&cgrp->bpf,
> > +                                         shim_prog->aux->cgroup_atype,
> > +                                         ctx, bpf_prog_run, 0, NULL);
> > +     return ret;
> > +}
> > +
> > +unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
> > +                                        const struct bpf_insn *insn)
> > +{
> > +     const struct bpf_prog *shim_prog;
> > +     struct socket *sock;
> > +     struct cgroup *cgrp;
> > +     int ret = 0;
> > +     u64 *regs;
> > +
> > +     regs = (u64 *)ctx;
> > +     sock = (void *)(unsigned long)regs[BPF_REG_0];
> > +     /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
> > +     shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));
> > +
> > +     cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
> > +     if (likely(cgrp))
> > +             ret = bpf_prog_run_array_cg(&cgrp->bpf,
> > +                                         shim_prog->aux->cgroup_atype,
> > +                                         ctx, bpf_prog_run, 0, NULL);
> > +     return ret;
> > +}
> > +
> > +unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
> > +                                         const struct bpf_insn *insn)
> > +{
> > +     const struct bpf_prog *shim_prog;
> > +     struct cgroup *cgrp;
> > +     int ret = 0;
> > +
> > +     if (unlikely(!current))
> > +             return 0;
>
> I think we don't need this check.

SG, will remove it. Indeed, there don't seem to be many "if (current)"
checks elsewhere.

Thank you for the review! Will try to address everything and respin
sometime next week (in case others want to have a quick look).
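
P.S. To expand on the container_of() failure above: it trips because
insnsi is a flexible array member, so the member type seen by the
static_assert is an array type ('struct bpf_insn []') while the pointer
passed in dereferences to a plain 'const struct bpf_insn', and
__builtin_types_compatible_p() treats those two as incompatible. A
stand-alone userspace sketch of the same situation (type names made up
for the illustration) and the offsetof()-based workaround the patch
uses:

  #include <stddef.h>

  struct insn { unsigned long long code; };

  struct prog {
          int len;
          struct insn insnsi[];   /* flexible array member, like bpf_prog */
  };

  /* A container_of()-style static_assert would compare 'const struct insn'
   * against 'struct insn []' and fail, so back up by offsetof() manually
   * (kernel-style void * arithmetic, mirroring the patch).
   */
  static struct prog *prog_of(const struct insn *insn)
  {
          return (struct prog *)((void *)insn - offsetof(struct prog, insnsi));
  }

Nothing here is specific to the kernel; it is purely the array-vs-element
type check inside container_of()'s static_assert.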