Re: [PATCH v2 2/5] seccomp, bpf: Introduce SECCOMP_LOAD_FILTER operation

Hengqi Chen <hengqi.chen@xxxxxxxxx> · Mon, 23 Oct 2023 12:17:35 +0800

On Mon, Oct 16, 2023 at 8:44 PM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote:
>
> On 10/16/23 1:29 AM, Hengqi Chen wrote:
> > This patch adds a new operation named SECCOMP_LOAD_FILTER.
> > It accepts a sock_fprog the same as SECCOMP_SET_MODE_FILTER
> > but only performs the loading process. If succeed, return a
> > new fd associated with the JITed BPF program (the filter).
> > The filter can then be pinned to bpffs using the returned
> > fd and reused for different processes. To distinguish the
> > filter from other BPF progs, BPF_PROG_TYPE_SECCOMP is added.
> >
> > Signed-off-by: Hengqi Chen <hengqi.chen@xxxxxxxxx>
> > ---
> >   include/uapi/linux/bpf.h       |  1 +
> >   include/uapi/linux/seccomp.h   |  1 +
> >   kernel/seccomp.c               | 43 ++++++++++++++++++++++++++++++++++
> >   tools/include/uapi/linux/bpf.h |  1 +
> >   4 files changed, 46 insertions(+)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index 7ba61b75bc0e..61c80ffb1724 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -995,6 +995,7 @@ enum bpf_prog_type {
> >       BPF_PROG_TYPE_SK_LOOKUP,
> >       BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
> >       BPF_PROG_TYPE_NETFILTER,
> > +     BPF_PROG_TYPE_SECCOMP,
>
> Please don't extend UAPI surface if this is not reachable/usable from user
> space anyway.
>
> >   enum bpf_attach_type {
> > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> > index dbfc9b37fcae..ee2c83697810 100644
> > --- a/include/uapi/linux/seccomp.h
> > +++ b/include/uapi/linux/seccomp.h
> > @@ -16,6 +16,7 @@
> >   #define SECCOMP_SET_MODE_FILTER             1
> >   #define SECCOMP_GET_ACTION_AVAIL    2
> >   #define SECCOMP_GET_NOTIF_SIZES             3
> > +#define SECCOMP_LOAD_FILTER          4
> >
> >   /* Valid flags for SECCOMP_SET_MODE_FILTER */
> >   #define SECCOMP_FILTER_FLAG_TSYNC           (1UL << 0)
> > diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> > index faf84fc892eb..c9f6a19f7a4e 100644
> > --- a/kernel/seccomp.c
> > +++ b/kernel/seccomp.c
> > @@ -17,6 +17,7 @@
> >
> >   #include <linux/refcount.h>
> >   #include <linux/audit.h>
> > +#include <linux/bpf.h>
> >   #include <linux/compat.h>
> >   #include <linux/coredump.h>
> >   #include <linux/kmemleak.h>
> > @@ -25,6 +26,7 @@
> >   #include <linux/sched.h>
> >   #include <linux/sched/task_stack.h>
> >   #include <linux/seccomp.h>
> > +#include <linux/security.h>
> >   #include <linux/slab.h>
> >   #include <linux/syscalls.h>
> >   #include <linux/sysctl.h>
> > @@ -2032,12 +2034,48 @@ static long seccomp_set_mode_filter(unsigned int flags,
> >       seccomp_filter_free(prepared);
> >       return ret;
> >   }
> > +
> > +static long seccomp_load_filter(const char __user *filter)
> > +{
> > +     struct sock_fprog fprog;
> > +     struct bpf_prog *prog;
> > +     int ret;
> > +
> > +     ret = seccomp_copy_user_filter(filter, &fprog);
> > +     if (ret)
> > +             return ret;
> > +
> > +     ret = seccomp_prepare_prog(&prog, &fprog);
> > +     if (ret)
> > +             return ret;
> > +
> > +     ret = security_bpf_prog_alloc(prog->aux);
> > +     if (ret) {
> > +             bpf_prog_free(prog);
> > +             return ret;
> > +     }
> > +
> > +     prog->aux->user = get_current_user();
> > +     atomic64_set(&prog->aux->refcnt, 1);
> > +     prog->type = BPF_PROG_TYPE_SECCOMP;
> > +
> > +     ret = bpf_prog_new_fd(prog);
> > +     if (ret < 0)
> > +             bpf_prog_put(prog);
>
> My bigger concern here is that bpf_prog_new_fd() is only used by eBPF (not cBPF).
>
> Then you get an 'eBPF'-like fd back to user space which you can pass to various
> other bpf(2) commands like BPF_OBJ_GET_INFO_BY_FD etc which all have the assumption
> that this is a proper looking eBPF prog fd.
>
> There may be breakage/undefined behavior in subtle ways.
>
> I would suggest two potential alternatives :
>
> 1) Build a seccomp-specific fd via anon_inode_getfd() so that BPF side does not
>     confuse it with bpf_prog_fops and therefore does not recognize it in bpf(2)
>     as a prog fd.
>
> 2) Extend seccomp where proper eBPF could be supported.
>
> If option 2) is not realistic (where you would get this out of the box), then I
> think 1) could be however.
>

The intention is to use bpffs, so we need a bpf prog fd.
I prefer option 2, though it requires a bit of work.
That way, we could also write seccomp filter in eBPF language.

Kees, could you share your opinions ? If you have no objection,
I will continue this work.

> > +     return ret;
> > +}
> >   #else
> >   static inline long seccomp_set_mode_filter(unsigned int flags,
> >                                          const char __user *filter)
> >   {
> >       return -EINVAL;
> >   }
> > +
> > +static inline long seccomp_load_filter(const char __user *filter)
> > +{
> > +     return -EINVAL;
> > +}
> >   #endif
> >
> >   static long seccomp_get_action_avail(const char __user *uaction)
> > @@ -2099,6 +2137,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
> >                       return -EINVAL;
> >
> >               return seccomp_get_notif_sizes(uargs);
> > +     case SECCOMP_LOAD_FILTER:
> > +             if (flags != 0)
> > +                     return -EINVAL;
> > +
> > +             return seccomp_load_filter(uargs);
> >       default:
> >               return -EINVAL;
> >       }
> > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > index 7ba61b75bc0e..61c80ffb1724 100644
> > --- a/tools/include/uapi/linux/bpf.h
> > +++ b/tools/include/uapi/linux/bpf.h
> > @@ -995,6 +995,7 @@ enum bpf_prog_type {
> >       BPF_PROG_TYPE_SK_LOOKUP,
> >       BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
> >       BPF_PROG_TYPE_NETFILTER,
> > +     BPF_PROG_TYPE_SECCOMP,
> >   };
> >
> >   enum bpf_attach_type {
> >
>