On Mon, Oct 16, 2023 at 8:44 PM Daniel Borkmann <daniel@xxxxxxxxxxxxx> wrote: > > On 10/16/23 1:29 AM, Hengqi Chen wrote: > > This patch adds a new operation named SECCOMP_LOAD_FILTER. > > It accepts a sock_fprog the same as SECCOMP_SET_MODE_FILTER > > but only performs the loading process. If succeed, return a > > new fd associated with the JITed BPF program (the filter). > > The filter can then be pinned to bpffs using the returned > > fd and reused for different processes. To distinguish the > > filter from other BPF progs, BPF_PROG_TYPE_SECCOMP is added. > > > > Signed-off-by: Hengqi Chen <hengqi.chen@xxxxxxxxx> > > --- > > include/uapi/linux/bpf.h | 1 + > > include/uapi/linux/seccomp.h | 1 + > > kernel/seccomp.c | 43 ++++++++++++++++++++++++++++++++++ > > tools/include/uapi/linux/bpf.h | 1 + > > 4 files changed, 46 insertions(+) > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > > index 7ba61b75bc0e..61c80ffb1724 100644 > > --- a/include/uapi/linux/bpf.h > > +++ b/include/uapi/linux/bpf.h > > @@ -995,6 +995,7 @@ enum bpf_prog_type { > > BPF_PROG_TYPE_SK_LOOKUP, > > BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ > > BPF_PROG_TYPE_NETFILTER, > > + BPF_PROG_TYPE_SECCOMP, > > Please don't extend UAPI surface if this is not reachable/usable from user > space anyway. > > > enum bpf_attach_type { > > diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h > > index dbfc9b37fcae..ee2c83697810 100644 > > --- a/include/uapi/linux/seccomp.h > > +++ b/include/uapi/linux/seccomp.h > > @@ -16,6 +16,7 @@ > > #define SECCOMP_SET_MODE_FILTER 1 > > #define SECCOMP_GET_ACTION_AVAIL 2 > > #define SECCOMP_GET_NOTIF_SIZES 3 > > +#define SECCOMP_LOAD_FILTER 4 > > > > /* Valid flags for SECCOMP_SET_MODE_FILTER */ > > #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > > index faf84fc892eb..c9f6a19f7a4e 100644 > > --- a/kernel/seccomp.c > > +++ b/kernel/seccomp.c > > @@ -17,6 +17,7 @@ > > > > #include <linux/refcount.h> > > #include <linux/audit.h> > > +#include <linux/bpf.h> > > #include <linux/compat.h> > > #include <linux/coredump.h> > > #include <linux/kmemleak.h> > > @@ -25,6 +26,7 @@ > > #include <linux/sched.h> > > #include <linux/sched/task_stack.h> > > #include <linux/seccomp.h> > > +#include <linux/security.h> > > #include <linux/slab.h> > > #include <linux/syscalls.h> > > #include <linux/sysctl.h> > > @@ -2032,12 +2034,48 @@ static long seccomp_set_mode_filter(unsigned int flags, > > seccomp_filter_free(prepared); > > return ret; > > } > > + > > +static long seccomp_load_filter(const char __user *filter) > > +{ > > + struct sock_fprog fprog; > > + struct bpf_prog *prog; > > + int ret; > > + > > + ret = seccomp_copy_user_filter(filter, &fprog); > > + if (ret) > > + return ret; > > + > > + ret = seccomp_prepare_prog(&prog, &fprog); > > + if (ret) > > + return ret; > > + > > + ret = security_bpf_prog_alloc(prog->aux); > > + if (ret) { > > + bpf_prog_free(prog); > > + return ret; > > + } > > + > > + prog->aux->user = get_current_user(); > > + atomic64_set(&prog->aux->refcnt, 1); > > + prog->type = BPF_PROG_TYPE_SECCOMP; > > + > > + ret = bpf_prog_new_fd(prog); > > + if (ret < 0) > > + bpf_prog_put(prog); > > My bigger concern here is that bpf_prog_new_fd() is only used by eBPF (not cBPF). > > Then you get an 'eBPF'-like fd back to user space which you can pass to various > other bpf(2) commands like BPF_OBJ_GET_INFO_BY_FD etc which all have the assumption > that this is a proper looking eBPF prog fd. > > There may be breakage/undefined behavior in subtle ways. > > I would suggest two potential alternatives : > > 1) Build a seccomp-specific fd via anon_inode_getfd() so that BPF side does not > confuse it with bpf_prog_fops and therefore does not recognize it in bpf(2) > as a prog fd. > > 2) Extend seccomp where proper eBPF could be supported. > > If option 2) is not realistic (where you would get this out of the box), then I > think 1) could be however. > The intention is to use bpffs, so we need a bpf prog fd. I prefer option 2, though it requires a bit of work. That way, we could also write seccomp filter in eBPF language. Kees, could you share your opinions ? If you have no objection, I will continue this work. > > + return ret; > > +} > > #else > > static inline long seccomp_set_mode_filter(unsigned int flags, > > const char __user *filter) > > { > > return -EINVAL; > > } > > + > > +static inline long seccomp_load_filter(const char __user *filter) > > +{ > > + return -EINVAL; > > +} > > #endif > > > > static long seccomp_get_action_avail(const char __user *uaction) > > @@ -2099,6 +2137,11 @@ static long do_seccomp(unsigned int op, unsigned int flags, > > return -EINVAL; > > > > return seccomp_get_notif_sizes(uargs); > > + case SECCOMP_LOAD_FILTER: > > + if (flags != 0) > > + return -EINVAL; > > + > > + return seccomp_load_filter(uargs); > > default: > > return -EINVAL; > > } > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > > index 7ba61b75bc0e..61c80ffb1724 100644 > > --- a/tools/include/uapi/linux/bpf.h > > +++ b/tools/include/uapi/linux/bpf.h > > @@ -995,6 +995,7 @@ enum bpf_prog_type { > > BPF_PROG_TYPE_SK_LOOKUP, > > BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ > > BPF_PROG_TYPE_NETFILTER, > > + BPF_PROG_TYPE_SECCOMP, > > }; > > > > enum bpf_attach_type { > > >