From: Alexei Starovoitov <ast@xxxxxxxxxx> Add kernel module with user mode driver that populates bpffs with BPF iterators. $ mount bpffs /sys/fs/bpf/ -t bpf $ ls -la /sys/fs/bpf/ total 4 drwxrwxrwt 2 root root 0 Jul 2 00:27 . drwxr-xr-x 19 root root 4096 Jul 2 00:09 .. -rw------- 1 root root 0 Jul 2 00:27 maps -rw------- 1 root root 0 Jul 2 00:27 progs The user mode driver will load BPF Type Formats, create BPF maps, populate BPF maps, load two BPF programs, attach them to BPF iterators, and finally send two bpf_link IDs back to the kernel. The kernel will pin two bpf_links into newly mounted bpffs instance under names "progs" and "maps". These two files become human readable. $ cat /sys/fs/bpf/progs id name pages attached 11 dump_bpf_map 1 bpf_iter_bpf_map 12 dump_bpf_prog 1 bpf_iter_bpf_prog 27 test_pkt_access 1 32 test_main 1 test_pkt_access test_pkt_access 33 test_subprog1 1 test_pkt_access_subprog1 test_pkt_access 34 test_subprog2 1 test_pkt_access_subprog2 test_pkt_access 35 test_subprog3 1 test_pkt_access_subprog3 test_pkt_access 36 new_get_skb_len 1 get_skb_len test_pkt_access 37 new_get_skb_ifi 1 get_skb_ifindex test_pkt_access 38 new_get_constan 1 get_constant test_pkt_access The BPF program dump_bpf_prog() in iterators.bpf.c is printing this data about all BPF programs currently loaded in the system. This information is unstable and will change from kernel to kernel. Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx> --- init/Kconfig | 2 + kernel/bpf/Makefile | 1 + kernel/bpf/inode.c | 75 ++++++++++++++++ kernel/bpf/preload/Kconfig | 15 ++++ kernel/bpf/preload/Makefile | 21 +++++ kernel/bpf/preload/bpf_preload.h | 15 ++++ kernel/bpf/preload/bpf_preload_kern.c | 87 +++++++++++++++++++ kernel/bpf/preload/bpf_preload_umd_blob.S | 7 ++ .../preload/iterators/bpf_preload_common.h | 8 ++ kernel/bpf/preload/iterators/iterators.c | 81 +++++++++++++++++ 10 files changed, 312 insertions(+) create mode 100644 kernel/bpf/preload/Kconfig create mode 100644 kernel/bpf/preload/Makefile create mode 100644 kernel/bpf/preload/bpf_preload.h create mode 100644 kernel/bpf/preload/bpf_preload_kern.c create mode 100644 kernel/bpf/preload/bpf_preload_umd_blob.S create mode 100644 kernel/bpf/preload/iterators/bpf_preload_common.h create mode 100644 kernel/bpf/preload/iterators/iterators.c diff --git a/init/Kconfig b/init/Kconfig index a46aa8f3174d..278975a5daf2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2313,3 +2313,5 @@ config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE # <asm/syscall_wrapper.h>. config ARCH_HAS_SYSCALL_WRAPPER def_bool n + +source "kernel/bpf/preload/Kconfig" diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index e6eb9c0402da..19e137aae40e 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -29,3 +29,4 @@ ifeq ($(CONFIG_BPF_JIT),y) obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o obj-${CONFIG_BPF_LSM} += bpf_lsm.o endif +obj-$(CONFIG_BPF_PRELOAD) += preload/ diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index fb878ba3f22f..8d33edd5c69c 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -20,6 +20,7 @@ #include <linux/filter.h> #include <linux/bpf.h> #include <linux/bpf_trace.h> +#include "preload/bpf_preload.h" enum bpf_type { BPF_TYPE_UNSPEC = 0, @@ -409,6 +410,26 @@ static const struct inode_operations bpf_dir_iops = { .unlink = simple_unlink, }; +static int bpf_link_pin_kernel(struct dentry *parent, + const char *name, struct bpf_link *link) +{ + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + struct dentry *dentry; + int ret; + + inode_lock(parent->d_inode); + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) { + inode_unlock(parent->d_inode); + return PTR_ERR(dentry); + } + ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, + &bpf_iter_fops); + dput(dentry); + inode_unlock(parent->d_inode); + return ret; +} + static int bpf_obj_do_pin(const char __user *pathname, void *raw, enum bpf_type type) { @@ -638,6 +659,57 @@ static int bpf_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } +struct bpf_preload_ops bpf_preload_ops = { .info.driver_name = "bpf_preload" }; +EXPORT_SYMBOL_GPL(bpf_preload_ops); + +static int populate_bpffs(struct dentry *parent) +{ + struct bpf_link *links[BPF_PRELOAD_LINKS] = {}; + u32 link_id[BPF_PRELOAD_LINKS] = {}; + int err = 0, i; + + mutex_lock(&bpf_preload_ops.lock); + if (!bpf_preload_ops.do_preload) { + mutex_unlock(&bpf_preload_ops.lock); + request_module("bpf_preload"); + mutex_lock(&bpf_preload_ops.lock); + + if (!bpf_preload_ops.do_preload) { + pr_err("bpf_preload module is missing.\n" + "bpffs will not have iterators.\n"); + goto out; + } + } + + if (!bpf_preload_ops.info.tgid) { + err = bpf_preload_ops.do_preload(link_id); + if (err) + goto out; + for (i = 0; i < BPF_PRELOAD_LINKS; i++) { + links[i] = bpf_link_by_id(link_id[i]); + if (IS_ERR(links[i])) { + err = PTR_ERR(links[i]); + goto out; + } + } + err = bpf_link_pin_kernel(parent, "maps", links[0]); + if (err) + goto out; + err = bpf_link_pin_kernel(parent, "progs", links[1]); + if (err) + goto out; + err = bpf_preload_ops.do_finish(); + if (err) + goto out; + } +out: + mutex_unlock(&bpf_preload_ops.lock); + for (i = 0; i < BPF_PRELOAD_LINKS && err; i++) + if (!IS_ERR_OR_NULL(links[i])) + bpf_link_put(links[i]); + return err; +} + static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) { static const struct tree_descr bpf_rfiles[] = { { "" } }; @@ -656,6 +728,7 @@ static int bpf_fill_super(struct super_block *sb, struct fs_context *fc) inode->i_mode &= ~S_IALLUGO; inode->i_mode |= S_ISVTX | opts->mode; + populate_bpffs(sb->s_root); return 0; } @@ -705,6 +778,8 @@ static int __init bpf_init(void) { int ret; + mutex_init(&bpf_preload_ops.lock); + ret = sysfs_create_mount_point(fs_kobj, "bpf"); if (ret) return ret; diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig new file mode 100644 index 000000000000..b737ce4c2bab --- /dev/null +++ b/kernel/bpf/preload/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0-only +menuconfig BPF_PRELOAD + bool "Load kernel specific BPF programs at kernel boot time (BPF_PRELOAD)" + depends on BPF + help + tbd + +if BPF_PRELOAD +config BPF_PRELOAD_UMD + tristate "bpf_preload kernel module with user mode driver" + depends on CC_CAN_LINK_STATIC + default m + help + This builds bpf_preload kernel module with embedded user mode driver +endif diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile new file mode 100644 index 000000000000..191d82209842 --- /dev/null +++ b/kernel/bpf/preload/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +LIBBPF := $(srctree)/../../tools/lib/bpf +userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi -I $(LIBBPF) \ + -I $(srctree)/tools/lib/ \ + -I $(srctree)/kernel/bpf/preload/iterators/ -Wno-int-conversion \ + -DCOMPAT_NEED_REALLOCARRAY + +userprogs := bpf_preload_umd + +LIBBPF_O := $(LIBBPF)/bpf.o $(LIBBPF)/libbpf.o $(LIBBPF)/btf.o $(LIBBPF)/libbpf_errno.o \ + $(LIBBPF)/str_error.o $(LIBBPF)/hashmap.o $(LIBBPF)/libbpf_probes.o + +bpf_preload_umd-objs := iterators/iterators.o $(LIBBPF_O) + +userldflags += -lelf -lz + +$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd + +obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o +bpf_preload-objs += bpf_preload_kern.o bpf_preload_umd_blob.o diff --git a/kernel/bpf/preload/bpf_preload.h b/kernel/bpf/preload/bpf_preload.h new file mode 100644 index 000000000000..0d852574c02a --- /dev/null +++ b/kernel/bpf/preload/bpf_preload.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_PRELOAD_H +#define _BPF_PRELOAD_H + +#include <linux/usermode_driver.h> + +struct bpf_preload_ops { + struct umd_info info; + struct mutex lock; + int (*do_preload)(u32 *); + int (*do_finish)(void); +}; +extern struct bpf_preload_ops bpf_preload_ops; +#define BPF_PRELOAD_LINKS 2 +#endif diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c new file mode 100644 index 000000000000..bfcd1fb3891c --- /dev/null +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pid.h> +#include <linux/fs.h> +#include <linux/sched/signal.h> +#include "bpf_preload.h" +#include "iterators/bpf_preload_common.h" + +extern char bpf_preload_umd_start; +extern char bpf_preload_umd_end; + +static int do_preload(u32 *link_id) +{ + int magic = BPF_PRELOAD_START; + struct pid *tgid; + int id, i, err; + loff_t pos; + ssize_t n; + + err = fork_usermode_driver(&bpf_preload_ops.info); + if (err) + return err; + tgid = bpf_preload_ops.info.tgid; + + /* send the start magic to let UMD proceed with loading BPF progs */ + n = __kernel_write(bpf_preload_ops.info.pipe_to_umh, + &magic, sizeof(magic), &pos); + if (n != sizeof(magic)) + return -EPIPE; + + /* receive bpf_link IDs from UMD */ + pos = 0; + for (i = 0; i < BPF_PRELOAD_LINKS; i++) { + n = kernel_read(bpf_preload_ops.info.pipe_from_umh, + &id, sizeof(id), &pos); + if (n != sizeof(id)) + return -EPIPE; + link_id[i] = id; + } + return 0; +} + +static int do_finish(void) +{ + int magic = BPF_PRELOAD_END; + struct pid *tgid; + loff_t pos; + ssize_t n; + + /* send the last magic to UMD. It will do a normal exit. */ + n = __kernel_write(bpf_preload_ops.info.pipe_to_umh, + &magic, sizeof(magic), &pos); + if (n != sizeof(magic)) + return -EPIPE; + tgid = bpf_preload_ops.info.tgid; + wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); + bpf_preload_ops.info.tgid = NULL; + return 0; +} + +static int __init load_umd(void) +{ + int err; + + err = umd_load_blob(&bpf_preload_ops.info, &bpf_preload_umd_start, + &bpf_preload_umd_end - &bpf_preload_umd_start); + if (err) + return err; + bpf_preload_ops.do_preload = do_preload; + bpf_preload_ops.do_finish = do_finish; + return err; +} + +static void __exit fini_umd(void) +{ + bpf_preload_ops.do_preload = NULL; + bpf_preload_ops.do_finish = NULL; + /* kill UMD in case it's still there due to earlier error */ + kill_pid(bpf_preload_ops.info.tgid, SIGKILL, 1); + bpf_preload_ops.info.tgid = NULL; + umd_unload_blob(&bpf_preload_ops.info); +} +late_initcall(load_umd); +module_exit(fini_umd); +MODULE_LICENSE("GPL"); diff --git a/kernel/bpf/preload/bpf_preload_umd_blob.S b/kernel/bpf/preload/bpf_preload_umd_blob.S new file mode 100644 index 000000000000..d0fe58c0734a --- /dev/null +++ b/kernel/bpf/preload/bpf_preload_umd_blob.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .section .init.rodata, "a" + .global bpf_preload_umd_start +bpf_preload_umd_start: + .incbin "bpf_preload_umd" + .global bpf_preload_umd_end +bpf_preload_umd_end: diff --git a/kernel/bpf/preload/iterators/bpf_preload_common.h b/kernel/bpf/preload/iterators/bpf_preload_common.h new file mode 100644 index 000000000000..f2e77711cd95 --- /dev/null +++ b/kernel/bpf/preload/iterators/bpf_preload_common.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BPF_PRELOAD_COMMON_H +#define _BPF_PRELOAD_COMMON_H + +#define BPF_PRELOAD_START 0x5555 +#define BPF_PRELOAD_END 0xAAAA + +#endif diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c new file mode 100644 index 000000000000..74f23580b25f --- /dev/null +++ b/kernel/bpf/preload/iterators/iterators.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <argp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <bpf/libbpf.h> +#include <bpf/bpf.h> +#include <sys/mount.h> +#include "iterators.skel.h" +#include "bpf_preload_common.h" + +int to_kernel = -1; +int from_kernel = 0; + +static int send_id_to_kernel(struct bpf_link *link) +{ + struct bpf_link_info info = {}; + __u32 info_len = sizeof(info); + int err; + + err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len); + if (err) + return err; + if (write(to_kernel, &info.id, sizeof(info.id)) != sizeof(info.id)) + return -EPIPE; + return 0; +} + +int main(int argc, char **argv) +{ + struct iterators_bpf *skel; + int err, magic; + int debug_fd; + + debug_fd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC); + if (debug_fd < 0) + return -1; + to_kernel = dup(1); + close(1); + dup(debug_fd); + /* now stdin and stderr point to /dev/console */ + + read(from_kernel, &magic, sizeof(magic)); + if (magic != BPF_PRELOAD_START) { + printf("bad start magic %d\n", magic); + return -1; + } + + /* libbpf opens BPF object and loads it into the kernel */ + skel = iterators_bpf__open_and_load(); + if (!skel) + return -1; + + err = iterators_bpf__attach(skel); + if (err) + goto cleanup; + + /* send two bpf_link IDs to the kernel */ + err = send_id_to_kernel(skel->links.dump_bpf_map); + if (err) + goto cleanup; + err = send_id_to_kernel(skel->links.dump_bpf_prog); + if (err) + goto cleanup; + + /* The kernel will proceed with pinnging the links in bpffs. + * UMD will wait on read from pipe. + */ + read(from_kernel, &magic, sizeof(magic)); + if (magic != BPF_PRELOAD_END) { + printf("bad final magic %d\n", magic); + err = -EINVAL; + } +cleanup: + iterators_bpf__destroy(skel); + + return err != 0; +} -- 2.23.0