The bpf_map iterator is implemented. The bpf program is called at seq_ops show() and stop() functions. bpf_iter_get_prog() will retrieve bpf program and other parameters during seq_file object traversal. In show() function, bpf program will traverse every valid object, and in stop() function, bpf program will be called one more time after all objects are traversed. The first member of the bpf context contains the meta data, namely, the seq_file, session_id and seq_num. Here, the session_id is a unique id for one specific seq_file session. The seq_num is the number of bpf prog invocations in the current session. The bpf_iter_get_prog(), which will be implemented in subsequent patches, will have more information on how meta data are computed. The second member of the bpf context is a struct bpf_map pointer, which bpf program can examine. The target implementation also provided the structure definition for bpf program and the function definition for verifier to verify the bpf program. Specifically for bpf_map iterator, the structure is "bpf_iter__bpf_map" andd the function is "__bpf_iter__bpf_map". More targets will be implemented later, all of which will include the following, similar to bpf_map iterator: - seq_ops() implementation - function definition for verifier to verify the bpf program - seq_file private data size - additional target feature Signed-off-by: Yonghong Song <yhs@xxxxxx> --- include/linux/bpf.h | 10 ++++ kernel/bpf/Makefile | 2 +- kernel/bpf/bpf_iter.c | 19 ++++++++ kernel/bpf/map_iter.c | 107 ++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 13 +++++ 5 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 kernel/bpf/map_iter.c diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e56abc1e2f1..4ac8d61f7c3e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1078,6 +1078,7 @@ int generic_map_update_batch(struct bpf_map *map, int generic_map_delete_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr); +struct bpf_map *bpf_map_get_curr_or_next(u32 *id); extern int sysctl_unprivileged_bpf_disabled; @@ -1118,7 +1119,16 @@ struct bpf_iter_reg { u32 target_feature; }; +struct bpf_iter_meta { + __bpf_md_ptr(struct seq_file *, seq); + u64 session_id; + u64 seq_num; +}; + int bpf_iter_reg_target(struct bpf_iter_reg *reg_info); +struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, + u64 *session_id, u64 *seq_num, bool is_last); +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 6a8b0febd3f6..b2b5eefc5254 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -2,7 +2,7 @@ obj-y := core.o CFLAGS_core.o += $(call cc-disable-warning, override-init) -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o obj-$(CONFIG_BPF_SYSCALL) += disasm.o diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 1115b978607a..284c95587803 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -48,3 +48,22 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info) return 0; } + +struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size, + u64 *session_id, u64 *seq_num, bool is_last) +{ + return NULL; +} + +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) +{ + int ret; + + migrate_disable(); + rcu_read_lock(); + ret = BPF_PROG_RUN(prog, ctx); + rcu_read_unlock(); + migrate_enable(); + + return ret; +} diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c new file mode 100644 index 000000000000..bb3ad4c3bde5 --- /dev/null +++ b/kernel/bpf/map_iter.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <linux/bpf.h> +#include <linux/fs.h> +#include <linux/filter.h> +#include <linux/kernel.h> + +struct bpf_iter_seq_map_info { + struct bpf_map *map; + u32 id; +}; + +static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_map_info *info = seq->private; + struct bpf_map *map; + u32 id = info->id; + + map = bpf_map_get_curr_or_next(&id); + if (IS_ERR_OR_NULL(map)) + return NULL; + + ++*pos; + info->map = map; + info->id = id; + return map; +} + +static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_map_info *info = seq->private; + struct bpf_map *map; + + ++*pos; + ++info->id; + map = bpf_map_get_curr_or_next(&info->id); + if (IS_ERR_OR_NULL(map)) + return NULL; + + bpf_map_put(info->map); + info->map = map; + return map; +} + +struct bpf_iter__bpf_map { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); +}; + +int __init __bpf_iter__bpf_map(struct bpf_iter_meta *meta, struct bpf_map *map) +{ + return 0; +} + +static int bpf_map_seq_show(struct seq_file *seq, void *v) +{ + struct bpf_iter_meta meta; + struct bpf_iter__bpf_map ctx; + struct bpf_prog *prog; + int ret = 0; + + ctx.meta = &meta; + ctx.map = v; + meta.seq = seq; + prog = bpf_iter_get_prog(seq, sizeof(struct bpf_iter_seq_map_info), + &meta.session_id, &meta.seq_num, + v == (void *)0); + if (prog) + ret = bpf_iter_run_prog(prog, &ctx); + + return ret == 0 ? 0 : -EINVAL; +} + +static void bpf_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_map_info *info = seq->private; + + if (!v) + bpf_map_seq_show(seq, v); + + if (info->map) { + bpf_map_put(info->map); + info->map = NULL; + } +} + +static const struct seq_operations bpf_map_seq_ops = { + .start = bpf_map_seq_start, + .next = bpf_map_seq_next, + .stop = bpf_map_seq_stop, + .show = bpf_map_seq_show, +}; + +static int __init bpf_map_iter_init(void) +{ + struct bpf_iter_reg reg_info = { + .target = "bpf_map", + .target_func_name = "__bpf_iter__bpf_map", + .seq_ops = &bpf_map_seq_ops, + .seq_priv_size = sizeof(struct bpf_iter_seq_map_info), + .target_feature = 0, + }; + + return bpf_iter_reg_target(®_info); +} + +late_initcall(bpf_map_iter_init); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7626b8024471..022187640943 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2800,6 +2800,19 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr, return err; } +struct bpf_map *bpf_map_get_curr_or_next(u32 *id) +{ + struct bpf_map *map; + + spin_lock_bh(&map_idr_lock); + map = idr_get_next(&map_idr, id); + if (map) + map = __bpf_map_inc_not_zero(map, false); + spin_unlock_bh(&map_idr_lock); + + return map; +} + #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id struct bpf_prog *bpf_prog_by_id(u32 id) -- 2.24.1