This patch adds the kernel-side and API changes for a new helper function, bpf_for_each: long bpf_for_each(u32 nr_interations, void *callback_fn, void *callback_ctx, u64 flags); bpf_for_each invokes the "callback_fn" nr_iterations number of times or until the callback_fn returns 1. A few things to please note: ~ The "u64 flags" parameter is currently unused but is included in case a future use case for it arises. ~ In the kernel-side implementation of bpf_for_each (kernel/bpf/bpf_iter.c), bpf_callback_t is used as the callback function cast. ~ A program can have nested bpf_for_each calls but the program must still adhere to the verifier constraint of its stack depth (the stack depth cannot exceed MAX_BPF_STACK)) ~ The next patch will include the tests and benchmark Signed-off-by: Joanne Koong <joannekoong@xxxxxx> --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 23 +++++++++++++++++++++++ kernel/bpf/bpf_iter.c | 32 ++++++++++++++++++++++++++++++++ kernel/bpf/helpers.c | 2 ++ kernel/bpf/verifier.c | 28 ++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 23 +++++++++++++++++++++++ 6 files changed, 109 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6deebf8bf78f..d9b69a896c91 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2107,6 +2107,7 @@ extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; +extern const struct bpf_func_proto bpf_for_each_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; extern const struct bpf_func_proto bpf_sk_setsockopt_proto; extern const struct bpf_func_proto bpf_sk_getsockopt_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bd0c9f0487f6..ea5098920ed2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4750,6 +4750,28 @@ union bpf_attr { * The number of traversed map elements for success, **-EINVAL** for * invalid **flags**. * + * long bpf_for_each(u32 nr_iterations, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_iterations**, call **callback_fn** function with + * **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the iteration. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * iteration. If return value is 1, the helper will skip the rest of + * the iterations and return. Other return values are not used now. + * + * Return + * The number of iterations performed, **-EINVAL** for invalid **flags** + * or a null **callback_fn**. + * * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) * Description * Outputs a string into the **str** buffer of size **str_size** @@ -5105,6 +5127,7 @@ union bpf_attr { FN(sock_from_file), \ FN(check_mtu), \ FN(for_each_map_elem), \ + FN(for_each), \ FN(snprintf), \ FN(sys_bpf), \ FN(btf_find_by_name_kind), \ diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b2ee45064e06..cb742c50898a 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -714,3 +714,35 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = { .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; + +BPF_CALL_4(bpf_for_each, u32, nr_iterations, void *, callback_fn, void *, callback_ctx, + u64, flags) +{ + bpf_callback_t callback = (bpf_callback_t)callback_fn; + u64 err; + u32 i; + + if (flags) + return -EINVAL; + + for (i = 0; i < nr_iterations; i++) { + err = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); + /* return value: 0 - continue, 1 - stop and return */ + if (err) { + i++; + break; + } + } + + return i; +} + +const struct bpf_func_proto bpf_for_each_proto = { + .func = bpf_for_each, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_FUNC, + .arg3_type = ARG_PTR_TO_STACK_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ffd469c217f..ac10e1b3859f 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_query_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; + case BPF_FUNC_for_each: + return &bpf_for_each_proto; default: break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3c8aa7df1773..7c9cc4f01104 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6103,6 +6103,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env, return 0; } +static int set_for_each_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) +{ + /* bpf_for_each(u32 nr_iterations, void *callback_fn, void *callback_ctx, + * u64 flags); + * callback_fn(u32 index, void *callback_ctx); + */ + callee->regs[BPF_REG_1].type = SCALAR_VALUE; + callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_3]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_4]); + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + + callee->in_callback_fn = true; + return 0; +} + static int set_timer_callback_state(struct bpf_verifier_env *env, struct bpf_func_state *caller, struct bpf_func_state *callee, @@ -6482,6 +6503,13 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn return -EINVAL; } + if (func_id == BPF_FUNC_for_each) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_for_each_callback_state); + if (err < 0) + return -EINVAL; + } + if (func_id == BPF_FUNC_timer_set_callback) { err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, set_timer_callback_state); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bd0c9f0487f6..ea5098920ed2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4750,6 +4750,28 @@ union bpf_attr { * The number of traversed map elements for success, **-EINVAL** for * invalid **flags**. * + * long bpf_for_each(u32 nr_iterations, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For **nr_iterations**, call **callback_fn** function with + * **callback_ctx** as the context parameter. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0. + * + * long (\*callback_fn)(u32 index, void \*ctx); + * + * where **index** is the current index in the iteration. The index + * is zero-indexed. + * + * If **callback_fn** returns 0, the helper will continue to the next + * iteration. If return value is 1, the helper will skip the rest of + * the iterations and return. Other return values are not used now. + * + * Return + * The number of iterations performed, **-EINVAL** for invalid **flags** + * or a null **callback_fn**. + * * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len) * Description * Outputs a string into the **str** buffer of size **str_size** @@ -5105,6 +5127,7 @@ union bpf_attr { FN(sock_from_file), \ FN(check_mtu), \ FN(for_each_map_elem), \ + FN(for_each), \ FN(snprintf), \ FN(sys_bpf), \ FN(btf_find_by_name_kind), \ -- 2.30.2