On Tue, Mar 7, 2023 at 1:53 PM Andrii Nakryiko <andrii@xxxxxxxxxx> wrote: > > Implement first open-coded iterator type over a range of integers. > > It's public API consists of: > - bpf_iter_num_new() constructor, which accepts [start, end) range > (that is, start is inclusive, end is exclusive). > - bpf_iter_num_next() which will keep returning read-only pointer to int > until the range is exhausted, at which point NULL will be returned. > If bpf_iter_num_next() is kept calling after this, NULL will be > persistently returned. > - bpf_iter_num_destroy() destructor, which needs to be called at some > point to clean up iterator state. BPF verifier enforces that iterator > destructor is called at some point before BPF program exits. > > Note that `start = end = X` is a valid combination to setup empty > iterator. bpf_iter_num_new() will return 0 (success) for any such > combination. > > If bpf_iter_num_new() detects invalid combination of input arguments, it > returns error, resets iterator state to, effectively, empty iterator, so > any subsequent call to bpf_iter_num_next() will keep returning NULL. > > BPF verifier has no knowledge that returned integers are in the > [start, end) value range, as both `start` and `end` are not statically > known/enforced, they are runtime values only. > > While implementation is pretty trivial, some care needs to be taken to > avoid overflows and underflows. Subsequent selftests will validate > correctness of [start, end) semantics, especially around extremes > (INT_MIN and INT_MAX). > > Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS can > be specified. > > bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded > BPF loops and bpf_loop() helper and is the basis for implementing > ergonomic BPF loops with no statically known or verified bounds. > Subsequent patches implement bpf_for() macro, demonstrating how this can > be wrapped into something that works and feels like a normal for() loop > in C language. > > Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx> > --- > include/linux/bpf.h | 8 +++- > include/uapi/linux/bpf.h | 8 ++++ > kernel/bpf/bpf_iter.c | 70 ++++++++++++++++++++++++++++++++++ > kernel/bpf/helpers.c | 3 ++ > kernel/bpf/verifier.c | 6 +++ > tools/include/uapi/linux/bpf.h | 8 ++++ > 6 files changed, 101 insertions(+), 2 deletions(-) > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 6792a7940e1e..e64ff1e89fb2 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -1617,8 +1617,12 @@ struct bpf_array { > #define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ > #define MAX_TAIL_CALL_CNT 33 > > -/* Maximum number of loops for bpf_loop */ > -#define BPF_MAX_LOOPS BIT(23) > +/* Maximum number of loops for bpf_loop and bpf_iter_num. > + * It's enum to expose it (and thus make it discoverable) through BTF. > + */ > +enum { > + BPF_MAX_LOOPS = 8 * 1024 * 1024, > +}; > > #define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ > BPF_F_RDONLY_PROG | \ > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 976b194eb775..bf8b77d9a17e 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -7112,4 +7112,12 @@ enum { > BPF_F_TIMER_ABS = (1ULL << 0), > }; > > +/* BPF numbers iterator state */ > +struct bpf_iter_num { > + /* opaque iterator state; having __u64 here allows to preserve correct > + * alignment requirements in vmlinux.h, generated from BTF > + */ > + __u64 __opaque[1]; > +}; > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c > index 5dc307bdeaeb..96856f130cbf 100644 > --- a/kernel/bpf/bpf_iter.c > +++ b/kernel/bpf/bpf_iter.c > @@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = { > .arg3_type = ARG_PTR_TO_STACK_OR_NULL, > .arg4_type = ARG_ANYTHING, > }; > + > +struct bpf_iter_num_kern { > + int cur; /* current value, inclusive */ > + int end; /* final value, exclusive */ > +} __aligned(8); > + > +__diag_push(); > +__diag_ignore_all("-Wmissing-prototypes", > + "Global functions as their definitions will be in vmlinux BTF"); > + > +__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) > +{ > + struct bpf_iter_num_kern *s = (void *)it; > + > + BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); > + > + BTF_TYPE_EMIT(struct btf_iter_num); > + > + /* start == end is legit, it's an empty range and we'll just get NULL > + * on first (and any subsequent) bpf_iter_num_next() call > + */ > + if (start > end) { > + s->cur = s->end = 0; > + return -EINVAL; > + } > + > + /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ > + if ((s64)end - (s64)start > BPF_MAX_LOOPS) { > + s->cur = s->end = 0; > + return -E2BIG; > + } > + > + /* user will call bpf_iter_num_next() first, > + * which will set s->cur to exactly start value; > + * underflow shouldn't matter > + */ > + s->cur = start - 1; > + s->end = end; > + > + return 0; > +} > + > +__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it) > +{ > + struct bpf_iter_num_kern *s = (void *)it; > + > + /* check failed initialization or if we are done (same behavior); > + * need to be careful about overflow, so convert to s64 for checks, > + * e.g., if s->cur == s->end == INT_MAX, we can't just do > + * s->cur + 1 >= s->end > + */ > + if ((s64)(s->cur + 1) >= s->end) { > + s->cur = s->end = 0; > + return NULL; > + } > + > + s->cur++; > + > + return &s->cur; > +} > + > +__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) > +{ > + struct bpf_iter_num_kern *s = (void *)it; > + > + s->cur = s->end = 0; > +} > + > +__diag_pop(); > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index 637ac4e92e75..f9b7eeedce08 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -2411,6 +2411,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock) > BTF_ID_FLAGS(func, bpf_rcu_read_unlock) > BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) > +BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) > BTF_SET8_END(common_btf_ids) > > static const struct btf_kfunc_id_set common_kfunc_set = { > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index d1246d187fac..33c305b563f6 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -9542,6 +9542,9 @@ enum special_kfunc_type { > KF_bpf_dynptr_from_xdp, > KF_bpf_dynptr_slice, > KF_bpf_dynptr_slice_rdwr, > + KF_bpf_iter_num_new, > + KF_bpf_iter_num_next, > + KF_bpf_iter_num_destroy, don't really need this, this is a leftover from v1, missed it. If this patch set is going to be applied as is, please strip these three rows. > }; > > BTF_SET_START(special_kfunc_set) > @@ -9580,6 +9583,9 @@ BTF_ID(func, bpf_dynptr_from_skb) > BTF_ID(func, bpf_dynptr_from_xdp) > BTF_ID(func, bpf_dynptr_slice) > BTF_ID(func, bpf_dynptr_slice_rdwr) > +BTF_ID(func, bpf_iter_num_new) > +BTF_ID(func, bpf_iter_num_next) > +BTF_ID(func, bpf_iter_num_destroy) > > static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta) > { > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 976b194eb775..bf8b77d9a17e 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -7112,4 +7112,12 @@ enum { > BPF_F_TIMER_ABS = (1ULL << 0), > }; > > +/* BPF numbers iterator state */ > +struct bpf_iter_num { > + /* opaque iterator state; having __u64 here allows to preserve correct > + * alignment requirements in vmlinux.h, generated from BTF > + */ > + __u64 __opaque[1]; > +}; > + > #endif /* _UAPI__LINUX_BPF_H__ */ > -- > 2.34.1 >