On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@xxxxxxxxxxxxx> wrote: > > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set > of file descriptors: maps or btfs. This field was introduced as a > sparse array. Introduce a new attribute, fd_array_cnt, which, if > present, indicates that the fd_array is a continuous array of the > corresponding length. > > If fd_array_cnt is non-zero, then every map in the fd_array will be > bound to the program, as if it was used by the program. This > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such > maps can be used by the verifier during the program load. > > Signed-off-by: Anton Protopopov <aspsk@xxxxxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 10 ++++ > kernel/bpf/syscall.c | 2 +- > kernel/bpf/verifier.c | 106 ++++++++++++++++++++++++++++----- > tools/include/uapi/linux/bpf.h | 10 ++++ > 4 files changed, 113 insertions(+), 15 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 4162afc6b5d0..2acf9b336371 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1573,6 +1573,16 @@ union bpf_attr { > * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. > */ > __s32 prog_token_fd; > + /* The fd_array_cnt can be used to pass the length of the > + * fd_array array. In this case all the [map] file descriptors > + * passed in this array will be bound to the program, even if > + * the maps are not referenced directly. The functionality is > + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be > + * used by the verifier during the program load. If provided, > + * then the fd_array[0,...,fd_array_cnt-1] is expected to be > + * continuous. 
> + */ > + __u32 fd_array_cnt; > }; > > struct { /* anonymous struct used by BPF_OBJ_* commands */ > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 58190ca724a2..7e3fbc23c742 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -2729,7 +2729,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) > } > > /* last field in 'union bpf_attr' used by this command */ > -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd > +#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt > > static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) > { > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 8e034a22aa2a..a84ba93c0036 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -19181,22 +19181,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, > return 0; > } > > -/* Add map behind fd to used maps list, if it's not already there, and return > - * its index. > - * Returns <0 on error, or >= 0 index, on success. > - */ > -static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) > +static int add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) The _from_fd suffix is IMO too verbose. Let's use __add_used_map() here instead? > { > - CLASS(fd, f)(fd); > - struct bpf_map *map; > int i, err; > > - map = __bpf_map_get(f); > - if (IS_ERR(map)) { > - verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); > - return PTR_ERR(map); > - } > - > /* check whether we recorded this map already */ > for (i = 0; i < env->used_map_cnt; i++) > if (env->used_maps[i] == map) > @@ -19227,6 +19215,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) > return env->used_map_cnt - 1; > } > > +/* Add map behind fd to used maps list, if it's not already there, and return > + * its index. > + * Returns <0 on error, or >= 0 index, on success. 
> + */ > +static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) And keep this one as add_used_map()? > +{ > + struct bpf_map *map; > + CLASS(fd, f)(fd); > + > + map = __bpf_map_get(f); > + if (IS_ERR(map)) { > + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); > + return PTR_ERR(map); > + } > + > + return add_used_map(env, map); > +} > + > /* find and rewrite pseudo imm in ld_imm64 instructions: > * > * 1. if it accesses map FD, replace it with actual map pointer. > @@ -22526,6 +22532,75 @@ struct btf *bpf_get_btf_vmlinux(void) > return btf_vmlinux; > } > > +/* > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given. In > + * this case expect that every file descriptor in the array is either a map or > + * a BTF, or a hole (0). Everything else is considered to be trash. > + */ > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) > +{ > + struct bpf_map *map; > + CLASS(fd, f)(fd); > + int ret; > + > + map = __bpf_map_get(f); > + if (!IS_ERR(map)) { > + ret = add_used_map(env, map); > + if (ret < 0) > + return ret; > + return 0; > + } > + > + if (!IS_ERR(__btf_get_by_fd(f))) > + return 0; > + > + if (!fd) > + return 0; This is not allowed in new APIs. Zero cannot be special. > + > + verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); > + return PTR_ERR(map); > +} > + > +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) What an odd name... Why is 'env_' there? > +{ > + int size = sizeof(int) * attr->fd_array_cnt; This can overflow int. 
> + int *copy; > + int ret; > + int i; > + > + if (attr->fd_array_cnt >= MAX_USED_MAPS) > + return -E2BIG; > + > + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); > + > + /* > + * The only difference between old (no fd_array_cnt is given) and new > + * APIs is that in the latter case the fd_array is expected to be > + * continuous and is scanned for map fds right away > + */ > + if (!size) > + return 0; > + > + copy = kzalloc(size, GFP_KERNEL); The slab allocator will WARN with a dmesg splat on large sizes. > + if (!copy) > + return -ENOMEM; > + > + if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) { > + ret = -EFAULT; > + goto free_copy; > + } > + > + for (i = 0; i < attr->fd_array_cnt; i++) { > + ret = add_fd_from_fd_array(env, copy[i]); > + if (ret) > + goto free_copy; > + } I don't get this feature. Why bother copying and checking for validity? What does it buy? pw-bot: cr