Re: [PATCH v2 bpf-next 3/6] bpf: add fd_array_cnt attribute for prog_load

Alexei Starovoitov <alexei.starovoitov@xxxxxxxxx> · Mon, 25 Nov 2024 17:38:20 -0800

On Tue, Nov 19, 2024 at 2:17 AM Anton Protopopov <aspsk@xxxxxxxxxxxxx> wrote:
>
> The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set
> of file descriptors: maps or btfs. This field was introduced as a
> sparse array. Introduce a new attribute, fd_array_cnt, which, if
> present, indicates that the fd_array is a continuous array of the
> corresponding length.
>
> If fd_array_cnt is non-zero, then every map in the fd_array will be
> bound to the program, as if it was used by the program. This
> functionality is similar to the BPF_PROG_BIND_MAP syscall, but such
> maps can be used by the verifier during the program load.
>
> Signed-off-by: Anton Protopopov <aspsk@xxxxxxxxxxxxx>
> ---
>  include/uapi/linux/bpf.h       |  10 ++++
>  kernel/bpf/syscall.c           |   2 +-
>  kernel/bpf/verifier.c          | 106 ++++++++++++++++++++++++++++-----
>  tools/include/uapi/linux/bpf.h |  10 ++++
>  4 files changed, 113 insertions(+), 15 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4162afc6b5d0..2acf9b336371 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1573,6 +1573,16 @@ union bpf_attr {
>                  * If provided, prog_flags should have BPF_F_TOKEN_FD flag set.
>                  */
>                 __s32           prog_token_fd;
> +               /* The fd_array_cnt can be used to pass the length of the
> +                * fd_array array. In this case all the [map] file descriptors
> +                * passed in this array will be bound to the program, even if
> +                * the maps are not referenced directly. The functionality is
> +                * similar to the BPF_PROG_BIND_MAP syscall, but maps can be
> +                * used by the verifier during the program load. If provided,
> +                * then the fd_array[0,...,fd_array_cnt-1] is expected to be
> +                * continuous.
> +                */
> +               __u32           fd_array_cnt;
>         };
>
>         struct { /* anonymous struct used by BPF_OBJ_* commands */
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 58190ca724a2..7e3fbc23c742 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2729,7 +2729,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
>  }
>
>  /* last field in 'union bpf_attr' used by this command */
> -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd
> +#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt
>
>  static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
>  {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8e034a22aa2a..a84ba93c0036 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -19181,22 +19181,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
>         return 0;
>  }
>
> -/* Add map behind fd to used maps list, if it's not already there, and return
> - * its index.
> - * Returns <0 on error, or >= 0 index, on success.
> - */
> -static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
> +static int add_used_map(struct bpf_verifier_env *env, struct bpf_map *map)

_from_fd suffix is imo too verbose.
Let's use __add_used_map() here instead?

>  {
> -       CLASS(fd, f)(fd);
> -       struct bpf_map *map;
>         int i, err;
>
> -       map = __bpf_map_get(f);
> -       if (IS_ERR(map)) {
> -               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
> -               return PTR_ERR(map);
> -       }
> -
>         /* check whether we recorded this map already */
>         for (i = 0; i < env->used_map_cnt; i++)
>                 if (env->used_maps[i] == map)
> @@ -19227,6 +19215,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)
>         return env->used_map_cnt - 1;
>  }
>
> +/* Add map behind fd to used maps list, if it's not already there, and return
> + * its index.
> + * Returns <0 on error, or >= 0 index, on success.
> + */
> +static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd)

and keep this one as add_used_map() ?

> +{
> +       struct bpf_map *map;
> +       CLASS(fd, f)(fd);
> +
> +       map = __bpf_map_get(f);
> +       if (IS_ERR(map)) {
> +               verbose(env, "fd %d is not pointing to valid bpf_map\n", fd);
> +               return PTR_ERR(map);
> +       }
> +
> +       return add_used_map(env, map);
> +}
> +
>  /* find and rewrite pseudo imm in ld_imm64 instructions:
>   *
>   * 1. if it accesses map FD, replace it with actual map pointer.
> @@ -22526,6 +22532,75 @@ struct btf *bpf_get_btf_vmlinux(void)
>         return btf_vmlinux;
>  }
>
> +/*
> + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given.  In
> + * this case expect that every file descriptor in the array is either a map or
> + * a BTF, or a hole (0). Everything else is considered to be trash.
> + */
> +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd)
> +{
> +       struct bpf_map *map;
> +       CLASS(fd, f)(fd);
> +       int ret;
> +
> +       map = __bpf_map_get(f);
> +       if (!IS_ERR(map)) {
> +               ret = add_used_map(env, map);
> +               if (ret < 0)
> +                       return ret;
> +               return 0;
> +       }
> +
> +       if (!IS_ERR(__btf_get_by_fd(f)))
> +               return 0;
> +
> +       if (!fd)
> +               return 0;

This is not allowed in new apis.
zero cannot be special.

> +
> +       verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd);
> +       return PTR_ERR(map);
> +}
> +
> +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr)

What an odd name... why is 'env_' there?

> +{
> +       int size = sizeof(int) * attr->fd_array_cnt;

int overflow.

> +       int *copy;
> +       int ret;
> +       int i;
> +
> +       if (attr->fd_array_cnt >= MAX_USED_MAPS)
> +               return -E2BIG;
> +
> +       env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
> +
> +       /*
> +        * The only difference between old (no fd_array_cnt is given) and new
> +        * APIs is that in the latter case the fd_array is expected to be
> +        * continuous and is scanned for map fds right away
> +        */
> +       if (!size)
> +               return 0;
> +
> +       copy = kzalloc(size, GFP_KERNEL);

the slab will WARN with dmesg splat on large sizes.

> +       if (!copy)
> +               return -ENOMEM;
> +
> +       if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) {
> +               ret = -EFAULT;
> +               goto free_copy;
> +       }
> +
> +       for (i = 0; i < attr->fd_array_cnt; i++) {
> +               ret = add_fd_from_fd_array(env, copy[i]);
> +               if (ret)
> +                       goto free_copy;
> +       }

I don't get this feature.
Why bother copying and checking for validity?
What does it buy ?

pw-bot: cr