Hi, On 11/19/2024 6:15 PM, Anton Protopopov wrote: > The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set > of file descriptors: maps or btfs. This field was introduced as a > sparse array. Introduce a new attribute, fd_array_cnt, which, if > present, indicates that the fd_array is a continuous array of the > corresponding length. > > If fd_array_cnt is non-zero, then every map in the fd_array will be > bound to the program, as if it was used by the program. This > functionality is similar to the BPF_PROG_BIND_MAP syscall, but such > maps can be used by the verifier during the program load. > > Signed-off-by: Anton Protopopov <aspsk@xxxxxxxxxxxxx> > --- > include/uapi/linux/bpf.h | 10 ++++ > kernel/bpf/syscall.c | 2 +- > kernel/bpf/verifier.c | 106 ++++++++++++++++++++++++++++----- > tools/include/uapi/linux/bpf.h | 10 ++++ > 4 files changed, 113 insertions(+), 15 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 4162afc6b5d0..2acf9b336371 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -1573,6 +1573,16 @@ union bpf_attr { > * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. > */ > __s32 prog_token_fd; > + /* The fd_array_cnt can be used to pass the length of the > + * fd_array array. In this case all the [map] file descriptors > + * passed in this array will be bound to the program, even if > + * the maps are not referenced directly. The functionality is > + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be > + * used by the verifier during the program load. If provided, > + * then the fd_array[0,...,fd_array_cnt-1] is expected to be > + * continuous. > + */ > + __u32 fd_array_cnt; > }; > > struct { /* anonymous struct used by BPF_OBJ_* commands */ SNIP > +/* > + * The add_fd_from_fd_array() is executed only if fd_array_cnt is given. In > + * this case expect that every file descriptor in the array is either a map or > + * a BTF, or a hole (0). 
Everything else is considered to be trash. > + */ > +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) > +{ > + struct bpf_map *map; > + CLASS(fd, f)(fd); > + int ret; > + > + map = __bpf_map_get(f); > + if (!IS_ERR(map)) { > + ret = add_used_map(env, map); > + if (ret < 0) > + return ret; > + return 0; > + } > + > + if (!IS_ERR(__btf_get_by_fd(f))) > + return 0; For the fd_array_cnt > 0 case, does it need to handle the BTF fd case? If it does, these returned BTFs should be saved somewhere; otherwise, these BTFs will be leaked. > + > + if (!fd) > + return 0; > + > + verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); > + return PTR_ERR(map); > +} > + > +static int env_init_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) > +{ > + int size = sizeof(int) * attr->fd_array_cnt; > + int *copy; > + int ret; > + int i; > + > + if (attr->fd_array_cnt >= MAX_USED_MAPS) > + return -E2BIG; > + > + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); > + > + /* > + * The only difference between old (no fd_array_cnt is given) and new > + * APIs is that in the latter case the fd_array is expected to be > + * continuous and is scanned for map fds right away > + */ > + if (!size) > + return 0; > + > + copy = kzalloc(size, GFP_KERNEL); > + if (!copy) > + return -ENOMEM; > + > + if (copy_from_bpfptr_offset(copy, env->fd_array, 0, size)) { > + ret = -EFAULT; > + goto free_copy; > + } It is better to use kvmemdup_bpfptr() instead. 
> + > + for (i = 0; i < attr->fd_array_cnt; i++) { > + ret = add_fd_from_fd_array(env, copy[i]); > + if (ret) > + goto free_copy; > + } > + > +free_copy: > + kfree(copy); > + return ret; > +} > + > int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) > { > u64 start_time = ktime_get_ns(); > @@ -22557,7 +22632,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 > env->insn_aux_data[i].orig_idx = i; > env->prog = *prog; > env->ops = bpf_verifier_ops[env->prog->type]; > - env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); > + ret = env_init_fd_array(env, attr, uattr); > + if (ret) > + goto err_free_aux_data; The maps saved in env->used_maps will also be leaked on this error path. > > env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); > env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); > @@ -22775,6 +22852,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 > err_unlock: > if (!is_priv) > mutex_unlock(&bpf_verifier_lock); > +err_free_aux_data: > vfree(env->insn_aux_data); > kvfree(env->insn_hist); > err_free_env: > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 4162afc6b5d0..2acf9b336371 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -1573,6 +1573,16 @@ union bpf_attr { > * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. > */ > __s32 prog_token_fd; > + /* The fd_array_cnt can be used to pass the length of the > + * fd_array array. In this case all the [map] file descriptors > + * passed in this array will be bound to the program, even if > + * the maps are not referenced directly. The functionality is > + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be > + * used by the verifier during the program load. If provided, > + * then the fd_array[0,...,fd_array_cnt-1] is expected to be > + * continuous. 
> + */ > + __u32 fd_array_cnt; > }; > > struct { /* anonymous struct used by BPF_OBJ_* commands */