Re: [PATCH v10 bpf-next 8/9] bpf: Add bpf_dynptr_slice and bpf_dynptr_slice_rdwr

On Thu, Feb 16, 2023 at 3:00 PM Joanne Koong <joannelkoong@xxxxxxxxx> wrote:
>
> Two new kfuncs are added, bpf_dynptr_slice and bpf_dynptr_slice_rdwr.
> The user must pass in a buffer to store the contents of the data slice
> if a direct pointer to the data cannot be obtained.
>
> For skb and xdp type dynptrs, these two APIs are the only way to obtain
> a data slice. However, for other types of dynptrs, there is no
> difference between bpf_dynptr_slice(_rdwr) and bpf_dynptr_data.
>
> For skb type dynptrs, the data is copied into the user provided buffer
> if any of the data is not in the linear portion of the skb. For xdp type
> dynptrs, the data is copied into the user provided buffer if the data is
> between xdp frags.
>
> If the skb is cloned and a call to bpf_dynptr_slice_rdwr is made, then
> the skb will be uncloned (see bpf_unclone_prologue()).
>
> Please note that any bpf_dynptr_write() automatically invalidates all prior
> data slices of the skb dynptr. This is because the skb may be cloned or
> may need to pull its paged buffer into the head, so the slices are
> invalidated even when the write only touches data in the skb head of an
> uncloned skb. Likewise, any other helper call that changes the underlying
> packet buffer (eg bpf_skb_pull_data()) invalidates all data slices of the
> skb dynptr, for the same reasons.
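
For readers following along, here is a rough sketch of how a tc program
might use the read-only slice on an skb dynptr. The ethhdr parsing, buffer
size and return codes are made up for illustration, and the usual includes
(bpf_helpers.h, bpf_endian.h, linux/if_ether.h, linux/pkt_cls.h) plus the
license section are omitted:

	extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
				       struct bpf_dynptr *ptr) __ksym;
	extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
				      void *buffer, __u32 buffer__sz) __ksym;

	SEC("tc")
	int read_eth_proto(struct __sk_buff *skb)
	{
		struct bpf_dynptr ptr;
		char buffer[sizeof(struct ethhdr)];
		struct ethhdr *eth;

		if (bpf_dynptr_from_skb(skb, 0, &ptr))
			return TC_ACT_SHOT;

		/* returns 'buffer' if the bytes are not all in the linear area */
		eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
		if (!eth)
			return TC_ACT_SHOT;

		/* read-only slice; bpf_dynptr_slice_rdwr() is needed to modify */
		return eth->h_proto == bpf_htons(ETH_P_IP) ? TC_ACT_OK : TC_ACT_SHOT;
	}
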
>
> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> ---
>  include/linux/filter.h         | 14 ++++++
>  include/uapi/linux/bpf.h       |  5 ++
>  kernel/bpf/helpers.c           | 91 ++++++++++++++++++++++++++++++++++
>  kernel/bpf/verifier.c          | 88 ++++++++++++++++++++++++++++++--
>  net/core/filter.c              |  6 +--
>  tools/include/uapi/linux/bpf.h |  5 ++
>  6 files changed, 202 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 3f6992261ec5..efa5d4a1677e 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -1548,6 +1548,9 @@ int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
>                           u32 len, u64 flags);
>  int __bpf_xdp_load_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
>  int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset, void *buf, u32 len);
> +void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len);
> +void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
> +                     void *buf, unsigned long len, bool flush);
>  #else /* CONFIG_NET */
>  static inline int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset,
>                                        void *to, u32 len)
> @@ -1572,6 +1575,17 @@ static inline int __bpf_xdp_store_bytes(struct xdp_buff *xdp, u32 offset,
>  {
>         return -EOPNOTSUPP;
>  }
> +
> +static inline void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
> +{
> +       return NULL;
> +}
> +
> +static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
> +                                    void *buf, unsigned long len,
> +                                    bool flush)
> +{
> +}
>  #endif /* CONFIG_NET */
>
>  #endif /* __LINUX_FILTER_H__ */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index e7435acbdd30..ac406af207c3 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5324,6 +5324,11 @@ union bpf_attr {
>   *             *flags* must be 0 except for skb-type dynptrs.
>   *
>   *             For skb-type dynptrs:
> + *                 *  All data slices of the dynptr are automatically
> + *                    invalidated after **bpf_dynptr_write**\ (). This is
> + *                    because writing may pull the skb and change the
> + *                    underlying packet buffer.
> + *
>   *                 *  For *flags*, please see the flags accepted by
>   *                    **bpf_skb_store_bytes**\ ().
>   *     Return
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 989be97b0f81..0586f54e4f9e 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2177,6 +2177,94 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
>         return p;
>  }
>
> +/**
> + * bpf_dynptr_slice - Obtain a read-only pointer to the dynptr data.
> + *
> + * For non-skb and non-xdp type dynptrs, there is no difference between
> + * bpf_dynptr_slice and bpf_dynptr_data.
> + *
> + * If the intention is to write to the data slice, please use
> + * bpf_dynptr_slice_rdwr.
> + *
> + * @ptr: The dynptr whose data slice to retrieve
> + * @offset: Offset into the dynptr
> + * @buffer: User-provided buffer to copy contents into
> + * @buffer__sz: Size (in bytes) of the buffer. This is the length of the
> + * requested slice
> + *
> + * @returns: NULL if the call failed (eg invalid dynptr), pointer to a read-only
> + * data slice (can be either direct pointer to the data or a pointer to the user
> + * provided buffer, with its contents containing the data, if unable to obtain
> + * direct pointer)
> + */
> +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset,
> +                                  void *buffer, u32 buffer__sz)
> +{
> +       enum bpf_dynptr_type type;
> +       u32 len = buffer__sz;
> +       int err;
> +
> +       if (!ptr->data)
> +               return 0;
> +
> +       err = bpf_dynptr_check_off_len(ptr, offset, len);
> +       if (err)
> +               return 0;
> +
> +       type = bpf_dynptr_get_type(ptr);
> +
> +       switch (type) {
> +       case BPF_DYNPTR_TYPE_LOCAL:
> +       case BPF_DYNPTR_TYPE_RINGBUF:
> +               return ptr->data + ptr->offset + offset;
> +       case BPF_DYNPTR_TYPE_SKB:
> +               return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer);
> +       case BPF_DYNPTR_TYPE_XDP:
> +       {
> +               void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len);
> +               if (xdp_ptr)
> +                       return xdp_ptr;
> +
> +               bpf_xdp_copy_buf(ptr->data, ptr->offset + offset, buffer, len, false);
> +               return buffer;
> +       }
> +       default:
> +               WARN_ONCE(true, "unknown dynptr type %d\n", type);
> +               return 0;
> +       }
> +}
> +
> +/**
> + * bpf_dynptr_slice_rdwr - Obtain a pointer to the dynptr data.
> + *
> + * For non-skb and non-xdp type dynptrs, there is no difference between
> + * bpf_dynptr_slice_rdwr and bpf_dynptr_data.
> + *
> + * @ptr: The dynptr whose data slice to retrieve
> + * @offset: Offset into the dynptr
> + * @buffer: User-provided buffer to copy contents into
> + * @buffer__sz: Size (in bytes) of the buffer. This is the length of the
> + * requested slice
> + *
> + * @returns: NULL if the call failed (eg invalid dynptr), pointer to a
> + * data slice (can be either direct pointer to the data or a pointer to the user
> + * provided buffer, with its contents containing the data, if unable to obtain
> + * direct pointer)
> + */
> +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset,
> +                                       void *buffer, u32 buffer__sz)
> +{
> +       if (!ptr->data || bpf_dynptr_is_rdonly(ptr))
> +               return 0;
> +
> +       /* bpf_dynptr_slice_rdwr is the same logic as bpf_dynptr_slice.
> +        *
> +        * For skb-type dynptrs, the verifier has already ensured that the skb
> +        * head is writable (see bpf_unclone_prologue()).
> +        */
> +       return bpf_dynptr_slice(ptr, offset, buffer, buffer__sz);
> +}
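
Related note while reading this: when the returned pointer ends up being
the user-provided buffer (fragmented case), writes through it only modify
the local copy, so as far as I can tell from the code above the program
still has to call bpf_dynptr_write() to push the change back into the
packet. Rough sketch (same extern declarations as the earlier sketch; the
iphdr offset assumes a plain Ethernet frame and checksum handling is
ignored):

	extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
					   void *buffer, __u32 buffer__sz) __ksym;

	SEC("tc")
	int set_ttl(struct __sk_buff *skb)
	{
		struct bpf_dynptr ptr;
		char buffer[sizeof(struct iphdr)];
		struct iphdr *iph;

		if (bpf_dynptr_from_skb(skb, 0, &ptr))
			return TC_ACT_SHOT;

		iph = bpf_dynptr_slice_rdwr(&ptr, ETH_HLEN, buffer, sizeof(buffer));
		if (!iph)
			return TC_ACT_SHOT;

		iph->ttl = 64;

		/* slice was served out of 'buffer': flush the change back */
		if (iph == (struct iphdr *)buffer &&
		    bpf_dynptr_write(&ptr, ETH_HLEN, buffer, sizeof(buffer), 0))
			return TC_ACT_SHOT;

		return TC_ACT_OK;
	}

Might be worth spelling this out in the kernel doc above.
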
> +
>  __bpf_kfunc void *bpf_cast_to_kern_ctx(void *obj)
>  {
>         return obj;
> @@ -2224,6 +2312,8 @@ BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
>  BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
>  #endif
>  BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)

Upon further thought, I think these two should be listed in the
"common_btf_ids" BTF set rather than "generic_btf_ids", since
common_btf_ids is registered for the BPF_PROG_TYPE_UNSPEC prog type and
is therefore available to all program types.
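Roughly something like this, keeping the same KF_RET_NULL flags (untested,
just to illustrate what I mean):

	BTF_SET8_START(common_btf_ids)
	...
	BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
	BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
	BTF_SET8_END(common_btf_ids)

That would also make the extra BPF_PROG_TYPE_XDP registration of the
generic set further down unnecessary, assuming the slice kfuncs were the
only reason for adding it.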

>  BTF_SET8_END(generic_btf_ids)
>
>  static const struct btf_kfunc_id_set generic_kfunc_set = {
> @@ -2271,6 +2361,7 @@ static int __init kfunc_init(void)
>         ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
>         ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
>         ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
> +       ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
>         ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
>                                                   ARRAY_SIZE(generic_dtors),
>                                                   THIS_MODULE);
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 0b767134ecaa..aa8d48b00cc2 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -759,6 +759,22 @@ static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
>         }
>  }
>
> +static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
> +{
> +       switch (type) {
> +       case BPF_DYNPTR_TYPE_LOCAL:
> +               return DYNPTR_TYPE_LOCAL;
> +       case BPF_DYNPTR_TYPE_RINGBUF:
> +               return DYNPTR_TYPE_RINGBUF;
> +       case BPF_DYNPTR_TYPE_SKB:
> +               return DYNPTR_TYPE_SKB;
> +       case BPF_DYNPTR_TYPE_XDP:
> +               return DYNPTR_TYPE_XDP;
> +       default:
> +               return 0;
> +       }
> +}
> +
>  static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
>  {
>         return type == BPF_DYNPTR_TYPE_RINGBUF;
> @@ -1677,6 +1693,12 @@ static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
>                reg->type == PTR_TO_PACKET_END;
>  }
>
> +static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
> +{
> +       return base_type(reg->type) == PTR_TO_MEM &&
> +               (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
> +}
> +
>  /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
>  static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
>                                     enum bpf_reg_type which)
> @@ -7404,6 +7426,9 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
>
>  /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
>   * are now invalid, so turn them into unknown SCALAR_VALUE.
> + *
> + * This also applies to dynptr slices belonging to skb and xdp dynptrs,
> + * since these slices point to packet data.
>   */
>  static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
>  {
> @@ -7411,7 +7436,7 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
>         struct bpf_reg_state *reg;
>
>         bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
> -               if (reg_is_pkt_pointer_any(reg))
> +               if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
>                         __mark_reg_unknown(env, reg);
>         }));
>  }
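
To make the slice invalidation concrete, a made-up snippet like the one
below (modeled on the bpf_skb_pull_data() example from the commit message)
should now be rejected, since 'data' is downgraded to an unknown scalar
after the helper call:

	SEC("tc")
	int use_stale_slice(struct __sk_buff *skb)
	{
		struct bpf_dynptr ptr;
		char buffer[16];
		__u8 *data;

		if (bpf_dynptr_from_skb(skb, 0, &ptr))
			return TC_ACT_SHOT;

		data = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
		if (!data)
			return TC_ACT_SHOT;

		/* changes the underlying packet buffer ... */
		bpf_skb_pull_data(skb, 0);

		/* ... so this load through the old slice is rejected */
		return *data;
	}
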
> @@ -8667,6 +8692,11 @@ struct bpf_kfunc_call_arg_meta {
>         struct {
>                 struct btf_field *field;
>         } arg_rbtree_root;
> +       struct {
> +               enum bpf_dynptr_type type;
> +               u32 id;
> +       } initialized_dynptr;
> +       u64 mem_size;
>  };
>
>  static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
> @@ -8923,6 +8953,8 @@ enum special_kfunc_type {
>         KF_bpf_rbtree_first,
>         KF_bpf_dynptr_from_skb,
>         KF_bpf_dynptr_from_xdp,
> +       KF_bpf_dynptr_slice,
> +       KF_bpf_dynptr_slice_rdwr,
>  };
>
>  BTF_SET_START(special_kfunc_set)
> @@ -8939,6 +8971,8 @@ BTF_ID(func, bpf_rbtree_add)
>  BTF_ID(func, bpf_rbtree_first)
>  BTF_ID(func, bpf_dynptr_from_skb)
>  BTF_ID(func, bpf_dynptr_from_xdp)
> +BTF_ID(func, bpf_dynptr_slice)
> +BTF_ID(func, bpf_dynptr_slice_rdwr)
>  BTF_SET_END(special_kfunc_set)
>
>  BTF_ID_LIST(special_kfunc_list)
> @@ -8957,6 +8991,8 @@ BTF_ID(func, bpf_rbtree_add)
>  BTF_ID(func, bpf_rbtree_first)
>  BTF_ID(func, bpf_dynptr_from_skb)
>  BTF_ID(func, bpf_dynptr_from_xdp)
> +BTF_ID(func, bpf_dynptr_slice)
> +BTF_ID(func, bpf_dynptr_slice_rdwr)
>
>  static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
>  {
> @@ -9710,12 +9746,24 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>                                 dynptr_arg_type |= MEM_UNINIT | DYNPTR_TYPE_SKB;
>                         else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp])
>                                 dynptr_arg_type |= MEM_UNINIT | DYNPTR_TYPE_XDP;
> -                       else
> +                       else if (reg->type == CONST_PTR_TO_DYNPTR)
>                                 dynptr_arg_type |= MEM_RDONLY;
>
>                         ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
>                         if (ret < 0)
>                                 return ret;
> +
> +                       if (!(dynptr_arg_type & MEM_UNINIT)) {
> +                               int id = dynptr_id(env, reg);
> +
> +                               if (id < 0) {
> +                                       verbose(env, "verifier internal error: failed to obtain dynptr id\n");
> +                                       return id;
> +                               }
> +                               meta->initialized_dynptr.id = id;
> +                               meta->initialized_dynptr.type = dynptr_get_type(env, reg);
> +                       }
> +
>                         break;
>                 }
>                 case KF_ARG_PTR_TO_LIST_HEAD:
> @@ -9966,8 +10014,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                 }
>         }
>
> -       for (i = 0; i < CALLER_SAVED_REGS; i++)
> -               mark_reg_not_init(env, regs, caller_saved[i]);
> +       mark_reg_not_init(env, regs, caller_saved[BPF_REG_0]);
>
>         /* Check return type */
>         t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
> @@ -10053,6 +10100,36 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                                 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
>                                 regs[BPF_REG_0].btf = desc_btf;
>                                 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
> +                       } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
> +                                  meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
> +                               enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
> +
> +                               mark_reg_known_zero(env, regs, BPF_REG_0);
> +
> +                               if (!tnum_is_const(regs[BPF_REG_4].var_off)) {
> +                                       verbose(env, "mem_size must be a constant\n");
> +                                       return -EINVAL;
> +                               }
> +                               regs[BPF_REG_0].mem_size = regs[BPF_REG_4].var_off.value;
> +
> +                               /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
> +                               regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
> +
> +                               if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice])
> +                                       regs[BPF_REG_0].type |= MEM_RDONLY;
> +                               else
> +                                       env->seen_direct_write = true;
> +
> +                               if (!meta.initialized_dynptr.id) {
> +                                       verbose(env, "verifier internal error: no dynptr id\n");
> +                                       return -EFAULT;
> +                               }
> +                               regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
> +
> +                               /* we don't need to set BPF_REG_0's ref obj id
> +                                * because packet slices are not refcounted (see
> +                                * dynptr_type_refcounted)
> +                                */
>                         } else {
>                                 verbose(env, "kernel function %s unhandled dynamic return type\n",
>                                         meta.func_name);
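
A user-visible consequence of the tnum_is_const() check above: the slice
length passed to the kfunc has to be a value the verifier can prove is a
single constant, so the usual pattern is a fixed-size stack buffer with
sizeof(). A fragment, reusing the declarations from the earlier sketches:

	char buffer[64];
	__u32 len = 16 + (bpf_get_prandom_u32() & 16);	/* 16 or 32, not const */
	void *data;

	/* fine: sizeof() is a build-time constant */
	data = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));

	/* I'd expect this one to be rejected with "mem_size must be a
	 * constant", even though the range is bounded
	 */
	data = bpf_dynptr_slice(&ptr, 0, buffer, len);
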
> @@ -10112,6 +10189,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
>                         regs[BPF_REG_0].id = ++env->id_gen;
>         } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
>
> +       for (i = BPF_REG_1; i < CALLER_SAVED_REGS; i++)
> +               mark_reg_not_init(env, regs, caller_saved[i]);
> +
>         nargs = btf_type_vlen(func_proto);
>         args = (const struct btf_param *)(func_proto + 1);
>         for (i = 0; i < nargs; i++) {
> diff --git a/net/core/filter.c b/net/core/filter.c
> index dcf1e6d2582d..0bd62b2b25e1 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -3894,8 +3894,8 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
>         .arg2_type      = ARG_ANYTHING,
>  };
>
> -static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
> -                            void *buf, unsigned long len, bool flush)
> +void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
> +                     void *buf, unsigned long len, bool flush)
>  {
>         unsigned long ptr_len, ptr_off = 0;
>         skb_frag_t *next_frag, *end_frag;
> @@ -3941,7 +3941,7 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
>         }
>  }
>
> -static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
> +void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
>  {
>         struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
>         u32 size = xdp->data_end - xdp->data;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index e7435acbdd30..ac406af207c3 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5324,6 +5324,11 @@ union bpf_attr {
>   *             *flags* must be 0 except for skb-type dynptrs.
>   *
>   *             For skb-type dynptrs:
> + *                 *  All data slices of the dynptr are automatically
> + *                    invalidated after **bpf_dynptr_write**\ (). This is
> + *                    because writing may pull the skb and change the
> + *                    underlying packet buffer.
> + *
>   *                 *  For *flags*, please see the flags accepted by
>   *                    **bpf_skb_store_bytes**\ ().
>   *     Return
> --
> 2.30.2
>


