Re: [PATCH bpf-next v4 1/3] bpf: Add skb dynptrs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Aug 22, 2022 at 4:57 PM Joanne Koong <joannelkoong@xxxxxxxxx> wrote:
>
> Add skb dynptrs, which are dynptrs whose underlying pointer points
> to a skb. The dynptr acts on skb data. skb dynptrs have two main
> benefits. One is that they allow operations on sizes that are not
> statically known at compile-time (eg variable-sized accesses).
> Another is that parsing the packet data through dynptrs (instead of
> through direct access of skb->data and skb->data_end) can be more
> ergonomic and less brittle (eg does not need manual if checking for
> being within bounds of data_end).
>
> For bpf prog types that don't support writes on skb data, the dynptr is
> read-only. For reads and writes through the bpf_dynptr_read() and
> bpf_dynptr_write() interfaces, this supports reading and writing into
> data in the non-linear paged buffers. For data slices (through the
> bpf_dynptr_data() interface), if the data is in a paged buffer, the user
> must first call bpf_skb_pull_data() to pull the data into the linear
> portion. The returned data slice from a call to bpf_dynptr_data() is of
> reg type PTR_TO_PACKET | PTR_MAYBE_NULL.
>
> Any bpf_dynptr_write() automatically invalidates any prior data slices
> to the skb dynptr. This is because a bpf_dynptr_write() may be writing
> to data in a paged buffer, so it will need to pull the buffer first into
> the head. The reason it needs to be pulled instead of writing directly to
> the paged buffers is because they may be cloned (only the head of the skb
> is by default uncloned). As such, any bpf_dynptr_write() will
> automatically have its prior data slices invalidated, even if the write
> is to data in the skb head (the verifier has no way of differentiating
> whether the write is to the head or paged buffers during program load
> time). Please note as well that any other helper calls that change the
> underlying packet buffer (eg bpf_skb_pull_data()) invalidates any data
> slices of the skb dynptr as well. Whenever such a helper call is made,
> the verifier marks any PTR_TO_PACKET reg type (which includes skb dynptr
> slices since they are PTR_TO_PACKETs) as unknown. The stack trace for
> this is check_helper_call() -> clear_all_pkt_pointers() ->
> __clear_all_pkt_pointers() -> mark_reg_unknown()
>
> For examples of how skb dynptrs can be used, please see the attached
> selftests.
>
> Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
> ---
>  include/linux/bpf.h            | 83 +++++++++++++++++-----------
>  include/linux/filter.h         |  4 ++
>  include/uapi/linux/bpf.h       | 40 ++++++++++++--
>  kernel/bpf/helpers.c           | 81 +++++++++++++++++++++++++---
>  kernel/bpf/verifier.c          | 99 ++++++++++++++++++++++++++++------
>  net/core/filter.c              | 53 ++++++++++++++++--
>  tools/include/uapi/linux/bpf.h | 40 ++++++++++++--
>  7 files changed, 335 insertions(+), 65 deletions(-)
>

[...]

> @@ -1521,9 +1532,19 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
>         if (err)
>                 return err;
>
> -       memcpy(dst, src->data + src->offset + offset, len);
> +       type = bpf_dynptr_get_type(src);
>
> -       return 0;
> +       switch (type) {
> +       case BPF_DYNPTR_TYPE_LOCAL:
> +       case BPF_DYNPTR_TYPE_RINGBUF:
> +               memcpy(dst, src->data + src->offset + offset, len);
> +               return 0;
> +       case BPF_DYNPTR_TYPE_SKB:
> +               return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
> +       default:
> +               WARN(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);

nit: probably better to WARN_ONCE?

> +               return -EFAULT;
> +       }
>  }
>
>  static const struct bpf_func_proto bpf_dynptr_read_proto = {
> @@ -1540,18 +1561,32 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
>  BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
>            u32, len, u64, flags)
>  {
> +       enum bpf_dynptr_type type;
>         int err;
>
> -       if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
> +       if (!dst->data || bpf_dynptr_is_rdonly(dst))
>                 return -EINVAL;
>
>         err = bpf_dynptr_check_off_len(dst, offset, len);
>         if (err)
>                 return err;
>
> -       memcpy(dst->data + dst->offset + offset, src, len);
> +       type = bpf_dynptr_get_type(dst);
>
> -       return 0;
> +       switch (type) {
> +       case BPF_DYNPTR_TYPE_LOCAL:
> +       case BPF_DYNPTR_TYPE_RINGBUF:
> +               if (flags)
> +                       return -EINVAL;
> +               memcpy(dst->data + dst->offset + offset, src, len);
> +               return 0;
> +       case BPF_DYNPTR_TYPE_SKB:
> +               return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
> +                                            flags);
> +       default:
> +               WARN(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);

ditto about WARN_ONCE

> +               return -EFAULT;
> +       }
>  }
>
>  static const struct bpf_func_proto bpf_dynptr_write_proto = {

[...]

> +static enum bpf_dynptr_type stack_slot_get_dynptr_info(struct bpf_verifier_env *env,
> +                                                      struct bpf_reg_state *reg,
> +                                                      int *ref_obj_id)
>  {
>         struct bpf_func_state *state = func(env, reg);
>         int spi = get_spi(reg->off);
>
> -       return state->stack[spi].spilled_ptr.id;
> +       if (ref_obj_id)
> +               *ref_obj_id = state->stack[spi].spilled_ptr.id;
> +
> +       return state->stack[spi].spilled_ptr.dynptr.type;
>  }
>
>  static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
> @@ -6056,7 +6075,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
>                         case DYNPTR_TYPE_RINGBUF:
>                                 err_extra = "ringbuf ";
>                                 break;
> -                       default:
> +                       case DYNPTR_TYPE_SKB:
> +                               err_extra = "skb ";
>                                 break;
>                         }
>
> @@ -7149,6 +7169,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>  {
>         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
>         const struct bpf_func_proto *fn = NULL;
> +       enum bpf_dynptr_type dynptr_type;

compiler technically can complain about use of uninitialized
dynptr_type, maybe initialize it to BPF_DYNPTR_TYPE_INVALID ?

>         enum bpf_return_type ret_type;
>         enum bpf_type_flag ret_flag;
>         struct bpf_reg_state *regs;
> @@ -7320,24 +7341,43 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>                         }
>                 }
>                 break;
> -       case BPF_FUNC_dynptr_data:
> -               for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
> -                       if (arg_type_is_dynptr(fn->arg_type[i])) {
> -                               if (meta.ref_obj_id) {
> -                                       verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
> -                                       return -EFAULT;
> -                               }
> -                               /* Find the id of the dynptr we're tracking the reference of */
> -                               meta.ref_obj_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
> -                               break;
> -                       }
> +       case BPF_FUNC_dynptr_write:
> +       {
> +               struct bpf_reg_state *reg;
> +
> +               reg = get_dynptr_arg_reg(fn, regs);
> +               if (!reg) {
> +                       verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");

s/bpf_dynptr_data/bpf_dynptr_write/

> +                       return -EFAULT;
>                 }
> -               if (i == MAX_BPF_FUNC_REG_ARGS) {
> +
> +               /* bpf_dynptr_write() for skb-type dynptrs may pull the skb, so we must
> +                * invalidate all data slices associated with it
> +                */
> +               if (stack_slot_get_dynptr_info(env, reg, NULL) == BPF_DYNPTR_TYPE_SKB)
> +                       changes_data = true;
> +
> +               break;
> +       }
> +       case BPF_FUNC_dynptr_data:
> +       {
> +               struct bpf_reg_state *reg;
> +
> +               reg = get_dynptr_arg_reg(fn, regs);
> +               if (!reg) {
>                         verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
>                         return -EFAULT;
>                 }
> +
> +               if (meta.ref_obj_id) {
> +                       verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
> +                       return -EFAULT;
> +               }
> +
> +               dynptr_type = stack_slot_get_dynptr_info(env, reg, &meta.ref_obj_id);
>                 break;
>         }
> +       }
>
>         if (err)
>                 return err;
> @@ -7397,8 +7437,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>                 break;
>         case RET_PTR_TO_ALLOC_MEM:
>                 mark_reg_known_zero(env, regs, BPF_REG_0);
> -               regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
> -               regs[BPF_REG_0].mem_size = meta.mem_size;
> +
> +               if (func_id == BPF_FUNC_dynptr_data &&
> +                   dynptr_type == BPF_DYNPTR_TYPE_SKB) {
> +                       regs[BPF_REG_0].type = PTR_TO_PACKET | ret_flag;
> +                       regs[BPF_REG_0].range = meta.mem_size;
> +               } else {
> +                       regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
> +                       regs[BPF_REG_0].mem_size = meta.mem_size;
> +               }
>                 break;
>         case RET_PTR_TO_MEM_OR_BTF_ID:
>         {
> @@ -14141,6 +14188,24 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
>                         goto patch_call_imm;
>                 }
>
> +               if (insn->imm == BPF_FUNC_dynptr_from_skb) {
> +                       bool is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
> +
> +                       insn_buf[0] = BPF_MOV32_IMM(BPF_REG_4, is_rdonly);
> +                       insn_buf[1] = *insn;
> +                       cnt = 2;

This might have been discussed before, sorry if I missed it. But why
this special rewrite to provide read-only flag vs having two
implementations of bpf_dynptr_from_skb() depending on program types?
If program type allows read+write access, return
bpf_dynptr_from_skb_rdwr(), for those that have read-only access -
bpf_dynptr_from_skb_rdonly(), and for others return NULL proto
(disable helper)?

You can then use it very similarly for bpf_dynptr_from_skb_meta().

> +
> +                       new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
> +                       if (!new_prog)
> +                               return -ENOMEM;
> +
> +                       delta += cnt - 1;
> +                       env->prog = new_prog;
> +                       prog = new_prog;
> +                       insn = new_prog->insnsi + i + delta;
> +                       goto patch_call_imm;
> +               }
> +

[...]

>  BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
>  {
>         return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
> @@ -7726,6 +7763,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_get_socket_uid_proto;
>         case BPF_FUNC_perf_event_output:
>                 return &bpf_skb_event_output_proto;
> +       case BPF_FUNC_dynptr_from_skb:
> +               return &bpf_dynptr_from_skb_proto;
>         default:
>                 return bpf_sk_base_func_proto(func_id);
>         }
> @@ -7909,6 +7948,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_tcp_raw_check_syncookie_ipv6_proto;
>  #endif
>  #endif
> +       case BPF_FUNC_dynptr_from_skb:
> +               return &bpf_dynptr_from_skb_proto;
>         default:
>                 return bpf_sk_base_func_proto(func_id);
>         }
> @@ -8104,6 +8145,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>         case BPF_FUNC_skc_lookup_tcp:
>                 return &bpf_skc_lookup_tcp_proto;
>  #endif
> +       case BPF_FUNC_dynptr_from_skb:
> +               return &bpf_dynptr_from_skb_proto;
>         default:
>                 return bpf_sk_base_func_proto(func_id);
>         }
> @@ -8142,6 +8185,8 @@ lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_get_smp_processor_id_proto;
>         case BPF_FUNC_skb_under_cgroup:
>                 return &bpf_skb_under_cgroup_proto;
> +       case BPF_FUNC_dynptr_from_skb:
> +               return &bpf_dynptr_from_skb_proto;

so like here you'd return read-only prototype for dynptr_from_skb
(seems like LWT programs have only read-only access, according to
may_access_direct_pkt_data), but in cases where
may_access_direct_pkt_data() allows read-write access we'd choose rdwr
variant of dynptr_from_skb?

>         default:
>                 return bpf_sk_base_func_proto(func_id);
>         }

[...]



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux