Re: [PATCH v7 bpf-next 2/4] bpf: Support setting variable-length tunnel options

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Sep 11, 2022 at 5:23 AM Shmulik Ladkani
<shmulik@xxxxxxxxxxxxxxxx> wrote:
>
> Existing 'bpf_skb_set_tunnel_opt' allows setting tunnel options given
> an option buffer (ARG_PTR_TO_MEM) and the compile-time fixed buffer
> size (ARG_CONST_SIZE).
>
> However, in certain cases we wish to set tunnel options of dynamic
> length.
>
> For example, we have an ebpf program that gets geneve options on
> incoming packets, stores them into a map (using a key representing
> the incoming flow), and later needs to assign *same* options to
> reply packets (belonging to same flow).
>
> This is currently imposssible without knowing sender's exact geneve
> options length, which unfortunately is dymamic.
>
> Introduce 'bpf_skb_set_tunnel_opt_dynptr'.
>
> This is a variant of 'bpf_skb_set_tunnel_opt' which gets a bpf dynamic
> pointer (ARG_PTR_TO_DYNPTR) parameter whose data points to the options
> buffer to set.
>
> Signed-off-by: Shmulik Ladkani <shmulik.ladkani@xxxxxxxxx>
>
> ---
> v3: Avoid 'inline' for the __bpf_skb_set_tunopt helper function
> v4: change API to be based on bpf_dynptr, suggested by John Fastabend <john.fastabend@xxxxxxxxx>
> v6: Remove superfluous 'len' from bpf_skb_set_tunnel_opt_dynptr API
>     (rely on dynptr's internal size), suggested by Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx>
> ---
>  include/uapi/linux/bpf.h       | 11 +++++++++++
>  net/core/filter.c              | 31 +++++++++++++++++++++++++++++--
>  tools/include/uapi/linux/bpf.h | 11 +++++++++++
>  3 files changed, 51 insertions(+), 2 deletions(-)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3df78c56c1bf..ba12f7e1ccb6 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5387,6 +5387,16 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt)
> + *     Description
> + *             Set tunnel options metadata for the packet associated to *skb*
> + *             to the option data pointed to by the *opt* dynptr.
> + *
> + *             See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> + *             helper for additional information.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5598,6 +5608,7 @@ union bpf_attr {
>         FN(tcp_raw_check_syncookie_ipv4),       \
>         FN(tcp_raw_check_syncookie_ipv6),       \
>         FN(ktime_get_tai_ns),           \
> +       FN(skb_set_tunnel_opt_dynptr),  \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/net/core/filter.c b/net/core/filter.c
> index e872f45399b0..1c652936ef86 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -4674,8 +4674,7 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
>         .arg4_type      = ARG_ANYTHING,
>  };
>
> -BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
> -          const u8 *, from, u32, size)
> +static u64 __bpf_skb_set_tunopt(struct sk_buff *skb, const u8 *from, u32 size)
>  {
>         struct ip_tunnel_info *info = skb_tunnel_info(skb);
>         const struct metadata_dst *md = this_cpu_ptr(md_dst);
> @@ -4690,6 +4689,22 @@ BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
>         return 0;
>  }
>
> +BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
> +          const u8 *, from, u32, size)
> +{
> +       return __bpf_skb_set_tunopt(skb, from, size);
> +}
> +
> +BPF_CALL_2(bpf_skb_set_tunnel_opt_dynptr, struct sk_buff *, skb,
> +          struct bpf_dynptr_kern *, ptr)
> +{
> +       const u8 *from = bpf_dynptr_get_data(ptr);
> +
> +       if (unlikely(!from))
> +               return -EFAULT;
> +       return __bpf_skb_set_tunopt(skb, from, bpf_dynptr_get_size(ptr));
> +}
> +
>  static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
>         .func           = bpf_skb_set_tunnel_opt,
>         .gpl_only       = false,
> @@ -4699,6 +4714,14 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
>         .arg3_type      = ARG_CONST_SIZE,
>  };
>
> +static const struct bpf_func_proto bpf_skb_set_tunnel_opt_dynptr_proto = {
> +       .func           = bpf_skb_set_tunnel_opt_dynptr,
> +       .gpl_only       = false,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_CTX,
> +       .arg2_type      = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL,

being able to accept only DYNPTR_TYPE_LOCAL is quite limiting. We have
RINGBUF type, as well as soon we'll have MALLOC type. Even with SKB
and XDP types there is a linear area that kernel accept directly.

So if feels better to not specify exact type, accept any type, and
then have internal kernel helpers that will return you direct memory
pointer, if it is readable (i.e., for skb/xdp that would mean data
points to linear part).

So basically exactly the same behavior as bpf_dynptr_data() BPF helper.

Also note that dynptr is now CAP_BPF-only, so you might want to make
sure that your new helper is also CAP_BPF-guarded?

> +};
> +
>  static const struct bpf_func_proto *
>  bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
>  {
> @@ -4719,6 +4742,8 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
>                 return &bpf_skb_set_tunnel_key_proto;
>         case BPF_FUNC_skb_set_tunnel_opt:
>                 return &bpf_skb_set_tunnel_opt_proto;
> +       case BPF_FUNC_skb_set_tunnel_opt_dynptr:
> +               return &bpf_skb_set_tunnel_opt_dynptr_proto;
>         default:
>                 return NULL;
>         }
> @@ -7798,6 +7823,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>         case BPF_FUNC_skb_get_tunnel_opt:
>                 return &bpf_skb_get_tunnel_opt_proto;
>         case BPF_FUNC_skb_set_tunnel_opt:
> +       case BPF_FUNC_skb_set_tunnel_opt_dynptr:
>                 return bpf_get_skb_set_tunnel_proto(func_id);
>         case BPF_FUNC_redirect:
>                 return &bpf_redirect_proto;
> @@ -8145,6 +8171,7 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>         case BPF_FUNC_skb_get_tunnel_opt:
>                 return &bpf_skb_get_tunnel_opt_proto;
>         case BPF_FUNC_skb_set_tunnel_opt:
> +       case BPF_FUNC_skb_set_tunnel_opt_dynptr:
>                 return bpf_get_skb_set_tunnel_proto(func_id);
>         case BPF_FUNC_redirect:
>                 return &bpf_redirect_proto;
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index 3df78c56c1bf..ba12f7e1ccb6 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -5387,6 +5387,16 @@ union bpf_attr {
>   *     Return
>   *             Current *ktime*.
>   *
> + * long bpf_skb_set_tunnel_opt_dynptr(struct sk_buff *skb, struct bpf_dynptr *opt)
> + *     Description
> + *             Set tunnel options metadata for the packet associated to *skb*
> + *             to the option data pointed to by the *opt* dynptr.
> + *
> + *             See also the description of the **bpf_skb_get_tunnel_opt**\ ()
> + *             helper for additional information.
> + *     Return
> + *             0 on success, or a negative error in case of failure.
> + *
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5598,6 +5608,7 @@ union bpf_attr {
>         FN(tcp_raw_check_syncookie_ipv4),       \
>         FN(tcp_raw_check_syncookie_ipv6),       \
>         FN(ktime_get_tai_ns),           \
> +       FN(skb_set_tunnel_opt_dynptr),  \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.37.3
>



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux