Re: [PATCH bpf-next 1/2] bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()

"Ziyang Xuan (William)" <william.xuanziyang@xxxxxxxxxx> · Mon, 9 Jan 2023 17:19:54 +0800

> On Fri, Jan 6, 2023 at 2:55 PM <sdf@xxxxxxxxxx> wrote:
>>
>> On 01/06, Ziyang Xuan wrote:
>>> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room().
>>> Main use case is for using cls_bpf on ingress hook to decapsulate
>>> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.
>>
>> CC'd Willem since he has done bpf_skb_adjust_room changes in the past.
>> There might be a lot of GRO/GSO context I'm missing.
>>
>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@xxxxxxxxxx>
>>> ---
>>>   net/core/filter.c | 34 ++++++++++++++++++++++++++++++++--
>>>   1 file changed, 32 insertions(+), 2 deletions(-)
>>
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 929358677183..73982fb4fe2e 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb,
>>> u32 off, u32 len_diff,
>>>   static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
>>>                             u64 flags)
>>>   {
>>> +     union {
>>> +             struct iphdr *v4;
>>> +             struct ipv6hdr *v6;
>>> +             unsigned char *hdr;
>>> +     } ip;
>>> +     __be16 proto;
>>>       int ret;
>>
>>>       if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
>>> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff
>>> *skb, u32 off, u32 len_diff,
>>>       if (unlikely(ret < 0))
>>>               return ret;
>>
>>> +     ip.hdr = skb_inner_network_header(skb);
>>> +     if (ip.v4->version == 4)
>>> +             proto = htons(ETH_P_IP);
>>> +     else
>>> +             proto = htons(ETH_P_IPV6);
>>> +
>>>       ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
>>>       if (unlikely(ret < 0))
>>>               return ret;
>>
>>> +     /* Match skb->protocol to new outer l3 protocol */
>>> +     skb->protocol = proto;
>>> +
>>>       if (skb_is_gso(skb)) {
>>>               struct skb_shared_info *shinfo = skb_shinfo(skb);
>>
>>> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>>          u32, mode, u64, flags)
>>>   {
>>>       u32 len_cur, len_diff_abs = abs(len_diff);
>>> -     u32 len_min = bpf_skb_net_base_len(skb);
>>> -     u32 len_max = BPF_SKB_MAX_LEN;
>>> +     u32 len_min, len_max = BPF_SKB_MAX_LEN;
>>>       __be16 proto = skb->protocol;
>>>       bool shrink = len_diff < 0;
>>> +     union {
>>> +             struct iphdr *v4;
>>> +             struct ipv6hdr *v6;
>>> +             unsigned char *hdr;
>>> +     } ip;
>>>       u32 off;
>>>       int ret;
>>
>>> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>>                    proto != htons(ETH_P_IPV6)))
>>>               return -ENOTSUPP;
>>
>>> +     if (unlikely(shrink && !skb->encapsulation))
>>> +             return -ENOTSUPP;
>>> +
> 
> This new restriction might break existing users.
> 
> There is no pre-existing requirement that shrink is used solely with
> packets encapsulated by the protocol stack.
> 
> Indeed, skb->encapsulation is likely not set on packets arriving from
> the wire, even if encapsulated. Referring to your comment "Main use
> case is for using cls_bpf on ingress hook to decapsulate"
> 
> Can a combination of the existing bpf_skb_adjust_room and
> bpf_skb_change_proto address your problem?

Hello Willem,

I think combination bpf_skb_adjust_room and bpf_skb_change_proto can not
address my problem.

Now, bpf_skb_adjust_room() would fail for "len_cur - len_diff_abs < len_min"
when decap ipip6 packet, because "len_min" should be sizeof(struct iphdr)
but not sizeof(struct ipv6hdr).

We can remove skb->encapsulation restriction and parse outer and inner IP
header to determine ipip6 and ip6ip packets. As following:

--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3498,6 +3498,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
                              u64 flags)
 {
+       union {
+               struct iphdr *v4;
+               struct ipv6hdr *v6;
+               unsigned char *hdr;
+       } ip;
+       __be16 proto;
        int ret;

        if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
@@ -3515,10 +3521,23 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
        if (unlikely(ret < 0))
                return ret;

+       ip.hdr = skb_network_header(skb);
+       if (ip.v4->version == 4) {
+               if (ip.v4->protocol == IPPROTO_IPV6)
+                       proto = htons(ETH_P_IPV6);
+       } else {
+               struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
+               if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4)
+                       proto = htons(ETH_P_IP);
+       }
+
        ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
        if (unlikely(ret < 0))
                return ret;

+       /* Match skb->protocol to new outer l3 protocol */
+       skb->protocol = proto;
+
        if (skb_is_gso(skb)) {
                struct skb_shared_info *shinfo = skb_shinfo(skb);

@@ -3585,6 +3604,11 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
        u32 len_max = BPF_SKB_MAX_LEN;
        __be16 proto = skb->protocol;
        bool shrink = len_diff < 0;
+       union {
+               struct iphdr *v4;
+               struct ipv6hdr *v6;
+               unsigned char *hdr;
+       } ip;
        u32 off;
        int ret;

@@ -3608,6 +3632,19 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
                return -ENOTSUPP;
        }

+       if (shrink) {
+               ip.hdr = skb_network_header(skb);
+               if (ip.v4->version == 4) {
+                       if (ip.v4->protocol == IPPROTO_IPV6)
+                               len_min = sizeof(struct ipv6hdr);
+               } else {
+                       struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr));
+                       if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) {
+                               len_min = sizeof(struct iphdr);
+                       }
+               }
+       }
+
        len_cur = skb->len - skb_network_offset(skb);


Look forward to your comments and suggestions.

Thank you!

> 
>>>       off = skb_mac_header_len(skb);
>>>       switch (mode) {
>>>       case BPF_ADJ_ROOM_NET:
>>> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *,
>>> skb, s32, len_diff,
>>>               return -ENOTSUPP;
>>>       }
>>
>>> +     if (shrink) {
>>> +             ip.hdr = skb_inner_network_header(skb);
>>> +             if (ip.v4->version == 4)
>>> +                     len_min = sizeof(struct iphdr);
>>> +             else
>>> +                     len_min = sizeof(struct ipv6hdr);
>>> +     }
>>> +
>>>       len_cur = skb->len - skb_network_offset(skb);
>>>       if ((shrink && (len_diff_abs >= len_cur ||
>>>                       len_cur - len_diff_abs < len_min)) ||
>>> --
>>> 2.25.1
>>
> .
>