On Mon, Jan 9, 2023 at 4:20 AM Ziyang Xuan (William) <william.xuanziyang@xxxxxxxxxx> wrote: > > > On Fri, Jan 6, 2023 at 2:55 PM <sdf@xxxxxxxxxx> wrote: > >> > >> On 01/06, Ziyang Xuan wrote: > >>> Add ipip6 and ip6ip decap support for bpf_skb_adjust_room(). > >>> Main use case is for using cls_bpf on ingress hook to decapsulate > >>> IPv4 over IPv6 and IPv6 over IPv4 tunnel packets. > >> > >> CC'd Willem since he has done bpf_skb_adjust_room changes in the past. > >> There might be a lot of GRO/GSO context I'm missing. > >> > >>> Signed-off-by: Ziyang Xuan <william.xuanziyang@xxxxxxxxxx> > >>> --- > >>> net/core/filter.c | 34 ++++++++++++++++++++++++++++++++-- > >>> 1 file changed, 32 insertions(+), 2 deletions(-) > >> > >>> diff --git a/net/core/filter.c b/net/core/filter.c > >>> index 929358677183..73982fb4fe2e 100644 > >>> --- a/net/core/filter.c > >>> +++ b/net/core/filter.c > >>> @@ -3495,6 +3495,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, > >>> u32 off, u32 len_diff, > >>> static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, > >>> u64 flags) > >>> { > >>> + union { > >>> + struct iphdr *v4; > >>> + struct ipv6hdr *v6; > >>> + unsigned char *hdr; > >>> + } ip; > >>> + __be16 proto; > >>> int ret; > >> > >>> if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | > >>> @@ -3512,10 +3518,19 @@ static int bpf_skb_net_shrink(struct sk_buff > >>> *skb, u32 off, u32 len_diff, > >>> if (unlikely(ret < 0)) > >>> return ret; > >> > >>> + ip.hdr = skb_inner_network_header(skb); > >>> + if (ip.v4->version == 4) > >>> + proto = htons(ETH_P_IP); > >>> + else > >>> + proto = htons(ETH_P_IPV6); > >>> + > >>> ret = bpf_skb_net_hdr_pop(skb, off, len_diff); > >>> if (unlikely(ret < 0)) > >>> return ret; > >> > >>> + /* Match skb->protocol to new outer l3 protocol */ > >>> + skb->protocol = proto; > >>> + > >>> if (skb_is_gso(skb)) { > >>> struct skb_shared_info *shinfo = skb_shinfo(skb); > >> > >>> @@ -3578,10 +3593,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, > >>> skb, s32, len_diff, > >>> u32, mode, u64, flags) > >>> { > >>> u32 len_cur, len_diff_abs = abs(len_diff); > >>> - u32 len_min = bpf_skb_net_base_len(skb); > >>> - u32 len_max = BPF_SKB_MAX_LEN; > >>> + u32 len_min, len_max = BPF_SKB_MAX_LEN; > >>> __be16 proto = skb->protocol; > >>> bool shrink = len_diff < 0; > >>> + union { > >>> + struct iphdr *v4; > >>> + struct ipv6hdr *v6; > >>> + unsigned char *hdr; > >>> + } ip; > >>> u32 off; > >>> int ret; > >> > >>> @@ -3594,6 +3613,9 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, > >>> skb, s32, len_diff, > >>> proto != htons(ETH_P_IPV6))) > >>> return -ENOTSUPP; > >> > >>> + if (unlikely(shrink && !skb->encapsulation)) > >>> + return -ENOTSUPP; > >>> + > > > > This new restriction might break existing users. > > > > There is no pre-existing requirement that shrink is used solely with > > packets encapsulated by the protocol stack. > > > > Indeed, skb->encapsulation is likely not set on packets arriving from > > the wire, even if encapsulated. Referring to your comment "Main use > > case is for using cls_bpf on ingress hook to decapsulate" > > > > Can a combination of the existing bpf_skb_adjust_room and > > bpf_skb_change_proto address your problem? > > Hello Willem, > > I think combination bpf_skb_adjust_room and bpf_skb_change_proto can not > address my problem. > > Now, bpf_skb_adjust_room() would fail for "len_cur - len_diff_abs < len_min" > when decap ipip6 packet, because "len_min" should be sizeof(struct iphdr) > but not sizeof(struct ipv6hdr). > > We can remove skb->encapsulation restriction and parse outer and inner IP > header to determine ipip6 and ip6ip packets. As following: Adding logic for network layer protocol conversion like this looks good to me. bpf_skb_adjust_room already has a few other metadata quirks. But like those, let's make this intent explicit: define a new flag that requests this behavior. Let's avoid introducing a new union. Just use check (ip_hdr(skb)->version == 4). > > --- a/net/core/filter.c > +++ b/net/core/filter.c > @@ -3498,6 +3498,12 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, > static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, > u64 flags) > { > + union { > + struct iphdr *v4; > + struct ipv6hdr *v6; > + unsigned char *hdr; > + } ip; > + __be16 proto; > int ret; > > if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | > @@ -3515,10 +3521,23 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, > if (unlikely(ret < 0)) > return ret; > > + ip.hdr = skb_network_header(skb); > + if (ip.v4->version == 4) { > + if (ip.v4->protocol == IPPROTO_IPV6) > + proto = htons(ETH_P_IPV6); > + } else { > + struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr)); > + if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) > + proto = htons(ETH_P_IP); > + } > + > ret = bpf_skb_net_hdr_pop(skb, off, len_diff); > if (unlikely(ret < 0)) > return ret; > > + /* Match skb->protocol to new outer l3 protocol */ > + skb->protocol = proto; > + > if (skb_is_gso(skb)) { > struct skb_shared_info *shinfo = skb_shinfo(skb); > > @@ -3585,6 +3604,11 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, > u32 len_max = BPF_SKB_MAX_LEN; > __be16 proto = skb->protocol; > bool shrink = len_diff < 0; > + union { > + struct iphdr *v4; > + struct ipv6hdr *v6; > + unsigned char *hdr; > + } ip; > u32 off; > int ret; > > @@ -3608,6 +3632,19 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff, > return -ENOTSUPP; > } > > + if (shrink) { > + ip.hdr = skb_network_header(skb); > + if (ip.v4->version == 4) { > + if (ip.v4->protocol == IPPROTO_IPV6) > + len_min = sizeof(struct ipv6hdr); > + } else { > + struct ipv6_opt_hdr *opt_hdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + sizeof(struct ipv6hdr)); > + if (ip.v6->nexthdr == NEXTHDR_DEST && opt_hdr->nexthdr == NEXTHDR_IPV4) { > + len_min = sizeof(struct iphdr); > + } > + } > + } > + > len_cur = skb->len - skb_network_offset(skb); > > > Look forward to your comments and suggestions. > > Thank you! > > > > >>> off = skb_mac_header_len(skb); > >>> switch (mode) { > >>> case BPF_ADJ_ROOM_NET: > >>> @@ -3605,6 +3627,14 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, > >>> skb, s32, len_diff, > >>> return -ENOTSUPP; > >>> } > >> > >>> + if (shrink) { > >>> + ip.hdr = skb_inner_network_header(skb); > >>> + if (ip.v4->version == 4) > >>> + len_min = sizeof(struct iphdr); > >>> + else > >>> + len_min = sizeof(struct ipv6hdr); > >>> + } > >>> + > >>> len_cur = skb->len - skb_network_offset(skb); > >>> if ((shrink && (len_diff_abs >= len_cur || > >>> len_cur - len_diff_abs < len_min)) || > >>> -- > >>> 2.25.1 > >> > > . > >