On Fri, 2022-07-01 at 17:01 +0200, Andrea Mayer wrote: > The SRv6 H.Encaps.Red behavior described in [1] is an optimization of > the SRv6 H.Encaps behavior [2]. > > H.Encaps.Red reduces the length of the SRH by excluding the first > segment (SID) in the SRH of the pushed IPv6 header. The first SID is > only placed in the IPv6 Destination Address field of the pushed IPv6 > header. > When the SRv6 Policy only contains one SID the SRH is omitted, unless > there is an HMAC TLV to be carried. > > [1] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.2 > [2] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.1 > > Signed-off-by: Andrea Mayer <andrea.mayer@xxxxxxxxxxx> > Signed-off-by: Anton Makarov <anton.makarov11235@xxxxxxxxx> > --- > include/uapi/linux/seg6_iptunnel.h | 1 + > net/ipv6/seg6_iptunnel.c | 126 ++++++++++++++++++++++++++++- > 2 files changed, 126 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h > index eb815e0d0ac3..538152a7b2c3 100644 > --- a/include/uapi/linux/seg6_iptunnel.h > +++ b/include/uapi/linux/seg6_iptunnel.h > @@ -35,6 +35,7 @@ enum { > SEG6_IPTUN_MODE_INLINE, > SEG6_IPTUN_MODE_ENCAP, > SEG6_IPTUN_MODE_L2ENCAP, > + SEG6_IPTUN_MODE_ENCAP_RED, > }; > > #endif > diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c > index d64855010948..4942073650d3 100644 > --- a/net/ipv6/seg6_iptunnel.c > +++ b/net/ipv6/seg6_iptunnel.c > @@ -36,6 +36,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) > case SEG6_IPTUN_MODE_INLINE: > break; > case SEG6_IPTUN_MODE_ENCAP: > + case SEG6_IPTUN_MODE_ENCAP_RED: > head = sizeof(struct ipv6hdr); > break; > case SEG6_IPTUN_MODE_L2ENCAP: > @@ -195,6 +196,122 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) > } > EXPORT_SYMBOL_GPL(seg6_do_srh_encap); > > +/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ > +static int seg6_do_srh_encap_red(struct sk_buff *skb, > + struct ipv6_sr_hdr *osrh, int proto) > +{ > + __u8 first_seg = osrh->first_segment; > + struct dst_entry *dst = skb_dst(skb); > + struct net *net = dev_net(dst->dev); > + struct ipv6hdr *hdr, *inner_hdr; > + int hdrlen = ipv6_optlen(osrh); > + int red_tlv_offset, tlv_offset; > + struct ipv6_sr_hdr *isrh; > + bool skip_srh = false; > + __be32 flowlabel; > + int tot_len, err; > + int red_hdrlen; > + int tlvs_len; > + > + if (first_seg > 0) { > + red_hdrlen = hdrlen - sizeof(struct in6_addr); > + } else { > + /* NOTE: if tag/flags and/or other TLVs are introduced in the > + * seg6_iptunnel infrastructure, they should be considered when > + * deciding to skip the SRH. > + */ > + skip_srh = !sr_has_hmac(osrh); > + > + red_hdrlen = skip_srh ? 0 : hdrlen; > + } > + > + tot_len = red_hdrlen + sizeof(struct ipv6hdr); > + > + err = skb_cow_head(skb, tot_len + skb->mac_len); > + if (unlikely(err)) > + return err; > + > + inner_hdr = ipv6_hdr(skb); > + flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); > + > + skb_push(skb, tot_len); > + skb_reset_network_header(skb); > + skb_mac_header_rebuild(skb); > + hdr = ipv6_hdr(skb); > + > + /* based on seg6_do_srh_encap() */ > + if (skb->protocol == htons(ETH_P_IPV6)) { > + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), > + flowlabel); > + hdr->hop_limit = inner_hdr->hop_limit; > + } else { > + ip6_flow_hdr(hdr, 0, flowlabel); > + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); > + > + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); > + IP6CB(skb)->iif = skb->skb_iif; > + } > + > + /* no matter if we have to skip the SRH or not, the first segment > + * always comes in the pushed IPv6 header. > + */ > + hdr->daddr = osrh->segments[first_seg]; > + > + if (skip_srh) { > + hdr->nexthdr = proto; > + > + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); > + goto out; > + } > + > + /* we cannot skip the SRH, slow path */ > + > + hdr->nexthdr = NEXTHDR_ROUTING; > + isrh = (void *)hdr + sizeof(struct ipv6hdr); > + > + if (unlikely(!first_seg)) { > + /* this is a very rare case; we have only one SID but > + * we cannot skip the SRH since we are carrying some > + * other info. > + */ > + memcpy(isrh, osrh, hdrlen); > + goto srcaddr; > + } > + > + tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr); > + red_tlv_offset = tlv_offset - sizeof(struct in6_addr); > + > + memcpy(isrh, osrh, red_tlv_offset); > + > + tlvs_len = hdrlen - tlv_offset; > + if (unlikely(tlvs_len > 0)) { > + const void *s = (const void *)osrh + tlv_offset; > + void *d = (void *)isrh + red_tlv_offset; > + > + memcpy(d, s, tlvs_len); > + } > + > + --isrh->first_segment; > + isrh->hdrlen -= 2; > + > +srcaddr: > + isrh->nexthdr = proto; > + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); > + > +#ifdef CONFIG_IPV6_SEG6_HMAC > + if (unlikely(!skip_srh && sr_has_hmac(isrh))) { > + err = seg6_push_hmac(net, &hdr->saddr, isrh); > + if (unlikely(err)) > + return err; > + } > +#endif > + > +out: > + skb_postpush_rcsum(skb, hdr, tot_len); It looks like, at this point hdr->payload_len is not initialized yet - it will be set later by the caller. So the above will corrupt the checksum complete. I think the solution is moving 'payload_len' initialization before the csum update. Note that 'seg6_do_srh_encap' has a similar issue. Paolo