From: Maciej Żenczykowski <maze@xxxxxxxxxx> IPv4 fragments (20 byte IPv4 header) need to be translated to/from IPv6 fragments (40 byte IPv6 header with an additional 8 byte IPv6 fragmentation header). Allow this translation to be done in a single call by adding an extra flag, BPF_F_IPV6_FRAGMENT, to bpf_skb_change_proto(). I think this is already technically achievable via the use of bpf_skb_adjust_room(), which was added in v4.12 commit 2be7e212d541, but the new flag is far easier to use and eliminates the need to call two helper functions, so it's also faster. Cc: Lorenzo Colitti <lorenzo@xxxxxxxxxx> Signed-off-by: Maciej Żenczykowski <maze@xxxxxxxxxx> --- include/uapi/linux/bpf.h | 24 ++++++++++++++++++++++-- net/core/filter.c | 19 ++++++++++--------- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ba5af15e25f5..0187c2f0a4bc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2188,8 +2188,10 @@ union bpf_attr { * checked and segments are recalculated by the GSO/GRO engine. * The size for GSO target is adapted as well. * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * *flags* may be set to **BPF_F_IPV6_FRAGMENT** to treat ipv6 as + * a 48 byte header instead of the normal 40 (this leaves 8 bytes + * of space for the IPv6 Fragmentation Header). All other bits in + * *flags* are reserved for future usage, and must be left at zero. * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers @@ -5164,6 +5166,24 @@ enum { BPF_F_TUNINFO_IPV6 = (1ULL << 0), }; +/* BPF_FUNC_skb_change_proto flags. */ +enum { + /* Bits 0-15 are reserved for possible future expansion into + * a potential signed 8 bit field, which allows for corrections + * to account for ipv4 options and/or additional ipv6 expansion headers, + * but for now we support *only* the 8 byte ipv6 frag header. 
+ * + * This is most useful, because ipv4 without options supports fragments, + * while ipv6 does not, so the 20 byte ipv4-frag <-> 48 byte ipv6 + * conversion is not a terribly rare case (UDP DNS queries for example). + * + * Only use bits <16 for other purposes if we run out of >15 bits first. + * + * 1ULL << 3 is equal to +8 and is the ipv6 frag header size. + */ + BPF_F_IPV6_FRAGMENT = (1ULL << 3), +}; + /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ enum { BPF_F_SKIP_FIELD_MASK = 0xffULL, diff --git a/net/core/filter.c b/net/core/filter.c index 6102f093d59a..13020368fb4a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3219,9 +3219,8 @@ static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len) return ret; } -static int bpf_skb_proto_4_to_6(struct sk_buff *skb) +static int bpf_skb_proto_4_to_6(struct sk_buff *skb, u32 len_diff) { - const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); u32 off = skb_mac_header_len(skb); int ret; @@ -3249,9 +3248,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return 0; } -static int bpf_skb_proto_6_to_4(struct sk_buff *skb) +static int bpf_skb_proto_6_to_4(struct sk_buff *skb, u32 len_diff) { - const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr); u32 off = skb_mac_header_len(skb); int ret; @@ -3279,17 +3277,17 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return 0; } -static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) +static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto, u32 len_diff) { __be16 from_proto = skb->protocol; if (from_proto == htons(ETH_P_IP) && to_proto == htons(ETH_P_IPV6)) - return bpf_skb_proto_4_to_6(skb); + return bpf_skb_proto_4_to_6(skb, len_diff); if (from_proto == htons(ETH_P_IPV6) && to_proto == htons(ETH_P_IP)) - return bpf_skb_proto_6_to_4(skb); + return bpf_skb_proto_6_to_4(skb, len_diff); return -ENOTSUPP; } @@ -3297,9 +3295,10 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, 
__be16 to_proto) BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, u64, flags) { + u32 len_diff; int ret; - if (unlikely(flags)) + if (unlikely(flags & ~(BPF_F_IPV6_FRAGMENT))) return -EINVAL; /* General idea is that this helper does the basic groundwork @@ -3319,7 +3318,9 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, * that. For offloads, we mark packet as dodgy, so that headers * need to be verified first. */ - ret = bpf_skb_proto_xlat(skb, proto); + len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr) + + ((flags & BPF_F_IPV6_FRAGMENT) ? sizeof(struct frag_hdr) : 0); + ret = bpf_skb_proto_xlat(skb, proto, len_diff); bpf_compute_data_pointers(skb); return ret; } -- 2.34.0.rc2.393.gf8c9666880-goog