Patch "selftests/net: mv bpf/nat6to4.c to net folder" has been added to the 6.1-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    selftests/net: mv bpf/nat6to4.c to net folder

to the 6.1-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch
and it can be found in the queue-6.1 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


From 3c107f36db061603bee7564fbd6388b1f1879fd3 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@xxxxxxxxx>
Date: Wed, 18 Jan 2023 10:09:27 +0800
Subject: selftests/net: mv bpf/nat6to4.c to net folder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

From: Hangbin Liu <liuhangbin@xxxxxxxxx>

commit 3c107f36db061603bee7564fbd6388b1f1879fd3 upstream.

There are some issues with how bpf/nat6to4.c is built.

1. It uses TEST_CUSTOM_PROGS, which adds nat6to4.o to the
   kselftest-list file, so it gets run by the common run_tests.
2. When building the test via `make -C tools/testing/selftests/
   TARGETS="net"`, nat6to4.o is built in the selftests/net/bpf/
   folder, but the udpgro_frglist.sh test refers to ../bpf/nat6to4.o.
   The correct path should be ./bpf/nat6to4.o.
3. When building the test via `make -C tools/testing/selftests/ TARGETS="net"
   install`, nat6to4.o is installed to the kselftest_install/net/
   folder, so udpgro_frglist.sh should then refer to ./nat6to4.o.

To fix the confusing test path, let's just move nat6to4.c to the net folder
and build it via TEST_GEN_FILES.

Fixes: edae34a3ed92 ("selftests net: add UDP GRO fraglist + bpf self-tests")
Tested-by: Björn Töpel <bjorn@xxxxxxxxxx>
Signed-off-by: Hangbin Liu <liuhangbin@xxxxxxxxx>
Link: https://lore.kernel.org/r/20230118020927.3971864-1-liuhangbin@xxxxxxxxx
Signed-off-by: Paolo Abeni <pabeni@xxxxxxxxxx>
Signed-off-by: Hardik Garg <hargar@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 tools/testing/selftests/net/Makefile          | 50 ++++++++++++++++++-
 tools/testing/selftests/net/bpf/Makefile      | 14 ------
 .../testing/selftests/net/{bpf => }/nat6to4.c |  0
 tools/testing/selftests/net/Makefile          |   50 ++++
 tools/testing/selftests/net/bpf/Makefile      |   14 -
 tools/testing/selftests/net/bpf/nat6to4.c     |  285 --------------------------
 tools/testing/selftests/net/nat6to4.c         |  285 ++++++++++++++++++++++++++
 tools/testing/selftests/net/udpgro_frglist.sh |    8 
 5 files changed, 337 insertions(+), 305 deletions(-)
 delete mode 100644 tools/testing/selftests/net/bpf/Makefile
 rename tools/testing/selftests/net/{bpf => }/nat6to4.c (100%)

--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -71,14 +71,60 @@ TEST_GEN_FILES += bind_bhash
 TEST_GEN_PROGS += sk_bind_sendto_listen
 TEST_GEN_PROGS += sk_connect_zero_addr
 TEST_PROGS += test_ingress_egress_chaining.sh
+TEST_GEN_FILES += nat6to4.o
 
 TEST_FILES := settings
 
 include ../lib.mk
 
-include bpf/Makefile
-
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
 $(OUTPUT)/bind_bhash: LDLIBS += -lpthread
+
+# Rules to generate bpf obj nat6to4.o
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(abspath ../../../lib/bpf)
+APIDIR := $(abspath ../../../include/uapi)
+
+CCINCLUDE += -I../bpf
+CCINCLUDE += -I../../../../usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+	mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS)
+	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
+	   $(APIDIR)/linux/bpf.h					       \
+	   | $(BUILD_DIR)/libbpf
+	$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/     \
+		    EXTRA_CFLAGS='-g -O0'				       \
+		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN := $(SCRATCH_DIR)
--- a/tools/testing/selftests/net/bpf/Makefile
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-CLANG ?= clang
-CCINCLUDE += -I../../bpf
-CCINCLUDE += -I../../../../lib
-CCINCLUDE += -I../../../../../usr/include/
-
-TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
-all: $(TEST_CUSTOM_PROGS)
-
-$(OUTPUT)/%.o: %.c
-	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
-
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
--- a/tools/testing/selftests/net/bpf/nat6to4.c
+++ /dev/null
@@ -1,285 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * This code is taken from the Android Open Source Project and the author
- * (Maciej Żenczykowski) has gave permission to relicense it under the
- * GPLv2. Therefore this program is free software;
- * You can redistribute it and/or modify it under the terms of the GNU
- * General Public License version 2 as published by the Free Software
- * Foundation
-
- * The original headers, including the original license headers, are
- * included below for completeness.
- *
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <linux/bpf.h>
-#include <linux/if.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/pkt_cls.h>
-#include <linux/swab.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-
-#include <linux/udp.h>
-
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_endian.h>
-
-#define IP_DF 0x4000  // Flag: "Don't Fragment"
-
-SEC("schedcls/ingress6/nat_6")
-int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
-{
-	const int l2_header_size =  sizeof(struct ethhdr);
-	void *data = (void *)(long)skb->data;
-	const void *data_end = (void *)(long)skb->data_end;
-	const struct ethhdr * const eth = data;  // used iff is_ethernet
-	const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
-
-	// Require ethernet dst mac address to be our unicast address.
-	if  (skb->pkt_type != PACKET_HOST)
-		return TC_ACT_OK;
-
-	// Must be meta-ethernet IPv6 frame
-	if (skb->protocol != bpf_htons(ETH_P_IPV6))
-		return TC_ACT_OK;
-
-	// Must have (ethernet and) ipv6 header
-	if (data + l2_header_size + sizeof(*ip6) > data_end)
-		return TC_ACT_OK;
-
-	// Ethertype - if present - must be IPv6
-	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
-		return TC_ACT_OK;
-
-	// IP version must be 6
-	if (ip6->version != 6)
-		return TC_ACT_OK;
-	// Maximum IPv6 payload length that can be translated to IPv4
-	if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
-		return TC_ACT_OK;
-	switch (ip6->nexthdr) {
-	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
-	case IPPROTO_UDP:  // address means there is no need to update their checksums.
-	case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
-	case IPPROTO_ESP:  // since there is never a checksum to update.
-		break;
-	default:  // do not know how to handle anything else
-		return TC_ACT_OK;
-	}
-
-	struct ethhdr eth2;  // used iff is_ethernet
-
-	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
-	eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
-
-	struct iphdr ip = {
-		.version = 4,                                                      // u4
-		.ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
-		.tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
-		.tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
-		.id = 0,                                                           // u16
-		.frag_off = bpf_htons(IP_DF),                                          // u16
-		.ttl = ip6->hop_limit,                                             // u8
-		.protocol = ip6->nexthdr,                                          // u8
-		.check = 0,                                                        // u16
-		.saddr = 0x0201a8c0,                            // u32
-		.daddr = 0x0101a8c0,                                         // u32
-	};
-
-	// Calculate the IPv4 one's complement checksum of the IPv4 header.
-	__wsum sum4 = 0;
-
-	for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
-		sum4 += ((__u16 *)&ip)[i];
-
-	// Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
-	ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
-
-	// Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
-	__wsum sum6 = 0;
-	// We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
-	for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
-		sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
-
-	// Note that there is no L4 checksum update: we are relying on the checksum neutrality
-	// of the ipv6 address chosen by netd's ClatdController.
-
-	// Packet mutations begin - point of no return, but if this first modification fails
-	// the packet is probably still pristine, so let clatd handle it.
-	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
-		return TC_ACT_OK;
-	bpf_csum_update(skb, sum6);
-
-	data = (void *)(long)skb->data;
-	data_end = (void *)(long)skb->data_end;
-	if (data + l2_header_size + sizeof(struct iphdr) > data_end)
-		return TC_ACT_SHOT;
-
-	struct ethhdr *new_eth = data;
-
-	// Copy over the updated ethernet header
-	*new_eth = eth2;
-
-	// Copy over the new ipv4 header.
-	*(struct iphdr *)(new_eth + 1) = ip;
-	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
-}
-
-SEC("schedcls/egress4/snat4")
-int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
-{
-	const int l2_header_size =  sizeof(struct ethhdr);
-	void *data = (void *)(long)skb->data;
-	const void *data_end = (void *)(long)skb->data_end;
-	const struct ethhdr *const eth = data;  // used iff is_ethernet
-	const struct iphdr *const ip4 = (void *)(eth + 1);
-
-	// Must be meta-ethernet IPv4 frame
-	if (skb->protocol != bpf_htons(ETH_P_IP))
-		return TC_ACT_OK;
-
-	// Must have ipv4 header
-	if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
-		return TC_ACT_OK;
-
-	// Ethertype - if present - must be IPv4
-	if (eth->h_proto != bpf_htons(ETH_P_IP))
-		return TC_ACT_OK;
-
-	// IP version must be 4
-	if (ip4->version != 4)
-		return TC_ACT_OK;
-
-	// We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
-	if (ip4->ihl != 5)
-		return TC_ACT_OK;
-
-	// Maximum IPv6 payload length that can be translated to IPv4
-	if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
-		return TC_ACT_OK;
-
-	// Calculate the IPv4 one's complement checksum of the IPv4 header.
-	__wsum sum4 = 0;
-
-	for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
-		sum4 += ((__u16 *)ip4)[i];
-
-	// Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
-	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
-	// for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
-	if (sum4 != 0xFFFF)
-		return TC_ACT_OK;
-
-	// Minimum IPv4 total length is the size of the header
-	if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
-		return TC_ACT_OK;
-
-	// We are incapable of dealing with IPv4 fragments
-	if (ip4->frag_off & ~bpf_htons(IP_DF))
-		return TC_ACT_OK;
-
-	switch (ip4->protocol) {
-	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
-	case IPPROTO_GRE:  // address means there is no need to update their checksums.
-	case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
-		break;         // since there is never a checksum to update.
-
-	case IPPROTO_UDP:  // See above comment, but must also have UDP header...
-		if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
-			return TC_ACT_OK;
-		const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
-		// If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
-		// checksum.  Otherwise the network or more likely the NAT64 gateway might
-		// drop the packet because in most cases IPv6/UDP packets with a zero checksum
-		// are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
-		if (!uh->check)
-			return TC_ACT_OK;
-		break;
-
-	default:  // do not know how to handle anything else
-		return TC_ACT_OK;
-	}
-	struct ethhdr eth2;  // used iff is_ethernet
-
-	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
-	eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
-
-	struct ipv6hdr ip6 = {
-		.version = 6,                                    // __u8:4
-		.priority = ip4->tos >> 4,                       // __u8:4
-		.flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
-		.payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
-		.nexthdr = ip4->protocol,                        // __u8
-		.hop_limit = ip4->ttl,                           // __u8
-	};
-	ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
-	ip6.saddr.in6_u.u6_addr32[1] = 0;
-	ip6.saddr.in6_u.u6_addr32[2] = 0;
-	ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
-	ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
-	ip6.daddr.in6_u.u6_addr32[1] = 0;
-	ip6.daddr.in6_u.u6_addr32[2] = 0;
-	ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
-
-	// Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
-	__wsum sum6 = 0;
-	// We'll end up with a non-zero sum due to ip6.version == 6
-	for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
-		sum6 += ((__u16 *)&ip6)[i];
-
-	// Packet mutations begin - point of no return, but if this first modification fails
-	// the packet is probably still pristine, so let clatd handle it.
-	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
-		return TC_ACT_OK;
-
-	// This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
-	// In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
-	// thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
-	// However, we've already verified the ipv4 checksum is correct and thus 0.
-	// Thus we only need to add the ipv6 header's sum.
-	//
-	// bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
-	// (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
-	bpf_csum_update(skb, sum6);
-
-	// bpf_skb_change_proto() invalidates all pointers - reload them.
-	data = (void *)(long)skb->data;
-	data_end = (void *)(long)skb->data_end;
-
-	// I cannot think of any valid way for this error condition to trigger, however I do
-	// believe the explicit check is required to keep the in kernel ebpf verifier happy.
-	if (data + l2_header_size + sizeof(ip6) > data_end)
-		return TC_ACT_SHOT;
-
-	struct ethhdr *new_eth = data;
-
-	// Copy over the updated ethernet header
-	*new_eth = eth2;
-	// Copy over the new ipv4 header.
-	*(struct ipv6hdr *)(new_eth + 1) = ip6;
-	return TC_ACT_OK;
-}
-
-char _license[] SEC("license") = ("GPL");
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000  // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr * const eth = data;  // used iff is_ethernet
+	const struct ipv6hdr * const ip6 =  (void *)(eth + 1);
+
+	// Require ethernet dst mac address to be our unicast address.
+	if  (skb->pkt_type != PACKET_HOST)
+		return TC_ACT_OK;
+
+	// Must be meta-ethernet IPv6 frame
+	if (skb->protocol != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// Must have (ethernet and) ipv6 header
+	if (data + l2_header_size + sizeof(*ip6) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv6
+	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	// IP version must be 6
+	if (ip6->version != 6)
+		return TC_ACT_OK;
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+		return TC_ACT_OK;
+	switch (ip6->nexthdr) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_UDP:  // address means there is no need to update their checksums.
+	case IPPROTO_GRE:  // We do not need to bother looking at GRE/ESP headers,
+	case IPPROTO_ESP:  // since there is never a checksum to update.
+		break;
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IP);  // But replace the ethertype
+
+	struct iphdr ip = {
+		.version = 4,                                                      // u4
+		.ihl = sizeof(struct iphdr) / sizeof(__u32),                       // u4
+		.tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4),             // u8
+		.tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)),  // u16
+		.id = 0,                                                           // u16
+		.frag_off = bpf_htons(IP_DF),                                          // u16
+		.ttl = ip6->hop_limit,                                             // u8
+		.protocol = ip6->nexthdr,                                          // u8
+		.check = 0,                                                        // u16
+		.saddr = 0x0201a8c0,                            // u32
+		.daddr = 0x0101a8c0,                                         // u32
+	};
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)&ip)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	ip.check = (__u16)~sum4;                // sum4 cannot be zero, so this is never 0xFFFF
+
+	// Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+	for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+		sum6 += ~((__u16 *)ip6)[i];  // note the bitwise negation
+
+	// Note that there is no L4 checksum update: we are relying on the checksum neutrality
+	// of the ipv6 address chosen by netd's ClatdController.
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+		return TC_ACT_OK;
+	bpf_csum_update(skb, sum6);
+
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+	if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+
+	// Copy over the new ipv4 header.
+	*(struct iphdr *)(new_eth + 1) = ip;
+	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+	const int l2_header_size =  sizeof(struct ethhdr);
+	void *data = (void *)(long)skb->data;
+	const void *data_end = (void *)(long)skb->data_end;
+	const struct ethhdr *const eth = data;  // used iff is_ethernet
+	const struct iphdr *const ip4 = (void *)(eth + 1);
+
+	// Must be meta-ethernet IPv4 frame
+	if (skb->protocol != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// Must have ipv4 header
+	if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+		return TC_ACT_OK;
+
+	// Ethertype - if present - must be IPv4
+	if (eth->h_proto != bpf_htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	// IP version must be 4
+	if (ip4->version != 4)
+		return TC_ACT_OK;
+
+	// We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+	if (ip4->ihl != 5)
+		return TC_ACT_OK;
+
+	// Maximum IPv6 payload length that can be translated to IPv4
+	if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+		return TC_ACT_OK;
+
+	// Calculate the IPv4 one's complement checksum of the IPv4 header.
+	__wsum sum4 = 0;
+
+	for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+		sum4 += ((__u16 *)ip4)[i];
+
+	// Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse u32 into range 1 .. 0x1FFFE
+	sum4 = (sum4 & 0xFFFF) + (sum4 >> 16);  // collapse any potential carry into u16
+	// for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+	if (sum4 != 0xFFFF)
+		return TC_ACT_OK;
+
+	// Minimum IPv4 total length is the size of the header
+	if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+		return TC_ACT_OK;
+
+	// We are incapable of dealing with IPv4 fragments
+	if (ip4->frag_off & ~bpf_htons(IP_DF))
+		return TC_ACT_OK;
+
+	switch (ip4->protocol) {
+	case IPPROTO_TCP:  // For TCP & UDP the checksum neutrality of the chosen IPv6
+	case IPPROTO_GRE:  // address means there is no need to update their checksums.
+	case IPPROTO_ESP:  // We do not need to bother looking at GRE/ESP headers,
+		break;         // since there is never a checksum to update.
+
+	case IPPROTO_UDP:  // See above comment, but must also have UDP header...
+		if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+			return TC_ACT_OK;
+		const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+		// If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+		// checksum.  Otherwise the network or more likely the NAT64 gateway might
+		// drop the packet because in most cases IPv6/UDP packets with a zero checksum
+		// are invalid. See RFC 6935.  TODO: calculate checksum via bpf_csum_diff()
+		if (!uh->check)
+			return TC_ACT_OK;
+		break;
+
+	default:  // do not know how to handle anything else
+		return TC_ACT_OK;
+	}
+	struct ethhdr eth2;  // used iff is_ethernet
+
+	eth2 = *eth;                     // Copy over the ethernet header (src/dst mac)
+	eth2.h_proto = bpf_htons(ETH_P_IPV6);  // But replace the ethertype
+
+	struct ipv6hdr ip6 = {
+		.version = 6,                                    // __u8:4
+		.priority = ip4->tos >> 4,                       // __u8:4
+		.flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0},       // __u8[3]
+		.payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20),  // __be16
+		.nexthdr = ip4->protocol,                        // __u8
+		.hop_limit = ip4->ttl,                           // __u8
+	};
+	ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.saddr.in6_u.u6_addr32[1] = 0;
+	ip6.saddr.in6_u.u6_addr32[2] = 0;
+	ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+	ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+	ip6.daddr.in6_u.u6_addr32[1] = 0;
+	ip6.daddr.in6_u.u6_addr32[2] = 0;
+	ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+	// Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+	__wsum sum6 = 0;
+	// We'll end up with a non-zero sum due to ip6.version == 6
+	for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+		sum6 += ((__u16 *)&ip6)[i];
+
+	// Packet mutations begin - point of no return, but if this first modification fails
+	// the packet is probably still pristine, so let clatd handle it.
+	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+		return TC_ACT_OK;
+
+	// This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+	// In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+	// thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+	// However, we've already verified the ipv4 checksum is correct and thus 0.
+	// Thus we only need to add the ipv6 header's sum.
+	//
+	// bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+	// (-ENOTSUPP) if it isn't.  So we just ignore the return code (see above for more details).
+	bpf_csum_update(skb, sum6);
+
+	// bpf_skb_change_proto() invalidates all pointers - reload them.
+	data = (void *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	// I cannot think of any valid way for this error condition to trigger, however I do
+	// believe the explicit check is required to keep the in kernel ebpf verifier happy.
+	if (data + l2_header_size + sizeof(ip6) > data_end)
+		return TC_ACT_SHOT;
+
+	struct ethhdr *new_eth = data;
+
+	// Copy over the updated ethernet header
+	*new_eth = eth2;
+	// Copy over the new ipv4 header.
+	*(struct ipv6hdr *)(new_eth + 1) = ip6;
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -40,8 +40,8 @@ run_one() {
 
 	ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
 	tc -n "${PEER_NS}" qdisc add dev veth1 clsact
-	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6  direct-action
-	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
+	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6  direct-action
+	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
         echo ${rx_args}
 	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
 
@@ -88,8 +88,8 @@ if [ ! -f ${BPF_FILE} ]; then
 	exit -1
 fi
 
-if [ ! -f bpf/nat6to4.o ]; then
-	echo "Missing nat6to4 helper. Build bpfnat6to4.o selftest first"
+if [ ! -f nat6to4.o ]; then
+	echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first"
 	exit -1
 fi
 


Patches currently in stable-queue which might be from liuhangbin@xxxxxxxxx are

queue-6.1/bonding-fix-macvlan-over-alb-bond-support.patch
queue-6.1/selftests-net-mv-bpf-nat6to4.c-to-net-folder.patch
queue-6.1/selftests-bonding-do-not-set-port-down-before-adding.patch



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux