Re: bpf_fib_lookup support for firewall mark

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Daniel, BPF list,

Over the last week or so, David Ahern and I worked on a patchset that solves
the problem discussed here, along with a self-test.

Attached here is a patchset of 3 files which covers the following:

[PATCH 1/3] bpf: Add support for mark with bpf_fib_lookup
[PATCH 2/3] tools: Update bpf header
[PATCH 3/3] selftests: Add selftests for fwmark support in bpf_fib_lookup

I have tested these against a very recent clone of 5.13-rc7.
Self-test results look like this:

----
# ./test_bpf_fib_lookup.sh
[test_bpf_fib_lookup.sh] START
- Running test_egress_ipv4_fwmark
  * mark 0: PASS
  * mark 2: PASS
- Running test_egress_ipv6_fwmark
  * mark 0: PASS
  * mark 2: PASS
[test_bpf_fib_lookup.sh] PASS: 4 -- FAIL: 0
# echo $?
0
----

Let me know what you think, whether anything else is needed to incorporate
the patchset into the kernel, and what the next steps should be.

Regards,
Rumen Telbizov
From 417af137273a310e0acc9bc068c02777f813b004 Mon Sep 17 00:00:00 2001
From: Rumen Telbizov <telbizov@gmail.com>
Date: Mon, 28 Jun 2021 12:19:39 -0700
Subject: [PATCH 2/3] tools: Update bpf header

Update bpf header under tools to bring in the bpf_fib_lookup
struct changes.

Signed-off-by: Rumen Telbizov <telbizov@gmail.com>
---
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ec6d85a81744..6c78cc9c3c75 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5925,8 +5925,20 @@ struct bpf_fib_lookup {
 	/* output */
 	__be16	h_vlan_proto;
 	__be16	h_vlan_TCI;
-	__u8	smac[6];     /* ETH_ALEN */
-	__u8	dmac[6];     /* ETH_ALEN */
+
+	union {
+		/* input */
+		struct {
+			__u32	mark;   /* fwmark for policy routing */
+			/* 2 4-byte holes for input */
+		};
+
+		/* output: source and dest mac */
+		struct {
+			__u8	smac[6];	/* ETH_ALEN */
+			__u8	dmac[6];	/* ETH_ALEN */
+		};
+	};
 };
 
 struct bpf_redir_neigh {
-- 
2.30.1 (Apple Git-130)

From da85375a1cd2440881c37b1a6203196b05b35531 Mon Sep 17 00:00:00 2001
From: Rumen Telbizov <telbizov@gmail.com>
Date: Mon, 28 Jun 2021 12:20:57 -0700
Subject: [PATCH 3/3] selftests: Add selftests for fwmark support in
 bpf_fib_lookup

Add selftests for ensuring:
     * IPv4 route match according to ip rule fwmark
     * IPv6 route match according to ip rule fwmark

Signed-off-by: Rumen Telbizov <telbizov@gmail.com>
---
 tools/testing/selftests/bpf/Makefile          |   1 +
 .../selftests/bpf/progs/test_bpf_fib_lookup.c | 135 ++++++++++++++
 .../selftests/bpf/test_bpf_fib_lookup.sh      | 166 ++++++++++++++++++
 3 files changed, 302 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_bpf_fib_lookup.c
 create mode 100755 tools/testing/selftests/bpf/test_bpf_fib_lookup.sh

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 511259c2c6c5..afbac539e20d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -73,6 +73,7 @@ TEST_PROGS := test_kmod.sh \
 	test_bpftool_build.sh \
 	test_bpftool.sh \
 	test_bpftool_metadata.sh \
+	test_bpf_fib_lookup.sh \
 	test_doc_build.sh \
 	test_xsk.sh
 
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_fib_lookup.c b/tools/testing/selftests/bpf/progs/test_bpf_fib_lookup.c
new file mode 100644
index 000000000000..e4bbfb01ab86
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_fib_lookup.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * @author:  Rumen Telbizov <telbizov@gmail.com> <rumen.telbizov@menlosecurity.com>
+ * @created: Wed Jun 23 17:33:19 UTC 2021
+ *
+ * @description:
+ * Perform tests against bpf_fib_lookup()
+ * Communicates the results back via the trace buffer for the calling script
+ * to parse - /sys/kernel/debug/tracing/trace
+ *
+ */
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+/* Emit one formatted message to the trace buffer via bpf_trace_printk(). */
+#define BPF_TRACE(fmt, ...) ({						\
+	static const char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__);	\
+})
+
+/*
+ * For ICMP packets to 1.2.3.4: call bpf_fib_lookup() with skb->mark as fwmark
+ * and, if the lookup succeeds, trace fib.ipv4_dst and the mark.
+ */
+SEC("test_egress_ipv4_fwmark")
+int __test_egress_ipv4_fwmark(struct __sk_buff *skb)
+{
+	void *data      = (void *)(long)skb->data;
+	void *data_end  = (void *)(long)skb->data_end;
+	struct bpf_fib_lookup fib;
+	struct ethhdr *eth = data;
+	struct iphdr *ip = data + sizeof(*eth);
+
+	if (data + sizeof(*eth) > data_end)
+		return TC_ACT_OK;
+
+	if (eth->h_proto != htons(ETH_P_IP))
+		return TC_ACT_OK;
+
+	if (data + sizeof(*eth) + sizeof(*ip) > data_end)
+		return TC_ACT_OK;
+
+	if (ip->protocol != IPPROTO_ICMP || ntohl(ip->daddr) != 0x01020304)
+		return TC_ACT_OK;
+
+	__builtin_memset(&fib, 0x0, sizeof(fib));
+	fib.family      = AF_INET;
+	fib.l4_protocol = ip->protocol;
+	fib.tot_len     = ntohs(ip->tot_len);	/* wire -> host order */
+	fib.ifindex     = skb->ifindex;
+	fib.tos         = ip->tos;
+	fib.ipv4_src    = ip->saddr;
+	fib.ipv4_dst    = ip->daddr;
+	fib.mark        = skb->mark;
+
+	if (bpf_fib_lookup(skb, &fib, sizeof(fib), 0) < 0)
+		return TC_ACT_OK;
+
+	BPF_TRACE("<test_bpf_fib_lookup: test_egress_ipv4_fwmark> fib.ipv4_dst: <%x> mark: <%d>",
+		  ntohl(fib.ipv4_dst), skb->mark);
+	return TC_ACT_OK;
+}
+
+/*
+ * For ICMPv6 packets to 2000::2000: call bpf_fib_lookup() with skb->mark as
+ * fwmark and, if the lookup succeeds, trace fib.ipv6_dst (spread over three
+ * messages) together with the mark.
+ */
+SEC("test_egress_ipv6_fwmark")
+int __test_egress_ipv6_fwmark(struct __sk_buff *skb)
+{
+	void *data      = (void *)(long)skb->data;
+	void *data_end  = (void *)(long)skb->data_end;
+	struct bpf_fib_lookup fib;
+	struct in6_addr *src = (struct in6_addr *)fib.ipv6_src;
+	struct in6_addr *dst = (struct in6_addr *)fib.ipv6_dst;
+	struct ethhdr *eth = data;
+	struct ipv6hdr *ip = data + sizeof(*eth);
+
+	if (data + sizeof(*eth) > data_end)
+		return TC_ACT_OK;
+
+	if (eth->h_proto != htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	if (data + sizeof(*eth) + sizeof(*ip) > data_end)
+		return TC_ACT_OK;
+
+	if (ip->nexthdr != IPPROTO_ICMPV6)
+		return TC_ACT_OK;
+
+	/* Anything not destined to 2000::2000 is passed through untouched */
+	if (ip->daddr.s6_addr16[0] != htons(0x2000) ||
+	    ip->daddr.s6_addr16[1] || ip->daddr.s6_addr16[2] ||
+	    ip->daddr.s6_addr16[3] || ip->daddr.s6_addr16[4] ||
+	    ip->daddr.s6_addr16[5] || ip->daddr.s6_addr16[6] ||
+	    ip->daddr.s6_addr16[7] != htons(0x2000))
+		return TC_ACT_OK;
+
+	__builtin_memset(&fib, 0x0, sizeof(fib));
+
+	fib.family      = AF_INET6;
+	fib.flowinfo    = 0;
+	fib.l4_protocol = ip->nexthdr;
+	fib.tot_len     = ntohs(ip->payload_len);
+	fib.ifindex     = skb->ifindex;
+	fib.mark        = skb->mark;
+	/* src/dst alias fib.ipv6_src/fib.ipv6_dst: copy the header addresses */
+	*src = ip->saddr;
+	*dst = ip->daddr;
+
+	if (bpf_fib_lookup(skb, &fib, sizeof(fib), 0) < 0)
+		return TC_ACT_OK;
+
+	/* The address is traced in three pieces, at most three values each */
+	BPF_TRACE("<test_bpf_fib_lookup - egress_IPv6> fib.ipv6_dst<0-2>: <%04x:%04x:%04x>",
+		  ntohs(dst->s6_addr16[0]), ntohs(dst->s6_addr16[1]),
+		  ntohs(dst->s6_addr16[2]));
+	BPF_TRACE("<test_bpf_fib_lookup - egress_IPv6> fib.ipv6_dst<3-5>: <%04x:%04x:%04x>",
+		  ntohs(dst->s6_addr16[3]), ntohs(dst->s6_addr16[4]),
+		  ntohs(dst->s6_addr16[5]));
+	BPF_TRACE("<test_bpf_fib_lookup - egress_IPv6> fib.ipv6_dst<6-7>: <%04x:%04x> mark: <%d>",
+		  ntohs(dst->s6_addr16[6]), ntohs(dst->s6_addr16[7]),
+		  skb->mark);
+
+	return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpf_fib_lookup.sh b/tools/testing/selftests/bpf/test_bpf_fib_lookup.sh
new file mode 100755
index 000000000000..4b8cc984b486
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpf_fib_lookup.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# @author:  Rumen Telbizov <telbizov@gmail.com> <rumen.telbizov@menlosecurity.com>
+# @created: Wed Jun 23 17:33:19 UTC 2021
+# @description:
+# Test coverage for bpf_fib_lookup():
+#  * IPv4 route match according to ip rule fwmark
+#  * IPv6 route match according to ip rule fwmark
+#
+
+#
+# Global Variables
+#
+PASS=0			# number of checks that passed
+FAIL=0			# number of checks that failed
+
+CYAN='\033[0;36m'	# colour for the name of the running test function
+GREEN='\033[0;92m'	# colour for a PASS verdict
+RED='\033[0;31m'	# colour for a FAIL verdict
+NC='\033[0m'		# emitted after each coloured string
+
+#
+# Functions
+#
+setup() {
+    # veth1 (in ns1) <-> veth2 (in ns2); ns2 holds both gateway addresses
+    local ns1="ip netns exec ns1" ns2="ip netns exec ns2"
+    ip netns add ns1
+    ip netns add ns2
+    ip link add veth1 index 100 type veth peer name veth2 index 200
+    ip link set veth1 netns ns1 up
+    ip link set veth2 netns ns2 up
+
+    $ns1 sysctl net.ipv4.ip_forward=1 >/dev/null
+    $ns1 sysctl net.ipv6.conf.all.forwarding=1 >/dev/null
+
+    $ns1 ip addr add dev veth1 192.168.0.100/24
+    $ns2 ip addr add dev veth2 192.168.0.1/24
+    $ns2 ip addr add dev veth2 192.168.0.2/24
+    $ns1 ip -6 addr add dev veth1 fd00::100/64 nodad
+    $ns2 ip -6 addr add dev veth2 fd00::1/64 nodad
+    $ns2 ip -6 addr add dev veth2 fd00::2/64 nodad
+
+    # default table: default route via 192.168.0.1 / fd00::1
+    $ns1 ip route add default via 192.168.0.1
+    $ns1 ip -6 route add default via fd00::1
+
+    # table 2, looked up for fwmark 2: default route via 192.168.0.2 / fd00::2
+    $ns1 ip route add default via 192.168.0.2 table 2
+    $ns1 ip -6 route add default via fd00::2 table 2
+    $ns1 ip rule add prio 2 fwmark 2 lookup 2
+    $ns1 ip -6 rule add prio 2 fwmark 2 lookup 2
+
+    $ns1 tc qdisc add dev veth1 clsact
+}
+
+# Empty the trace buffer and delete both test namespaces (stderr silenced)
+cleanup() {
+    local ns
+    echo > /sys/kernel/debug/tracing/trace
+    for ns in ns1 ns2; do ip netns del "$ns" 2>/dev/null; done
+}
+
+
+test_egress_ipv4_fwmark() {
+    local trace=/sys/kernel/debug/tracing/trace
+    local tag='<test_bpf_fib_lookup: test_egress_ipv4_fwmark>'
+    echo -e "- Running ${CYAN}${FUNCNAME[0]}${NC}"
+    ip netns exec ns1 tc filter del dev veth1 egress
+    ip netns exec ns1 tc filter add dev veth1 egress \
+	bpf da obj test_bpf_fib_lookup.o sec test_egress_ipv4_fwmark
+
+    # Unmarked ping: expect fib.ipv4_dst 192.168.0.1 (c0a80001)
+    echo -n "  * mark 0: "
+    echo > "$trace"
+    ip netns exec ns1 ping -W 0.1 -c 1 1.2.3.4 >/dev/null
+    if grep -q "$tag fib.ipv4_dst: <c0a80001> mark: <0>" "$trace"; then
+        PASS=$((PASS + 1))
+        echo -e "${GREEN}PASS${NC}"
+    else
+        FAIL=$((FAIL + 1))
+        echo -e "${RED}FAIL${NC}"
+    fi
+
+    # Ping sent with mark 2: expect fib.ipv4_dst 192.168.0.2 (c0a80002)
+    echo -n "  * mark 2: "
+    echo > "$trace"
+    ip netns exec ns1 ping -W 0.1 -c 1 1.2.3.4 -m 2 >/dev/null
+    if grep -q "$tag fib.ipv4_dst: <c0a80002> mark: <2>" "$trace"; then
+        PASS=$((PASS + 1))
+        echo -e "${GREEN}PASS${NC}"
+    else
+        FAIL=$((FAIL + 1))
+        echo -e "${RED}FAIL${NC}"
+    fi
+}
+
+
+# IPv6 variant of the fwmark check. Three trace messages are expected per
+# ping, covering 16-bit words 0-2, 3-5 and 6-7 of fib.ipv6_dst (the last
+# message also carries the mark). A case passes only if all three expected
+# messages are found in the trace buffer: fd00::1 for the unmarked ping,
+# fd00::2 for the ping sent with mark 2.
+test_egress_ipv6_fwmark() {
+    local trace=/sys/kernel/debug/tracing/trace
+    local tag='<test_bpf_fib_lookup - egress_IPv6>' missing
+
+    echo -e "- Running ${CYAN}${FUNCNAME[0]}${NC}"
+    ip netns exec ns1 tc filter del dev veth1 egress
+    ip netns exec ns1 tc filter add dev veth1 egress \
+	bpf da obj test_bpf_fib_lookup.o sec test_egress_ipv6_fwmark
+
+    # Unmarked ping
+    echo -n "  * mark 0: "
+    echo > "$trace"
+    ip netns exec ns1 ping -6 -W 0.1 -c 1 2000::2000 >/dev/null
+    missing=0
+    grep -q "$tag fib.ipv6_dst<0-2>: <fd00:0000:0000>" "$trace" || missing=1
+    grep -q "$tag fib.ipv6_dst<3-5>: <0000:0000:0000>" "$trace" || missing=1
+    grep -q "$tag fib.ipv6_dst<6-7>: <0000:0001> mark: <0>" "$trace" || missing=1
+    if [ "$missing" -eq 0 ]; then
+        PASS=$((PASS + 1))
+        echo -e "${GREEN}PASS${NC}"
+    else
+        FAIL=$((FAIL + 1))
+        echo -e "${RED}FAIL${NC}"
+    fi
+
+    # Ping sent with mark 2
+    echo -n "  * mark 2: "
+    echo > "$trace"
+    ip netns exec ns1 ping -6 -W 0.1 -c 1 2000::2000 -m 2 >/dev/null
+    missing=0
+    grep -q "$tag fib.ipv6_dst<0-2>: <fd00:0000:0000>" "$trace" || missing=1
+    grep -q "$tag fib.ipv6_dst<3-5>: <0000:0000:0000>" "$trace" || missing=1
+    grep -q "$tag fib.ipv6_dst<6-7>: <0000:0002> mark: <2>" "$trace" || missing=1
+    if [ "$missing" -eq 0 ]; then
+        PASS=$((PASS + 1))
+        echo -e "${GREEN}PASS${NC}"
+    else
+        FAIL=$((FAIL + 1))
+        echo -e "${RED}FAIL${NC}"
+    fi
+}
+
+#
+# MAIN
+#
+
+# Always clean up on exit. Signals are turned into an exit so the script does
+# not keep running after cleanup; SIGKILL cannot be trapped and is not listed.
+trap cleanup EXIT
+trap 'exit 1' INT QUIT ABRT TERM
+echo "[$(basename "$0")] START"
+
+cleanup
+setup
+
+test_egress_ipv4_fwmark
+test_egress_ipv6_fwmark
+
+cleanup
+
+echo "[$(basename "$0")] PASS: $PASS -- FAIL: $FAIL"
+exit $((FAIL > 0 ? 1 : 0))
-- 
2.30.1 (Apple Git-130)

From 4df801e52670c7a81c815dd1554871d329665389 Mon Sep 17 00:00:00 2001
From: Rumen Telbizov <telbizov@gmail.com>
Date: Mon, 28 Jun 2021 12:18:44 -0700
Subject: [PATCH 1/3] bpf: Add support for mark with bpf_fib_lookup

Add support for policy routing via marks to the bpf_fib_lookup
helper. The bpf_fib_lookup struct is constrained to 64B for
performance. Since the smac and dmac entries are used only for
output, put them in an anonymous struct and then add a union
around a second struct that contains the mark to use in the FIB
lookup.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Rumen Telbizov <telbizov@gmail.com>
---
 include/uapi/linux/bpf.h | 16 ++++++++++++++--
 net/core/filter.c        |  4 ++--
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ec6d85a81744..6c78cc9c3c75 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5925,8 +5925,20 @@ struct bpf_fib_lookup {
 	/* output */
 	__be16	h_vlan_proto;
 	__be16	h_vlan_TCI;
-	__u8	smac[6];     /* ETH_ALEN */
-	__u8	dmac[6];     /* ETH_ALEN */
+
+	union {
+		/* input */
+		struct {
+			__u32	mark;   /* fwmark for policy routing */
+			/* 2 4-byte holes for input */
+		};
+
+		/* output: source and dest mac */
+		struct {
+			__u8	smac[6];	/* ETH_ALEN */
+			__u8	dmac[6];	/* ETH_ALEN */
+		};
+	};
 };
 
 struct bpf_redir_neigh {
diff --git a/net/core/filter.c b/net/core/filter.c
index 65ab4e21c087..2ea997cacf4d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5299,6 +5299,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	fl4.saddr = params->ipv4_src;
 	fl4.fl4_sport = params->sport;
 	fl4.fl4_dport = params->dport;
+	fl4.flowi4_mark = params->mark;
 	fl4.flowi4_multipath_hash = 0;
 
 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
@@ -5311,7 +5312,6 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 
 		err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
 	} else {
-		fl4.flowi4_mark = 0;
 		fl4.flowi4_secid = 0;
 		fl4.flowi4_tun_key.tun_id = 0;
 		fl4.flowi4_uid = sock_net_uid(net, NULL);
@@ -5425,6 +5425,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 	fl6.saddr = *src;
 	fl6.fl6_sport = params->sport;
 	fl6.fl6_dport = params->dport;
+	fl6.flowi6_mark = params->mark;
 
 	if (flags & BPF_FIB_LOOKUP_DIRECT) {
 		u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
@@ -5437,7 +5438,6 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 		err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
 						   strict);
 	} else {
-		fl6.flowi6_mark = 0;
 		fl6.flowi6_secid = 0;
 		fl6.flowi6_tun_key.tun_id = 0;
 		fl6.flowi6_uid = sock_net_uid(net, NULL);
-- 
2.30.1 (Apple Git-130)


[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux