Re: [PATCH bpf-next v9 4/5] bpf: Add selftests for raw syncookie helpers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2022-05-07 00:34, Andrii Nakryiko wrote:
On Tue, May 3, 2022 at 10:15 AM Maxim Mikityanskiy <maximmi@xxxxxxxxxx> wrote:

This commit adds selftests for the new BPF helpers:
bpf_tcp_raw_{gen,check}_syncookie_ipv{4,6}.

xdp_synproxy_kern.c is a BPF program that generates SYN cookies on
allowed TCP ports and sends SYNACKs to clients, accelerating synproxy
iptables module.

xdp_synproxy.c is a userspace control application that allows to
configure the following options in runtime: list of allowed ports, MSS,
window scale, TTL.

A selftest is added to prog_tests that leverages the above programs to
test the functionality of the new helpers.

Signed-off-by: Maxim Mikityanskiy <maximmi@xxxxxxxxxx>
Reviewed-by: Tariq Toukan <tariqt@xxxxxxxxxx>
---

selftests should use "selftests/bpf: " subject prefix, not "bpf: ",
please update so it's more obvious that this patch touches selftests
and not kernel-side BPF functionality.

  tools/testing/selftests/bpf/.gitignore        |   1 +
  tools/testing/selftests/bpf/Makefile          |   5 +-
  .../selftests/bpf/prog_tests/xdp_synproxy.c   | 109 +++
  .../selftests/bpf/progs/xdp_synproxy_kern.c   | 750 ++++++++++++++++++
  tools/testing/selftests/bpf/xdp_synproxy.c    | 418 ++++++++++
  5 files changed, 1281 insertions(+), 2 deletions(-)
  create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
  create mode 100644 tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
  create mode 100644 tools/testing/selftests/bpf/xdp_synproxy.c

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 595565eb68c0..ca2f47f45670 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -43,3 +43,4 @@ test_cpp
  *.tmp
  xdpxceiver
  xdp_redirect_multi
+xdp_synproxy
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index bafdc5373a13..8ae602843b16 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,9 +82,9 @@ TEST_PROGS_EXTENDED := with_addr.sh \
  TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
         flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
         test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-       xdpxceiver xdp_redirect_multi
+       xdpxceiver xdp_redirect_multi xdp_synproxy

-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/xdp_synproxy

  # Emit succinct information message describing current building step
  # $1 - generic step name (e.g., CC, LINK, etc);
@@ -500,6 +500,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c      \
                          cap_helpers.c
  TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
                        $(OUTPUT)/liburandom_read.so                     \
+                      $(OUTPUT)/xdp_synproxy                           \

this is the right way to make external binary available to test_progs
flavors, but is there anything inherently requiring external binary
instead of having a helper function doing the same? urandom_read has
to be a separate binary.

If you remember v1, it used to be a sample, but I was asked to convert it to a selftest, because samples are deprecated. The intention of having this separate binary is to have a sample reference implementation that can be used in real-world scenarios with minor or no changes.

                        ima_setup.sh                                     \
                        $(wildcard progs/btf_dump_test_case_*.c)
  TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
new file mode 100644
index 000000000000..e08b28e25047
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#define SYS(cmd) ({ \
+       if (!ASSERT_OK(system(cmd), (cmd))) \
+               goto out; \
+})
+
+#define SYS_OUT(cmd) ({ \
+       FILE *f = popen((cmd), "r"); \
+       if (!ASSERT_OK_PTR(f, (cmd))) \
+               goto out; \
+       f; \
+})
+
+static bool expect_str(char *buf, size_t size, const char *str)
+{
+       if (size != strlen(str))
+               return false;
+       return !memcmp(buf, str, size);
+}
+
+void test_xdp_synproxy(void)
+{
+       int server_fd = -1, client_fd = -1, accept_fd = -1;
+       struct nstoken *ns = NULL;
+       FILE *ctrl_file = NULL;
+       char buf[1024];
+       size_t size;
+
+       SYS("ip netns add synproxy");
+
+       SYS("ip link add tmp0 type veth peer name tmp1");
+       SYS("ip link set tmp1 netns synproxy");
+       SYS("ip link set tmp0 up");
+       SYS("ip addr replace 198.18.0.1/24 dev tmp0");

+
+       // When checksum offload is enabled, the XDP program sees wrong
+       // checksums and drops packets.
+       SYS("ethtool -K tmp0 tx off");
+       // Workaround required for veth.

don't use C++ comments, please stick to /* */

+       SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");
+
+       ns = open_netns("synproxy");
+       if (!ASSERT_OK_PTR(ns, "setns"))
+               goto out;
+
+       SYS("ip link set lo up");
+       SYS("ip link set tmp1 up");
+       SYS("ip addr replace 198.18.0.2/24 dev tmp1");
+       SYS("sysctl -w net.ipv4.tcp_syncookies=2");
+       SYS("sysctl -w net.ipv4.tcp_timestamps=1");
+       SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+       SYS("iptables -t raw -I PREROUTING \
+           -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
+       SYS("iptables -t filter -A INPUT \
+           -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
+           -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
+       SYS("iptables -t filter -A INPUT \
+           -i tmp1 -m state --state INVALID -j DROP");
+
+       ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 --single \
+                           --mss4 1460 --mss6 1440 --wscale 7 --ttl 64");
+       size = fread(buf, 1, sizeof(buf), ctrl_file);

buf is uninitialized so if fread fail strlen() can cause SIGSEGV or
some other failure mode

No, it will exit on the assert below (size won't be equal to strlen(str)).


+       pclose(ctrl_file);
+       if (!ASSERT_TRUE(expect_str(buf, size, "Total SYNACKs generated: 0\n"),
+                        "initial SYNACKs"))
+               goto out;
+
+       server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
+       if (!ASSERT_GE(server_fd, 0, "start_server"))
+               goto out;
+
+       close_netns(ns);
+       ns = NULL;
+
+       client_fd = connect_to_fd(server_fd, 10000);
+       if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+               goto out;
+
+       accept_fd = accept(server_fd, NULL, NULL);
+       if (!ASSERT_GE(accept_fd, 0, "accept"))
+               goto out;
+
+       ns = open_netns("synproxy");
+       if (!ASSERT_OK_PTR(ns, "setns"))
+               goto out;
+
+       ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
+       size = fread(buf, 1, sizeof(buf), ctrl_file);
+       pclose(ctrl_file);
+       if (!ASSERT_TRUE(expect_str(buf, size, "Total SYNACKs generated: 1\n"),
+                        "SYNACKs after connection"))

please use ASSERT_STREQ instead, same above

It doesn't fit here for two reasons:

* It doesn't consider size (and ignoring size will cause a UB on errors because of the uninitialized buf).

* buf is not '\0'-terminated, and ASSERT_STREQ uses strcmp.


+               goto out;
+
+out:
+       if (accept_fd >= 0)
+               close(accept_fd);
+       if (client_fd >= 0)
+               close(client_fd);
+       if (server_fd >= 0)
+               close(server_fd);
+       if (ns)
+               close_netns(ns);
+
+       system("ip link del tmp0");
+       system("ip netns del synproxy");
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
new file mode 100644
index 000000000000..9ae85b189072
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -0,0 +1,750 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB

Can you please elaborate on what Linux-OpenIB license is and why
GPL-2.0 isn't enough? We usually have GPL-2.0 or LGPL-2.1 OR
BSD-2-Clause

That's the license boilerplate we use in the mlx5e driver. I'll check with the relevant people whether we can submit it as GPL-2.0 solely.

+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <asm/errno.h>
+

[...]

+
+static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+                                              __u32 len, __u8 proto,
+                                              __u32 csum)
+{
+       __u64 s = csum;
+
+       s += (__u32)saddr;
+       s += (__u32)daddr;
+#if defined(__BIG_ENDIAN__)
+       s += proto + len;
+#elif defined(__LITTLE_ENDIAN__)

I've got few nudges in libbpf code base previously to use

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__

instead (I don't remember the exact reason now, but there was a
reason). Let's do the same here for consistency?

OK.

samples/bpf/xdpsock_user.c also still uses __BIG_ENDIAN__.

+       s += (proto + len) << 8;
+#else
+#error Unknown endian
+#endif
+       s = (s & 0xffffffff) + (s >> 32);
+       s = (s & 0xffffffff) + (s >> 32);
+
+       return csum_fold((__u32)s);
+}
+
+static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
+                                            const struct in6_addr *daddr,
+                                            __u32 len, __u8 proto, __u32 csum)
+{
+       __u64 sum = csum;
+       int i;
+
+#pragma unroll
+       for (i = 0; i < 4; i++)
+               sum += (__u32)saddr->in6_u.u6_addr32[i];
+
+#pragma unroll

why unroll? BPF verifier handles such loops just fine, even if
compiler decides to not unroll them

Optimization, see csum_ipv6_magic in net/ipv6/ip6_checksum.c that has this loop unrolled manually.

+       for (i = 0; i < 4; i++)
+               sum += (__u32)daddr->in6_u.u6_addr32[i];
+
+       // Don't combine additions to avoid 32-bit overflow.
+       sum += bpf_htonl(len);
+       sum += bpf_htonl(proto);
+
+       sum = (sum & 0xffffffff) + (sum >> 32);
+       sum = (sum & 0xffffffff) + (sum >> 32);
+
+       return csum_fold((__u32)sum);
+}
+
+static __always_inline __u64 tcp_clock_ns(void)

__always_inline isn't mandatory, you can just have static __u64
tcp_clock_ns() here and let compiler decide on inlining? same for
below

Do you mean just these three functions, or all functions below, or actually all functions in this file?

It's not mandatory, but these are simple one-liners, it would be unpleasant to waste an extra call in performance-critical code if the compiler decides not to inline them.

+{
+       return bpf_ktime_get_ns();
+}
+
+static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
+{
+       return ns / (NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+static __always_inline __u32 tcp_time_stamp_raw(void)
+{
+       return tcp_ns_to_ts(tcp_clock_ns());
+}
+

[...]

+static __always_inline void values_inc_synacks(void)
+{
+       __u32 key = 1;
+       __u32 *value;
+
+       value = bpf_map_lookup_elem(&values, &key);
+       if (value)
+               __sync_fetch_and_add(value, 1);
+}
+
+static __always_inline bool check_port_allowed(__u16 port)
+{
+       __u32 i;
+
+       for (i = 0; i < MAX_ALLOWED_PORTS; i++) {
+               __u32 key = i;
+               __u16 *value;
+
+               value = bpf_map_lookup_elem(&allowed_ports, &key);
+
+               if (!value)
+                       break;
+               // 0 is a terminator value. Check it first to avoid matching on
+               // a forbidden port == 0 and returning true.

please no C++ comments (everywhere)

+               if (*value == 0)
+                       break;
+
+               if (*value == port)
+                       return true;
+       }
+
+       return false;
+}
+

[...]




[Index of Archives]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Device Mapper]

  Powered by Linux