From: Daniel Borkmann <daniel@xxxxxxxxxxxxx> We use two programs to check that the new reuseport logic is executed appropriately. The first is a TC clsact program which bpf_sk_assigns the skb to a UDP or TCP socket created by user space. Since the test communicates via lo we see both directions of packets in the eBPF. Traffic ingressing to the reuseport socket is identified by looking at the destination port. For TCP, we additionally need to make sure that we only assign the initial SYN packets towards our listening socket. The network stack then creates a request socket which transitions to ESTABLISHED after the 3WHS. The second is a reuseport program which shares the fact that it has been executed with user space. This tells us that the delayed lookup mechanism is working. Signed-off-by: Daniel Borkmann <daniel@xxxxxxxxxxxxx> Co-developed-by: Lorenz Bauer <lmb@xxxxxxxxxxxxx> Signed-off-by: Lorenz Bauer <lmb@xxxxxxxxxxxxx> Cc: Joe Stringer <joe@xxxxxxxxx> --- tools/testing/selftests/bpf/network_helpers.c | 3 + .../selftests/bpf/prog_tests/assign_reuse.c | 280 ++++++++++++++++++ .../selftests/bpf/progs/test_assign_reuse.c | 142 +++++++++ 3 files changed, 425 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/assign_reuse.c create mode 100644 tools/testing/selftests/bpf/progs/test_assign_reuse.c diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index a105c0cd008a..8a33bcea97de 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -423,6 +423,9 @@ struct nstoken *open_netns(const char *name) void close_netns(struct nstoken *token) { + if (!token) + return; + ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns"); close(token->orig_netns_fd); free(token); diff --git a/tools/testing/selftests/bpf/prog_tests/assign_reuse.c b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c new file mode 100644 index 000000000000..2cb9bb591e71 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <uapi/linux/if_link.h> +#include <test_progs.h> + +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#include "network_helpers.h" +#include "test_assign_reuse.skel.h" + +#define NS_TEST "assign_reuse" +#define LOOPBACK 1 +#define PORT 4443 + +static int attach_reuseport(int sock_fd, int prog_fd) +{ + return setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, + &prog_fd, sizeof(prog_fd)); +} + +static socklen_t inetaddr_len(const struct sockaddr_storage *addr) +{ + return addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) : + addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : + 0; +} + +static bool is_ipv6(const char *ip) +{ + return !!strchr(ip, ':'); +} + +static int make_socket(int sotype, const char *ip, int port, + struct sockaddr_storage *addr) +{ + struct timeval timeo = { .tv_usec = 500000 /* 500 ms */ }; + int family = is_ipv6(ip) ? AF_INET6 : AF_INET; + int ret, fd; + + ret = make_sockaddr(family, ip, port, addr, NULL); + if (!ASSERT_OK(ret, "make_sockaddr")) + return -1; + fd = socket(addr->ss_family, sotype, 0); + if (!ASSERT_GE(fd, 0, "socket")) + return -1; + ret = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo)); + if (!ASSERT_OK(ret, "sndtimeo")) { + close(fd); + return -1; + } + ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo)); + if (!ASSERT_OK(ret, "rcvtimeo")) { + close(fd); + return -1; + } + return fd; +} + +static int create_server(int sotype, const char *ip, int port, int prog_fd) +{ + struct sockaddr_storage addr = {}; + const int one = 1; + int ret, fd; + + fd = make_socket(sotype, ip, port, &addr); + if (fd < 0) + return -1; + if (sotype == SOCK_STREAM) { + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one)); + if (!ASSERT_OK(ret, "reuseaddr")) + goto cleanup; + } + if (prog_fd >= 0) { + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, + sizeof(one)); + if (!ASSERT_OK(ret, "reuseport")) + goto cleanup; + ret = attach_reuseport(fd, prog_fd); + if (!ASSERT_OK(ret, "attach_reuseport")) + goto cleanup; + } + ret = bind(fd, (void *)&addr, inetaddr_len(&addr)); + if (!ASSERT_OK(ret, "bind")) + goto cleanup; + if (sotype == SOCK_STREAM) { + ret = listen(fd, SOMAXCONN); + if (!ASSERT_OK(ret, "listen")) + goto cleanup; + } + return fd; +cleanup: + close(fd); + return -1; +} + +static int create_client(int sotype, const char *ip, int port) +{ + struct sockaddr_storage addr = {}; + int ret, fd; + + fd = make_socket(sotype, ip, port, &addr); + if (fd < 0) + return -1; + ret = connect(fd, (void *)&addr, inetaddr_len(&addr)); + if (ret) + goto cleanup; + return fd; +cleanup: + close(fd); + return -1; +} + +static __u64 cookie(int fd) +{ + __u64 cookie = 0; + socklen_t cookie_len = sizeof(cookie); + int ret; + + ret = getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len); + ASSERT_OK(ret, "cookie"); + ASSERT_GT(cookie, 0, "cookie_invalid"); + + return cookie; +} + +static int echo_test_udp(int fd_sv, const char *ip, int port) +{ + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + char buff[1] = {}; + int fd_cl = -1, ret; + + fd_cl = create_client(SOCK_DGRAM, ip, port); + ASSERT_GT(fd_cl, 0, "create_client"); + ASSERT_EQ(getsockname(fd_cl, (void *)&addr, &len), 0, "getsockname"); + + ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client"); + + ret = recv(fd_sv, buff, sizeof(buff), 0); + if (ret < 0) + return errno; + + ASSERT_EQ(ret, 1, "recv_server"); + ASSERT_EQ(sendto(fd_sv, buff, sizeof(buff), 0, (void *)&addr, len), 1, "send_server"); + ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client"); + close(fd_cl); + return 0; +} + +static int echo_test_tcp(int fd_sv, const char *ip, int port) +{ + char buff[1] = {}; + int fd_cl = -1, fd_sv_cl = -1; + + fd_cl = create_client(SOCK_STREAM, ip, port); + if (fd_cl < 0) + return errno; + + fd_sv_cl = accept(fd_sv, NULL, NULL); + ASSERT_GE(fd_sv_cl, 0, "accept_fd"); + + ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client"); + ASSERT_EQ(recv(fd_sv_cl, buff, sizeof(buff), 0), 1, "recv_server"); + ASSERT_EQ(send(fd_sv_cl, buff, sizeof(buff), 0), 1, "send_server"); + ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client"); + close(fd_sv_cl); + close(fd_cl); + return 0; +} + +void run_assign_reuse(int sotype, const char *ip, int port) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = LOOPBACK, + .attach_point = BPF_TC_INGRESS, ); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1, ); + bool hook_created = false, tc_attached = false; + int ret, fd_tc, fd_accept, fd_drop, fd_sv = -1, fd_map; + __u64 fd_val; + struct test_assign_reuse *skel; + const int zero = 0; + + skel = test_assign_reuse__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->rodata->dest_port = port; + + ret = test_assign_reuse__load(skel); + if (!ASSERT_OK(ret, "skel_load")) + goto cleanup; + + ASSERT_EQ(skel->bss->sk_cookie_seen, 0, "cookie_init"); + + fd_tc = bpf_program__fd(skel->progs.tc_main); + fd_accept = bpf_program__fd(skel->progs.reuse_accept); + fd_drop = bpf_program__fd(skel->progs.reuse_drop); + fd_map = bpf_map__fd(skel->maps.sk_map); + + fd_sv = create_server(sotype, ip, port, fd_drop); + if (!ASSERT_GE(fd_sv, 0, "create_server")) + goto cleanup; + + fd_val = fd_sv; + ret = bpf_map_update_elem(fd_map, &zero, &fd_val, BPF_NOEXIST); + if (!ASSERT_OK(ret, "bpf_sk_map")) + goto cleanup; + + ret = bpf_tc_hook_create(&tc_hook); + if (ret == 0) + hook_created = true; + ret = ret == -EEXIST ? 0 : ret; + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = fd_tc; + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto cleanup; + tc_attached = true; + + if (sotype == SOCK_STREAM) + ASSERT_EQ(echo_test_tcp(fd_sv, ip, port), ECONNREFUSED, "drop_tcp"); + else + ASSERT_EQ(echo_test_udp(fd_sv, ip, port), EAGAIN, "drop_udp"); + ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once"); + + skel->bss->sk_cookie_seen = 0; + skel->bss->reuseport_executed = 0; + ASSERT_OK(attach_reuseport(fd_sv, fd_accept), "attach_reuseport(accept)"); + + if (sotype == SOCK_STREAM) + ASSERT_EQ(echo_test_tcp(fd_sv, ip, port), 0, "echo_tcp"); + else + ASSERT_EQ(echo_test_udp(fd_sv, ip, port), 0, "echo_udp"); + + ASSERT_EQ(skel->bss->sk_cookie_seen, cookie(fd_sv), + "cookie_mismatch"); + ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once"); +cleanup: + if (tc_attached) { + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + ret = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(ret, "bpf_tc_detach"); + } + if (hook_created) { + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + } + test_assign_reuse__destroy(skel); + close(fd_sv); +} + +void serial_test_assign_reuse(void) +{ + struct nstoken *tok = NULL; + + SYS(out, "ip netns add %s", NS_TEST); + SYS(cleanup, "ip -net %s link set dev lo up", NS_TEST); + + tok = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(tok, "netns token")) + return; + + if (test__start_subtest("tcpv4")) + run_assign_reuse(SOCK_STREAM, "127.0.0.1", PORT); + if (test__start_subtest("tcpv6")) + run_assign_reuse(SOCK_STREAM, "::1", PORT); + if (test__start_subtest("udpv4")) + run_assign_reuse(SOCK_DGRAM, "127.0.0.1", PORT); + if (test__start_subtest("udpv6")) + run_assign_reuse(SOCK_DGRAM, "::1", PORT); + +cleanup: + close_netns(tok); + SYS_NOFAIL("ip netns delete %s", NS_TEST); +out: + return; +} diff --git a/tools/testing/selftests/bpf/progs/test_assign_reuse.c b/tools/testing/selftests/bpf/progs/test_assign_reuse.c new file mode 100644 index 000000000000..c83e870b2612 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_assign_reuse.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <stdbool.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include <linux/pkt_cls.h> + +char LICENSE[] SEC("license") = "GPL"; + +__u64 sk_cookie_seen; +__u64 reuseport_executed; +union { + struct tcphdr tcp; + struct udphdr udp; +} headers; + +const volatile __u16 dest_port; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} sk_map SEC(".maps"); + +SEC("sk_reuseport") +int reuse_accept(struct sk_reuseport_md *ctx) +{ + reuseport_executed++; + + if (ctx->ip_protocol == IPPROTO_TCP) { + if (ctx->data + sizeof(headers.tcp) > ctx->data_end) + return SK_DROP; + + if (__builtin_memcmp(&headers.tcp, ctx->data, sizeof(headers.tcp)) != 0) + return SK_DROP; + } else if (ctx->ip_protocol == IPPROTO_UDP) { + if (ctx->data + sizeof(headers.udp) > ctx->data_end) + return SK_DROP; + + if (__builtin_memcmp(&headers.udp, ctx->data, sizeof(headers.udp)) != 0) + return SK_DROP; + } else { + return SK_DROP; + } + + sk_cookie_seen = bpf_get_socket_cookie(ctx->sk); + return SK_PASS; +} + +SEC("sk_reuseport") +int reuse_drop(struct sk_reuseport_md *ctx) +{ + reuseport_executed++; + sk_cookie_seen = 0; + return SK_DROP; +} + +static inline int +assign_sk(struct __sk_buff *skb) +{ + int zero = 0, ret = 0; + struct bpf_sock *sk; + + sk = bpf_map_lookup_elem(&sk_map, &zero); + if (!sk) + return TC_ACT_SHOT; + ret = bpf_sk_assign(skb, sk, 0); + bpf_sk_release(sk); + return ret ? TC_ACT_SHOT : TC_ACT_OK; +} + +static inline bool +maybe_assign_tcp(struct __sk_buff *skb, struct tcphdr *th) +{ + if (th + 1 > (void *)(long)(skb->data_end)) + return TC_ACT_SHOT; + + if (!th->syn || th->ack || th->dest != bpf_htons(dest_port)) + return TC_ACT_OK; + + __builtin_memcpy(&headers.tcp, th, sizeof(headers.tcp)); + return assign_sk(skb); +} + +static inline bool +maybe_assign_udp(struct __sk_buff *skb, struct udphdr *uh) +{ + if (uh + 1 > (void *)(long)(skb->data_end)) + return TC_ACT_SHOT; + + if (uh->dest != bpf_htons(dest_port)) + return TC_ACT_OK; + + __builtin_memcpy(&headers.udp, uh, sizeof(headers.udp)); + return assign_sk(skb); +} + +SEC("tc") +int tc_main(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth; + + eth = (struct ethhdr *)(data); + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth)); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + + if (iph->protocol == IPPROTO_TCP) + return maybe_assign_tcp(skb, (struct tcphdr *)(iph + 1)); + else if (iph->protocol == IPPROTO_UDP) + return maybe_assign_udp(skb, (struct udphdr *)(iph + 1)); + else + return TC_ACT_SHOT; + } else { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth)); + + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + if (ip6h->nexthdr == IPPROTO_TCP) + return maybe_assign_tcp(skb, (struct tcphdr *)(ip6h + 1)); + else if (ip6h->nexthdr == IPPROTO_UDP) + return maybe_assign_udp(skb, (struct udphdr *)(ip6h + 1)); + else + return TC_ACT_SHOT; + } +} -- 2.40.1