On Sun, Mar 06, 2022 at 11:34:04PM +0100, Toke Høiland-Jørgensen wrote:
> +#define NUM_PKTS 1000000

It took my qemu 30s to run. Would it have the same test coverage by
lowering it to something like 10000?

> +void test_xdp_do_redirect(void)
> +{
> +	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
> +	char data[sizeof(pkt_udp) + sizeof(__u32)];
> +	struct test_xdp_do_redirect *skel = NULL;
> +	struct nstoken *nstoken = NULL;
> +	struct bpf_link *link;
> +
> +	struct xdp_md ctx_in = { .data = sizeof(__u32),
> +				 .data_end = sizeof(data) };
> +	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
> +			    .data_in = &data,
> +			    .data_size_in = sizeof(data),
> +			    .ctx_in = &ctx_in,
> +			    .ctx_size_in = sizeof(ctx_in),
> +			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
> +			    .repeat = NUM_PKTS,
> +			    .batch_size = 64,
> +		);
> +	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
> +			    .attach_point = BPF_TC_INGRESS);
> +
> +	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
> +	*((__u32 *)data) = 0x42; /* metadata test value */
> +
> +	skel = test_xdp_do_redirect__open();
> +	if (!ASSERT_OK_PTR(skel, "skel"))
> +		return;
> +
> +	/* The XDP program we run with bpf_prog_run() will cycle through all
> +	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
> +	 * ending up with PASS, so we should end up with two packets on the dst
> +	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
> +	 * payload.
> +	 */
> +	SYS("ip netns add testns");
> +	nstoken = open_netns("testns");
> +	if (!ASSERT_OK_PTR(nstoken, "setns"))
> +		goto out;
> +
> +	SYS("ip link add veth_src type veth peer name veth_dst");
> +	SYS("ip link set dev veth_src address 00:11:22:33:44:55");
> +	SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
> +	SYS("ip link set dev veth_src up");
> +	SYS("ip link set dev veth_dst up");
> +	SYS("ip addr add dev veth_src fc00::1/64");
> +	SYS("ip addr add dev veth_dst fc00::2/64");
> +	SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
> +
> +	/* We enable forwarding in the test namespace because that will cause
> +	 * the packets that go through the kernel stack (with XDP_PASS) to be
> +	 * forwarded back out the same interface (because of the packet dst
> +	 * combined with the interface addresses). When this happens, the
> +	 * regular forwarding path will end up going through the same
> +	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
> +	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
> +	 * in the test_run code to prevent this, but an earlier version of the
> +	 * code didn't have this, so we keep the test behaviour to make sure the
> +	 * bug doesn't resurface.
> +	 */
> +	SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
> +
> +	ifindex_src = if_nametoindex("veth_src");
> +	ifindex_dst = if_nametoindex("veth_dst");
> +	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
> +	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
> +		goto out;
> +
> +	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
> +	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
> +	skel->rodata->ifindex_in = ifindex_src;
> +	ctx_in.ingress_ifindex = ifindex_src;
> +	tc_hook.ifindex = ifindex_src;
> +
> +	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
> +		goto out;
> +
> +	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
> +	if (!ASSERT_OK_PTR(link, "prog_attach"))
> +		goto out;
> +	skel->links.xdp_count_pkts = link;
> +
> +	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
> +	if (attach_tc_prog(&tc_hook, tc_prog_fd))
> +		goto out;
> +
> +	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
> +	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
> +	if (!ASSERT_OK(err, "prog_run"))
> +		goto out_tc;
> +
> +	/* wait for the packets to be flushed */
> +	kern_sync_rcu();
> +
> +	/* There will be one packet sent through XDP_REDIRECT and one through
> +	 * XDP_TX; these will show up on the XDP counting program, while the
> +	 * rest will be counted at the TC ingress hook (and the counting program
> +	 * resets the packet payload so they don't get counted twice even though
> +	 * they are re-xmited out the veth device
> +	 */
> +	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
> +	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
> +
> +out_tc:
> +	bpf_tc_hook_destroy(&tc_hook);
> +out:
> +	if (nstoken)
> +		close_netns(nstoken);
> +	system("ip netns del testns");
> +	test_xdp_do_redirect__destroy(skel);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
> new file mode 100644
> index 000000000000..d785f48304ea
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
> @@ -0,0 +1,92 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <vmlinux.h>
> +#include <bpf/bpf_helpers.h>
> +
> +#define ETH_ALEN 6
> +#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
> +const volatile int ifindex_out;
> +const volatile int ifindex_in;
> +const volatile __u8 expect_dst[ETH_ALEN];
> +volatile int pkts_seen_xdp = 0;
> +volatile int pkts_seen_tc = 0;
> +volatile int retcode = XDP_REDIRECT;
> +
> +SEC("xdp")
> +int xdp_redirect(struct xdp_md *xdp)
> +{
> +	__u32 *metadata = (void *)(long)xdp->data_meta;
> +	void *data_end = (void *)(long)xdp->data_end;
> +	void *data = (void *)(long)xdp->data;
> +
> +	__u8 *payload = data + HDR_SZ;
> +	int ret = retcode;
> +
> +	if (payload + 1 > data_end)
> +		return XDP_ABORTED;
> +
> +	if (xdp->ingress_ifindex != ifindex_in)
> +		return XDP_ABORTED;
> +
> +	if (metadata + 1 > data)
> +		return XDP_ABORTED;
> +
> +	if (*metadata != 0x42)
> +		return XDP_ABORTED;
> +
> +	*payload = 0x42;

nit. How about also adding a pkts_seen_zero counter here, like

	if (*payload == 0) {
		*payload = 0x42;
		pkts_seen_zero++;
	}

and add ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero") to the
prog_tests. It can better show the recycled page's data is not
re-initialized.
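To make the suggestion concrete, the remaining two pieces would look
roughly like this (untested sketch; the counter name and the test tag are
just the ones suggested above, feel free to rename):

	/* in the BPF prog, next to the existing counters: */
	volatile int pkts_seen_zero = 0;

	/* on the prog_tests side, next to the existing pkt_count_* checks: */
	ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");

A non-zero pkts_seen_zero can only come from a page that check_pkt()
already zeroed and that was recycled without being re-initialized from
data_in, which is the point of the check.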
> +
> +	if (bpf_xdp_adjust_meta(xdp, 4))
> +		return XDP_ABORTED;
> +
> +	if (retcode > XDP_PASS)
> +		retcode--;
> +
> +	if (ret == XDP_REDIRECT)
> +		return bpf_redirect(ifindex_out, 0);
> +
> +	return ret;
> +}
> +
> +static bool check_pkt(void *data, void *data_end)
> +{
> +	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
> +	__u8 *payload = data + HDR_SZ;
> +
> +	if (payload + 1 > data_end)
> +		return false;
> +
> +	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
> +		return false;
> +
> +	/* reset the payload so the same packet doesn't get counted twice when
> +	 * it cycles back through the kernel path and out the dst veth
> +	 */
> +	*payload = 0;
> +	return true;
> +}
> +
> +SEC("xdp")
> +int xdp_count_pkts(struct xdp_md *xdp)
> +{
> +	void *data = (void *)(long)xdp->data;
> +	void *data_end = (void *)(long)xdp->data_end;
> +
> +	if (check_pkt(data, data_end))
> +		pkts_seen_xdp++;
> +
> +	return XDP_DROP;

nit. A comment here would be useful to explain that XDP_DROP from the xdp
prog at veth ingress will put the page back into the recycle pool, which
is similar to xmit-ing it out of a real NIC. (A possible wording is
sketched at the end of this mail.)

> +}
> +
> +SEC("tc")
> +int tc_count_pkts(struct __sk_buff *skb)
> +{
> +	void *data = (void *)(long)skb->data;
> +	void *data_end = (void *)(long)skb->data_end;
> +
> +	if (check_pkt(data, data_end))
> +		pkts_seen_tc++;
> +
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
> --
> 2.35.1
>
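For the XDP_DROP nit above, something along these lines would do (just a
wording sketch restating the reasoning above, feel free to rephrase):

	/* XDP_DROP from this prog, attached at the veth's ingress, puts the
	 * page back into the recycle pool, similar to what happens after the
	 * frame has been xmit-ed out of a real NIC.
	 */
	return XDP_DROP;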