Martin KaFai Lau <kafai@xxxxxx> writes:

> On Sun, Mar 06, 2022 at 11:34:04PM +0100, Toke Høiland-Jørgensen wrote:
>
>> +#define NUM_PKTS 1000000
> It took my qemu 30s to run.
> Would it have the same test coverage by lowering it to something
> like 10000 ?

Yikes! Sure, that should be fine, I think!

>> +void test_xdp_do_redirect(void)
>> +{
>> +	int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
>> +	char data[sizeof(pkt_udp) + sizeof(__u32)];
>> +	struct test_xdp_do_redirect *skel = NULL;
>> +	struct nstoken *nstoken = NULL;
>> +	struct bpf_link *link;
>> +
>> +	struct xdp_md ctx_in = { .data = sizeof(__u32),
>> +				 .data_end = sizeof(data) };
>> +	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
>> +			    .data_in = &data,
>> +			    .data_size_in = sizeof(data),
>> +			    .ctx_in = &ctx_in,
>> +			    .ctx_size_in = sizeof(ctx_in),
>> +			    .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
>> +			    .repeat = NUM_PKTS,
>> +			    .batch_size = 64,
>> +		);
>> +	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
>> +			    .attach_point = BPF_TC_INGRESS);
>> +
>> +	memcpy(&data[sizeof(__u32)], &pkt_udp, sizeof(pkt_udp));
>> +	*((__u32 *)data) = 0x42; /* metadata test value */
>> +
>> +	skel = test_xdp_do_redirect__open();
>> +	if (!ASSERT_OK_PTR(skel, "skel"))
>> +		return;
>> +
>> +	/* The XDP program we run with bpf_prog_run() will cycle through all
>> +	 * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
>> +	 * ending up with PASS, so we should end up with two packets on the dst
>> +	 * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
>> +	 * payload.
>> +	 */
>> +	SYS("ip netns add testns");
>> +	nstoken = open_netns("testns");
>> +	if (!ASSERT_OK_PTR(nstoken, "setns"))
>> +		goto out;
>> +
>> +	SYS("ip link add veth_src type veth peer name veth_dst");
>> +	SYS("ip link set dev veth_src address 00:11:22:33:44:55");
>> +	SYS("ip link set dev veth_dst address 66:77:88:99:aa:bb");
>> +	SYS("ip link set dev veth_src up");
>> +	SYS("ip link set dev veth_dst up");
>> +	SYS("ip addr add dev veth_src fc00::1/64");
>> +	SYS("ip addr add dev veth_dst fc00::2/64");
>> +	SYS("ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
>> +
>> +	/* We enable forwarding in the test namespace because that will cause
>> +	 * the packets that go through the kernel stack (with XDP_PASS) to be
>> +	 * forwarded back out the same interface (because of the packet dst
>> +	 * combined with the interface addresses). When this happens, the
>> +	 * regular forwarding path will end up going through the same
>> +	 * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
>> +	 * deadlock if it happens on the same CPU. There's a local_bh_disable()
>> +	 * in the test_run code to prevent this, but an earlier version of the
>> +	 * code didn't have this, so we keep the test behaviour to make sure the
>> +	 * bug doesn't resurface.
>> +	 */
>> +	SYS("sysctl -qw net.ipv6.conf.all.forwarding=1");
>> +
>> +	ifindex_src = if_nametoindex("veth_src");
>> +	ifindex_dst = if_nametoindex("veth_dst");
>> +	if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
>> +	    !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
>> +		goto out;
>> +
>> +	memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
>> +	skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
>> +	skel->rodata->ifindex_in = ifindex_src;
>> +	ctx_in.ingress_ifindex = ifindex_src;
>> +	tc_hook.ifindex = ifindex_src;
>> +
>> +	if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
>> +		goto out;
>> +
>> +	link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
>> +	if (!ASSERT_OK_PTR(link, "prog_attach"))
>> +		goto out;
>> +	skel->links.xdp_count_pkts = link;
>> +
>> +	tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
>> +	if (attach_tc_prog(&tc_hook, tc_prog_fd))
>> +		goto out;
>> +
>> +	xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
>> +	err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
>> +	if (!ASSERT_OK(err, "prog_run"))
>> +		goto out_tc;
>> +
>> +	/* wait for the packets to be flushed */
>> +	kern_sync_rcu();
>> +
>> +	/* There will be one packet sent through XDP_REDIRECT and one through
>> +	 * XDP_TX; these will show up on the XDP counting program, while the
>> +	 * rest will be counted at the TC ingress hook (and the counting program
>> +	 * resets the packet payload so they don't get counted twice even though
>> +	 * they are re-xmited out the veth device)
>> +	 */
>> +	ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
>> +	ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
>> +
>> +out_tc:
>> +	bpf_tc_hook_destroy(&tc_hook);
>> +out:
>> +	if (nstoken)
>> +		close_netns(nstoken);
>> +	system("ip netns del testns");
>> +	test_xdp_do_redirect__destroy(skel);
>> +}
>> diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
>> new file mode 100644
>> index 000000000000..d785f48304ea
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
>> @@ -0,0 +1,92 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <vmlinux.h>
>> +#include <bpf/bpf_helpers.h>
>> +
>> +#define ETH_ALEN 6
>> +#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
>> +const volatile int ifindex_out;
>> +const volatile int ifindex_in;
>> +const volatile __u8 expect_dst[ETH_ALEN];
>> +volatile int pkts_seen_xdp = 0;
>> +volatile int pkts_seen_tc = 0;
>> +volatile int retcode = XDP_REDIRECT;
>> +
>> +SEC("xdp")
>> +int xdp_redirect(struct xdp_md *xdp)
>> +{
>> +	__u32 *metadata = (void *)(long)xdp->data_meta;
>> +	void *data_end = (void *)(long)xdp->data_end;
>> +	void *data = (void *)(long)xdp->data;
>> +
>> +	__u8 *payload = data + HDR_SZ;
>> +	int ret = retcode;
>> +
>> +	if (payload + 1 > data_end)
>> +		return XDP_ABORTED;
>> +
>> +	if (xdp->ingress_ifindex != ifindex_in)
>> +		return XDP_ABORTED;
>> +
>> +	if (metadata + 1 > data)
>> +		return XDP_ABORTED;
>> +
>> +	if (*metadata != 0x42)
>> +		return XDP_ABORTED;
>> +
>> +	*payload = 0x42;

> nit. How about also adding a pkts_seen_zero counter here, like
> 	if (*payload == 0) {
> 		*payload = 0x42;
> 		pkts_seen_zero++;
> 	}
>
> and add ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero")
> to the prog_tests. It can better show the recycled page's data
> is not re-initialized.

Good idea, will add!
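I.e., something like this (untested sketch; I'd declare pkts_seen_zero as
another volatile counter next to pkts_seen_xdp/pkts_seen_tc):

	volatile int pkts_seen_zero = 0;

	[...]

	/* Freshly-initialised pages still have a zero payload; recycled
	 * pages keep the 0x42 we write below, so the counter shows that
	 * recycled pages are not re-initialised.
	 */
	if (*payload == 0) {
		*payload = 0x42;
		pkts_seen_zero++;
	}

and in the prog_tests file:

	ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");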
>> +
>> +	if (bpf_xdp_adjust_meta(xdp, 4))
>> +		return XDP_ABORTED;
>> +
>> +	if (retcode > XDP_PASS)
>> +		retcode--;
>> +
>> +	if (ret == XDP_REDIRECT)
>> +		return bpf_redirect(ifindex_out, 0);
>> +
>> +	return ret;
>> +}
>> +
>> +static bool check_pkt(void *data, void *data_end)
>> +{
>> +	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
>> +	__u8 *payload = data + HDR_SZ;
>> +
>> +	if (payload + 1 > data_end)
>> +		return false;
>> +
>> +	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
>> +		return false;
>> +
>> +	/* reset the payload so the same packet doesn't get counted twice when
>> +	 * it cycles back through the kernel path and out the dst veth
>> +	 */
>> +	*payload = 0;
>> +	return true;
>> +}
>> +
>> +SEC("xdp")
>> +int xdp_count_pkts(struct xdp_md *xdp)
>> +{
>> +	void *data = (void *)(long)xdp->data;
>> +	void *data_end = (void *)(long)xdp->data_end;
>> +
>> +	if (check_pkt(data, data_end))
>> +		pkts_seen_xdp++;
>> +
>> +	return XDP_DROP;

> nit. A comment here will be useful to explain XDP_DROP from
> the xdp@veth@ingress will put the page back to the recycle
> pool, which will be similar to xmit-ing out of a real NIC.

Sure, can do.
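Maybe something along these lines:

	/* Return XDP_DROP; on veth ingress this puts the page straight
	 * back into the live-frame recycle pool, which is similar to what
	 * happens when a frame is xmited out of a physical NIC.
	 */
	return XDP_DROP;

-Toke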