On Tue, Jul 12, 2022 at 09:52:51PM +0100, Pavel Begunkov wrote: >Add selftests for io_uring zerocopy sends and io_uring's notification >infrastructure. It's largely influenced by msg_zerocopy and uses it on >the receive side. > >Signed-off-by: Pavel Begunkov <asml.silence@xxxxxxxxx> >--- > tools/testing/selftests/net/Makefile | 1 + > .../selftests/net/io_uring_zerocopy_tx.c | 605 ++++++++++++++++++ > .../selftests/net/io_uring_zerocopy_tx.sh | 131 ++++ > 3 files changed, 737 insertions(+) > create mode 100644 tools/testing/selftests/net/io_uring_zerocopy_tx.c > create mode 100755 tools/testing/selftests/net/io_uring_zerocopy_tx.sh > >diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile >index 7ea54af55490..51261483744e 100644 >--- a/tools/testing/selftests/net/Makefile >+++ b/tools/testing/selftests/net/Makefile >@@ -59,6 +59,7 @@ TEST_GEN_FILES += toeplitz > TEST_GEN_FILES += cmsg_sender > TEST_GEN_FILES += stress_reuseport_listen > TEST_PROGS += test_vxlan_vnifiltering.sh >+TEST_GEN_FILES += io_uring_zerocopy_tx > > TEST_FILES := settings > >diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c >new file mode 100644 >index 000000000000..9d64c560a2d6 >--- /dev/null >+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c >@@ -0,0 +1,605 @@ >+/* SPDX-License-Identifier: MIT */ >+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ >+#include <assert.h> >+#include <errno.h> >+#include <error.h> >+#include <fcntl.h> >+#include <limits.h> >+#include <stdbool.h> >+#include <stdint.h> >+#include <stdio.h> >+#include <stdlib.h> >+#include <string.h> >+#include <unistd.h> >+ >+#include <arpa/inet.h> >+#include <linux/errqueue.h> >+#include <linux/if_packet.h> >+#include <linux/io_uring.h> >+#include <linux/ipv6.h> >+#include <linux/socket.h> >+#include <linux/sockios.h> >+#include <net/ethernet.h> >+#include <net/if.h> >+#include <netinet/in.h> >+#include <netinet/ip.h> >+#include <netinet/ip6.h> >+#include <netinet/tcp.h> >+#include <netinet/udp.h> >+#include <sys/ioctl.h> >+#include <sys/mman.h> >+#include <sys/resource.h> >+#include <sys/socket.h> >+#include <sys/stat.h> >+#include <sys/time.h> >+#include <sys/types.h> >+#include <sys/un.h> >+#include <sys/wait.h> >+ >+#define NOTIF_TAG 0xfffffffULL >+#define NONZC_TAG 0 >+#define ZC_TAG 1 >+ <...> >+static void do_test(int domain, int type, int protocol) >+{ >+ int i; >+ >+ for (i = 0; i < IP_MAXPACKET; i++) >+ payload[i] = 'a' + (i % 26); >+ do_tx(domain, type, protocol); >+} >+ >+static void usage(const char *filepath) >+{ >+ error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] " >+ "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath); A small flaw, the usage here doesn't match the real options in parse_opts(). Thanks >+} >+ >+static void parse_opts(int argc, char **argv) >+{ >+ const int max_payload_len = sizeof(payload) - >+ sizeof(struct ipv6hdr) - >+ sizeof(struct tcphdr) - >+ 40 /* max tcp options */; >+ struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr; >+ struct sockaddr_in *addr4 = (void *) &cfg_dst_addr; >+ char *daddr = NULL; >+ int c; >+ >+ if (argc <= 1) >+ usage(argv[0]); >+ cfg_payload_len = max_payload_len; >+ >+ while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) { >+ switch (c) { >+ case '4': >+ if (cfg_family != PF_UNSPEC) >+ error(1, 0, "Pass one of -4 or -6"); >+ cfg_family = PF_INET; >+ cfg_alen = sizeof(struct sockaddr_in); >+ break; >+ case '6': >+ if (cfg_family != PF_UNSPEC) >+ error(1, 0, "Pass one of -4 or -6"); >+ cfg_family = PF_INET6; >+ cfg_alen = sizeof(struct sockaddr_in6); >+ break; >+ case 'D': >+ daddr = optarg; >+ break; >+ case 'p': >+ cfg_port = strtoul(optarg, NULL, 0); >+ break; >+ case 's': >+ cfg_payload_len = strtoul(optarg, NULL, 0); >+ break; >+ case 't': >+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; >+ break; >+ case 'n': >+ cfg_nr_reqs = strtoul(optarg, NULL, 0); >+ break; >+ case 'f': >+ cfg_flush = 1; >+ break; >+ case 'c': >+ cfg_cork = strtol(optarg, NULL, 0); >+ break; >+ case 'm': >+ cfg_mode = strtol(optarg, NULL, 0); >+ break; >+ } >+ } >+ >+ switch (cfg_family) { >+ case PF_INET: >+ memset(addr4, 0, sizeof(*addr4)); >+ addr4->sin_family = AF_INET; >+ addr4->sin_port = htons(cfg_port); >+ if (daddr && >+ inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1) >+ error(1, 0, "ipv4 parse error: %s", daddr); >+ break; >+ case PF_INET6: >+ memset(addr6, 0, sizeof(*addr6)); >+ addr6->sin6_family = AF_INET6; >+ addr6->sin6_port = htons(cfg_port); >+ if (daddr && >+ inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1) >+ error(1, 0, "ipv6 parse error: %s", daddr); >+ break; >+ default: >+ error(1, 0, "illegal domain"); >+ } >+ >+ if (cfg_payload_len > max_payload_len) >+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); >+ if (cfg_mode == MODE_NONZC && cfg_flush) >+ error(1, 0, "-f: only zerocopy modes support notifications"); >+ if (optind != argc - 1) >+ usage(argv[0]); >+} >+ >+int main(int argc, char **argv) >+{ >+ const char *cfg_test = argv[argc - 1]; >+ >+ parse_opts(argc, argv); >+ >+ if (!strcmp(cfg_test, "tcp")) >+ do_test(cfg_family, SOCK_STREAM, 0); >+ else if (!strcmp(cfg_test, "udp")) >+ do_test(cfg_family, SOCK_DGRAM, 0); >+ else >+ error(1, 0, "unknown cfg_test %s", cfg_test); >+ return 0; >+} >diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh >new file mode 100755 >index 000000000000..6a65e4437640 >--- /dev/null >+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh >@@ -0,0 +1,131 @@ >+#!/bin/bash >+# >+# Send data between two processes across namespaces >+# Run twice: once without and once with zerocopy >+ >+set -e >+ >+readonly DEV="veth0" >+readonly DEV_MTU=65535 >+readonly BIN_TX="./io_uring_zerocopy_tx" >+readonly BIN_RX="./msg_zerocopy" >+ >+readonly RAND="$(mktemp -u XXXXXX)" >+readonly NSPREFIX="ns-${RAND}" >+readonly NS1="${NSPREFIX}1" >+readonly NS2="${NSPREFIX}2" >+ >+readonly SADDR4='192.168.1.1' >+readonly DADDR4='192.168.1.2' >+readonly SADDR6='fd::1' >+readonly DADDR6='fd::2' >+ >+readonly path_sysctl_mem="net.core.optmem_max" >+ >+# No arguments: automated test >+if [[ "$#" -eq "0" ]]; then >+ IPs=( "4" "6" ) >+ protocols=( "tcp" "udp" ) >+ >+ for IP in "${IPs[@]}"; do >+ for proto in "${protocols[@]}"; do >+ for mode in $(seq 1 3); do >+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 >+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f >+ $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f >+ done >+ done >+ done >+ >+ echo "OK. All tests passed" >+ exit 0 >+fi >+ >+# Argument parsing >+if [[ "$#" -lt "2" ]]; then >+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" >+ exit 1 >+fi >+ >+readonly IP="$1" >+shift >+readonly TXMODE="$1" >+shift >+readonly EXTRA_ARGS="$@" >+ >+# Argument parsing: configure addresses >+if [[ "${IP}" == "4" ]]; then >+ readonly SADDR="${SADDR4}" >+ readonly DADDR="${DADDR4}" >+elif [[ "${IP}" == "6" ]]; then >+ readonly SADDR="${SADDR6}" >+ readonly DADDR="${DADDR6}" >+else >+ echo "Invalid IP version ${IP}" >+ exit 1 >+fi >+ >+# Argument parsing: select receive mode >+# >+# This differs from send mode for >+# - packet: use raw recv, because packet receives skb clones >+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option >+case "${TXMODE}" in >+'packet' | 'packet_dgram' | 'raw_hdrincl') >+ RXMODE='raw' >+ ;; >+*) >+ RXMODE="${TXMODE}" >+ ;; >+esac >+ >+# Start of state changes: install cleanup handler >+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" >+ >+cleanup() { >+ ip netns del "${NS2}" >+ ip netns del "${NS1}" >+ sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" >+} >+ >+trap cleanup EXIT >+ >+# Configure system settings >+sysctl -w -q "${path_sysctl_mem}=1000000" >+ >+# Create virtual ethernet pair between network namespaces >+ip netns add "${NS1}" >+ip netns add "${NS2}" >+ >+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ >+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" >+ >+# Bring the devices up >+ip -netns "${NS1}" link set "${DEV}" up >+ip -netns "${NS2}" link set "${DEV}" up >+ >+# Set fixed MAC addresses on the devices >+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 >+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 >+ >+# Add fixed IP addresses to the devices >+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" >+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" >+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad >+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad >+ >+# Optionally disable sg or csum offload to test edge cases >+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off >+ >+do_test() { >+ local readonly ARGS="$1" >+ >+ echo "ipv${IP} ${TXMODE} ${ARGS}" >+ ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" & >+ sleep 0.2 >+ ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}" >+ wait >+} >+ >+do_test "${EXTRA_ARGS}" >+echo ok >-- >2.37.0