[PATCH net-next v2 2/2] Tests for RTAX_INITRWND

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Accompanying tests. We open skops program, hooking on
BPF_SOCK_OPS_RWND_INIT event, where we return updated value of
initrwnd route path attribute.

In tests we see if values above 64KiB indeed are advertised correctly
to the remote peer.

Signed-off-by: Marek Majkowski <marek@xxxxxxxxxxxxxx>
---
 .../selftests/bpf/prog_tests/tcp_initrwnd.c   | 420 ++++++++++++++++++
 .../selftests/bpf/progs/test_tcp_initrwnd.c   |  30 ++
 2 files changed, 450 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tcp_initrwnd.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_tcp_initrwnd.c

diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_initrwnd.c b/tools/testing/selftests/bpf/prog_tests/tcp_initrwnd.c
new file mode 100644
index 000000000000..af54dde05609
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_initrwnd.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2022 Cloudflare
+
+#include "test_progs.h"
+#include "bpf_util.h"
+#include "network_helpers.h"
+
+#include "test_tcp_initrwnd.skel.h"
+
+#define CG_NAME "/tcpbpf-user-test"
+
+/* It's easier to hardcode offsets than to fight with headers
+ *
+ * $ pahole tcp_info
+ * struct tcp_info {
+ *	__u32                      tcpi_rcv_ssthresh;   *    64     4 *
+ *	__u32                      tcpi_snd_wnd;        *   228     4 *
+ */
+
+#define TCPI_RCV_SSTHRESH(info) info[64 / 4]
+#define TCPI_SND_WND(info) info[228 / 4]
+
+static int read_int_sysctl(const char *sysctl)
+{
+	char buf[16];
+	int fd, ret;
+
+	fd = open(sysctl, 0);
+	if (CHECK_FAIL(fd == -1))
+		goto err;
+
+	ret = read(fd, buf, sizeof(buf));
+	if (CHECK_FAIL(ret <= 0))
+		goto err;
+
+	close(fd);
+	return atoi(buf);
+err:
+	if (fd < 0)
+		close(fd);
+	return -1;
+}
+
+static int write_int_sysctl(const char *sysctl, int v)
+{
+	int fd, ret, size;
+	char buf[16];
+
+	fd = open(sysctl, O_RDWR);
+	if (CHECK_FAIL(fd < 0))
+		goto err;
+
+	size = snprintf(buf, sizeof(buf), "%d", v);
+	ret = write(fd, buf, size);
+	if (CHECK_FAIL(ret < 0))
+		goto err;
+
+	close(fd);
+	return 0;
+err:
+	if (fd < 0)
+		close(fd);
+	return -1;
+}
+
+static int tcp_timestamps;
+static int tcp_window_scaling;
+static int tcp_workaround_signed_windows;
+static int tcp_syncookies;
+
+static void do_test_server(int server_fd, struct test_tcp_initrwnd *skel,
+			   int initrwnd, unsigned int tcpi_snd_wnd_on_connect,
+			   unsigned int rcv_ssthresh_on_recv,
+			   unsigned int tcpi_snd_wnd_on_recv)
+{
+	int client_fd = -1, sd = -1, r;
+	__u32 info[256 / 4];
+	socklen_t optlen = sizeof(info);
+	char b[1] = { 0x55 };
+
+	fprintf(stderr, "[*] initrwnd=%d\n", initrwnd);
+
+	skel->bss->initrwnd = initrwnd; // in full MSS packets
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (CHECK_FAIL(client_fd < 0))
+		goto err;
+
+	sd = accept(server_fd, NULL, NULL);
+	if (CHECK_FAIL(sd < 0))
+		goto err;
+
+	/* There are three moments where we check the window/rcv_ssthresh.
+	 *
+	 * (1) First, after socket creation, TCP handshake, we expect
+	 * the client to see only SYN+ACK which is without window
+	 * scaling. That is: from client/sender point of view we see
+	 * at most 64KiB open receive window.
+	 */
+	r = getsockopt(client_fd, SOL_TCP, TCP_INFO, &info, &optlen);
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	ASSERT_EQ(TCPI_SND_WND(info), tcpi_snd_wnd_on_connect,
+		  "getsockopt(TCP_INFO.tcpi_snd_wnd) on connect");
+
+	/* (2) At the same time, from the server/receiver point of
+	 * view, we already initiated socket, so rcv_ssthresh is set
+	 * to high value, potentially larger than 64KiB.
+	 */
+	r = getsockopt(sd, SOL_TCP, TCP_INFO, &info, &optlen);
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	ASSERT_EQ(TCPI_RCV_SSTHRESH(info), rcv_ssthresh_on_recv,
+		  "getsockopt(TCP_INFO.rcv_ssthresh) on recv");
+
+	ASSERT_LE(tcpi_snd_wnd_on_connect, rcv_ssthresh_on_recv,
+		  "snd_wnd > rcv_ssthresh");
+
+	/* (3) Finally, after receiving some ACK from client, the
+	 * client/sender should also see wider open window, larger
+	 * than 64KiB.
+	 */
+	if (CHECK_FAIL(write(client_fd, &b, sizeof(b)) != 1))
+		perror("Failed to send single byte");
+
+	if (CHECK_FAIL(read(sd, &b, sizeof(b)) != 1))
+		perror("Failed to send single byte");
+
+	r = getsockopt(client_fd, SOL_TCP, TCP_INFO, &info, &optlen);
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	ASSERT_EQ(TCPI_SND_WND(info), tcpi_snd_wnd_on_recv,
+		  "getsockopt(TCP_INFO.tcpi_snd_wnd) after recv");
+
+	ASSERT_LE(tcpi_snd_wnd_on_connect, tcpi_snd_wnd_on_recv,
+		  "snd_wnd_on_connect > snd_wnd_on_recv");
+
+err:
+	if (sd != -1)
+		close(sd);
+	if (client_fd != -1)
+		close(client_fd);
+}
+
+static int socket_client(int server_fd)
+{
+	socklen_t optlen;
+	int family, type, protocol, r;
+
+	optlen = sizeof(family);
+	r = getsockopt(server_fd, SOL_SOCKET, SO_DOMAIN, &family, &optlen);
+	if (CHECK_FAIL(r < 0))
+		return -1;
+
+	optlen = sizeof(type);
+	r = getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen);
+	if (CHECK_FAIL(r < 0))
+		return -1;
+
+	optlen = sizeof(protocol);
+	r = getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen);
+	if (CHECK_FAIL(r < 0))
+		return -1;
+
+	return socket(family, type, protocol);
+}
+
+static void do_test_client(int server_fd, struct test_tcp_initrwnd *skel,
+			   int initrwnd, unsigned int rcv_ssthresh,
+			   unsigned int tcpi_snd_wnd)
+{
+	int client_fd = -1, sd = -1, r, maxseg;
+	__u32 info[256 / 4];
+	socklen_t optlen = sizeof(info);
+	size_t rcvbuf;
+
+	fprintf(stderr, "[*] client initrwnd=%d\n", initrwnd);
+
+	skel->bss->initrwnd = initrwnd; // in full MSS packets
+
+	client_fd = socket_client(server_fd);
+	if (CHECK_FAIL(client_fd < 0))
+		goto err;
+
+	/* With MSS=64KiB on loopback it's hard to argue about init
+	 * rwnd. Let's set MSS to something that will make our life
+	 * easier, like 1024 + timestamps.
+	 */
+	maxseg = 1024;
+
+	r = setsockopt(client_fd, SOL_TCP, TCP_MAXSEG, &maxseg, sizeof(maxseg));
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	rcvbuf = 208 * 1024;
+	r = setsockopt(client_fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+		       sizeof(rcvbuf));
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	r = connect_fd_to_fd(client_fd, server_fd, 0);
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	sd = accept(server_fd, NULL, NULL);
+	if (CHECK_FAIL(sd < 0))
+		goto err;
+
+	/* There is only one moment to check - the server should know
+	 * about client window just after accept. First check client
+	 * rcv_ssthresh for sanity.
+	 */
+	r = getsockopt(client_fd, SOL_TCP, TCP_INFO, &info, &optlen);
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	ASSERT_EQ(TCPI_RCV_SSTHRESH(info), rcv_ssthresh,
+		  "getsockopt(TCP_INFO.tcpi_rcv_ssthresh) on client");
+
+	/* And the recevie window size as seen from the server.
+	 */
+	r = getsockopt(sd, SOL_TCP, TCP_INFO, &info, &optlen);
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	ASSERT_EQ(TCPI_SND_WND(info), tcpi_snd_wnd,
+		  "getsockopt(TCP_INFO.tcpi_snd_wnd)");
+
+	ASSERT_GE(rcv_ssthresh, tcpi_snd_wnd, "rcv_ssthresh < tcpi_snd_wnd");
+err:
+	if (sd != -1)
+		close(sd);
+	if (client_fd != -1)
+		close(client_fd);
+}
+
+static void run_tests(int cg_fd, struct test_tcp_initrwnd *skel)
+{
+	int server_fd = -1, r, rcvbuf, maxseg;
+	unsigned int max_wnd, buf;
+
+	skel->links.bpf_testcb =
+		bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
+		goto err;
+
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+	if (CHECK_FAIL(server_fd < 0))
+		goto err;
+
+	maxseg = 1024;
+	if (tcp_timestamps)
+		maxseg += 12;
+
+	/* With MSS=64KiB on loopback it's hard to argue about init
+	 * rwnd. Let's set MSS to something that will make our life
+	 * easier, like 1024 + timestamps.
+	 */
+	r = setsockopt(server_fd, SOL_TCP, TCP_MAXSEG, &maxseg, sizeof(maxseg));
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	/* Obviously, rcvbuffer must be large at the start for the
+	 * initrwnd to make any dent in rcv_ssthresh (assuming default
+	 * tcp_rmem of 128KiB)
+	 */
+	rcvbuf = 208 * 1024;
+	r = setsockopt(server_fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+		       sizeof(rcvbuf));
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	max_wnd = tcp_workaround_signed_windows ? 32767 : 65535;
+
+	/* [*] server advertising large window ** */
+	fprintf(stderr,
+		"[#] server timestamps=%d window_scaling=%d workaround_signed_windows=%d syncookies=%d\n",
+		tcp_timestamps, tcp_window_scaling,
+		tcp_workaround_signed_windows, tcp_syncookies);
+
+	/* Small initrwnd. Not exceeding 64KiB */
+	do_test_server(server_fd, skel, 1, 1024, 1024, 1024);
+
+	if (tcp_window_scaling) {
+		/* Borderline. Not exceeding 64KiB */
+		do_test_server(server_fd, skel, 63, MIN(max_wnd, 63 * 1024),
+			       63 * 1024, 63 * 1024);
+	} else {
+		do_test_server(server_fd, skel, 63, MIN(max_wnd, 63 * 1024),
+			       63 * 1024, MIN(max_wnd, 63 * 1024));
+	}
+
+	if (tcp_window_scaling) {
+		/* The interesting case. Crossing 64KiB */
+		do_test_server(server_fd, skel, 128, max_wnd, 128 * 1024,
+			       128 * 1024);
+	} else {
+		do_test_server(server_fd, skel, 128, max_wnd, 65535, max_wnd);
+	}
+
+	if (tcp_window_scaling) {
+		/* Super large. Remember the rcv buffer is 208*2 */
+		do_test_server(server_fd, skel, 206, max_wnd, 206 * 1024,
+			       206 * 1024);
+
+		/* Not sure why, but here you go, subtract 12 if timestamps */
+		buf = 207 * 1024U - (tcp_timestamps ? 12 : 0);
+		do_test_server(server_fd, skel, 512, max_wnd, buf, buf);
+	}
+
+	/* [*] client advertising large window ** */
+	fprintf(stderr,
+		"[#] client timestamps=%d window_scaling=%d workaround_signed_windows=%d syncookies=%d\n",
+		tcp_timestamps, tcp_window_scaling,
+		tcp_workaround_signed_windows, tcp_syncookies);
+
+	/* Ensure server mss is not 1024 not to be confusing */
+	maxseg = 32767;
+	r = setsockopt(server_fd, SOL_TCP, TCP_MAXSEG, &maxseg, sizeof(maxseg));
+	if (CHECK_FAIL(r < 0))
+		goto err;
+
+	/* Test if client advertises small rcv window */
+	do_test_client(server_fd, skel, 1, 1024, 1024);
+
+	if (tcp_window_scaling) {
+		/* Medium size */
+		do_test_client(server_fd, skel, 63, 63 * 1024, 63 * 1024);
+	} else {
+		do_test_client(server_fd, skel, 63, 63 * 1024,
+			       MIN(max_wnd, 63 * 1024));
+	}
+
+	if (tcp_window_scaling) {
+		/* And large window */
+		do_test_client(server_fd, skel, 128, 128 * 1024, 128 * 1024);
+	} else {
+		do_test_client(server_fd, skel, 128, 65535, max_wnd);
+	}
+
+	if (tcp_window_scaling) {
+		/* Super large. */
+		do_test_client(server_fd, skel, 206, 206 * 1024U, 206 * 1024U);
+
+		/* Not sure why, but here you go, subtract 12 if timestamps */
+		buf = 207 * 1024U + (tcp_timestamps ? 12 : 0);
+		do_test_client(server_fd, skel, 512, buf, buf);
+	}
+err:
+	if (server_fd != -1)
+		close(server_fd);
+}
+
+#define PROC_TCP_TIMESTAMPS "/proc/sys/net/ipv4/tcp_timestamps"
+#define PROC_TCP_WINDOW_SCALING "/proc/sys/net/ipv4/tcp_window_scaling"
+#define PROC_TCP_WORKAROUND_SIGNED_WINDOWS \
+	"/proc/sys/net/ipv4/tcp_workaround_signed_windows"
+#define PROC_TCP_SYNCOOKIES "/proc/sys/net/ipv4/tcp_syncookies"
+
+void test_tcp_initrwnd(void)
+{
+	struct test_tcp_initrwnd *skel;
+	unsigned int i;
+	int cg_fd;
+
+	int saved_tcp_timestamps = read_int_sysctl(PROC_TCP_TIMESTAMPS);
+	int saved_tcp_window_scaling = read_int_sysctl(PROC_TCP_WINDOW_SCALING);
+	int saved_tcp_workaround_signed_windows =
+		read_int_sysctl(PROC_TCP_WORKAROUND_SIGNED_WINDOWS);
+	int saved_tcp_syncookies = read_int_sysctl(PROC_TCP_SYNCOOKIES);
+
+	if (CHECK_FAIL(saved_tcp_timestamps == -1 ||
+		       saved_tcp_window_scaling == -1 ||
+		       saved_tcp_workaround_signed_windows == -1 ||
+		       saved_tcp_syncookies == -1))
+		return;
+
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (CHECK_FAIL(cg_fd < 0))
+		return;
+
+	skel = test_tcp_initrwnd__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		close(cg_fd);
+		return;
+	}
+
+	// syn cookies testing disabled
+	for (i = 0; i < 8; i++) {
+		tcp_timestamps = !!(i & 0x1);
+		tcp_window_scaling = !!(i & 0x2);
+		tcp_workaround_signed_windows = !!(i & 0x4);
+		tcp_syncookies = (i & 0x8) ? 2 : 0;
+
+		write_int_sysctl(PROC_TCP_TIMESTAMPS, tcp_timestamps);
+		write_int_sysctl(PROC_TCP_WINDOW_SCALING, tcp_window_scaling);
+		write_int_sysctl(PROC_TCP_WORKAROUND_SIGNED_WINDOWS,
+				 tcp_workaround_signed_windows);
+		write_int_sysctl(PROC_TCP_SYNCOOKIES, tcp_syncookies);
+
+		// Without tcp timestamps syncookies can't do wscale
+		if (tcp_syncookies && tcp_timestamps == 0)
+			tcp_window_scaling = 0;
+
+		run_tests(cg_fd, skel);
+	}
+
+	write_int_sysctl(PROC_TCP_TIMESTAMPS, saved_tcp_timestamps);
+	write_int_sysctl(PROC_TCP_WINDOW_SCALING, saved_tcp_window_scaling);
+	write_int_sysctl(PROC_TCP_WORKAROUND_SIGNED_WINDOWS,
+			 saved_tcp_workaround_signed_windows);
+	write_int_sysctl(PROC_TCP_SYNCOOKIES, saved_tcp_syncookies);
+
+	test_tcp_initrwnd__destroy(skel);
+
+	close(cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_initrwnd.c b/tools/testing/selftests/bpf/progs/test_tcp_initrwnd.c
new file mode 100644
index 000000000000..d532e9e2d344
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_initrwnd.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2022 Cloudflare
+
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+int initrwnd;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int op;
+
+	op = (int)skops->op;
+
+	switch (op) {
+	case BPF_SOCK_OPS_RWND_INIT:
+		rv = initrwnd;
+		break;
+
+	default:
+		rv = -1;
+	}
+	skops->reply = rv;
+	return 1;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
-- 
2.25.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux