This adds a utility to measure vsock receive throughput.

Signed-off-by: Arseniy Krasnov <AVKrasnov@xxxxxxxxxxxxxx>
---
 tools/testing/vsock/Makefile        |   1 +
 tools/testing/vsock/vsock_rx_perf.c | 701 ++++++++++++++++++++++++++++
 2 files changed, 702 insertions(+)
 create mode 100644 tools/testing/vsock/vsock_rx_perf.c

diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 7172c21fbd8d..5aea346ba2bc 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -3,6 +3,7 @@ all: test
 test: vsock_test vsock_diag_test
 vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o
 vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
+vsock_rx_perf: vsock_rx_perf.o
 
 CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
 .PHONY: all test clean
diff --git a/tools/testing/vsock/vsock_rx_perf.c b/tools/testing/vsock/vsock_rx_perf.c
new file mode 100644
index 000000000000..323626089043
--- /dev/null
+++ b/tools/testing/vsock/vsock_rx_perf.c
@@ -0,0 +1,701 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vsock_rx_perf - benchmark utility for zerocopy receive.
+ *
+ * Copyright (C) 2022 SberDevices.
+ *
+ * Author: Arseniy Krasnov <AVKrasnov@xxxxxxxxxxxxxx>
+ */
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <poll.h>
+#include <uapi/linux/virtio_vsock.h>
+#include <uapi/linux/vm_sockets.h>
+#include <sys/socket.h>
+#include <linux/vm_sockets.h>
+
+#define PAGE_SIZE	4096
+#define NS_PER_SEC	1000000000ULL
+#define DEFAULT_BUF_SIZE_BYTES	(128*1024)
+#define DEFAULT_TO_SEND_BYTES	(65*1024)
+#define DEFAULT_VSOCK_BUF_BYTES	(256*1024)
+#define DEFAULT_RCVLOWAT_BYTES	1
+#define DEFAULT_PORT	1234
+
+static bool client_mode = true;
+static int peer_cid = -1;
+static int port = DEFAULT_PORT;
+static unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES;
+static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES;
+static unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES;
+static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES;
+
+#define ZEROCOPY_MODE_NO	0
+#define ZEROCOPY_MODE_POLL	1
+#define ZEROCOPY_MODE_USER_POLL	2
+
+static int zerocopy_mode = ZEROCOPY_MODE_NO;
+
+/*
+ * Return the current monotonic time in nanoseconds.
+ *
+ * Readings are only ever subtracted from each other to get an interval,
+ * so CLOCK_MONOTONIC is used to stay immune to wall-clock adjustments.
+ * Exits the process if the clock cannot be read.
+ */
+static inline uint64_t current_nsec(void)
+{
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+		perror("clock_gettime");
+		exit(EXIT_FAILURE);
+	}
+
+	return ((uint64_t)ts.tv_sec * NS_PER_SEC) + ts.tv_nsec;
+}
+
+/* Convert a nanosecond interval to seconds. */
+static inline double nsec_to_sec(uint64_t ns)
+{
+	return (double)ns / NS_PER_SEC;
+}
+
+/* Average duration of one call in ns; 0 when nothing was counted. */
+static inline double avg_nsec(uint64_t total_ns, unsigned int cnt)
+{
+	return cnt ? (double)total_ns / cnt : 0.0;
+}
+
+/*
+ * Parse a size with an optional K/M/G/T/P/E suffix (powers of 1024).
+ * Adapted from lib/cmdline.c. Exits if 'ptr' does not start with a number.
+ */
+static unsigned long memparse(const char *ptr)
+{
+	unsigned long long ret;
+	char *endptr;
+
+	ret = strtoull(ptr, &endptr, 0);
+
+	if (endptr == ptr) {
+		fprintf(stderr, "Invalid size '%s'\n", ptr);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Every suffix adds one more x1024 step on top of the smaller ones. */
+	switch (*endptr) {
+	case 'E':
+	case 'e':
+		ret <<= 10;
+		/* fallthrough */
+	case 'P':
+	case 'p':
+		ret <<= 10;
+		/* fallthrough */
+	case 'T':
+	case 't':
+		ret <<= 10;
+		/* fallthrough */
+	case 'G':
+	case 'g':
+		ret <<= 10;
+		/* fallthrough */
+	case 'M':
+	case 'm':
+		ret <<= 10;
+		/* fallthrough */
+	case 'K':
+	case 'k':
+		ret <<= 10;
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Issue the SO_VM_SOCKETS_MAP_RX getsockopt() on 'fd', handing the kernel
+ * the address 'va' of the rx mapping. Exits on failure.
+ */
+static void map_rx(int fd, void *va)
+{
+	socklen_t len = sizeof(va);
+
+	if (getsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_MAP_RX, &va, &len) < 0) {
+		perror("getsockopt");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Set both the maximum and the current vsock buffer size to '-v' bytes. */
+static void vsock_increase_buf_size(int fd)
+{
+	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE,
+		       &vsock_buf_bytes, sizeof(vsock_buf_bytes))) {
+		perror("setsockopt");
+		exit(EXIT_FAILURE);
+	}
+
+	if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
+		       &vsock_buf_bytes, sizeof(vsock_buf_bytes))) {
+		perror("setsockopt");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Apply the '-r' value as SO_RCVLOWAT, which takes an int. */
+static void set_rcvlowat(int fd)
+{
+	int lowat = rcvlowat_bytes;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat))) {
+		perror("setsockopt 1");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/*
+ * Create a stream vsock socket, size its buffers, enable zerocopy when a
+ * zerocopy mode was requested and connect to cid:port.
+ *
+ * Returns the connected fd, or -1 with errno describing the failure.
+ */
+static int vsock_connect(unsigned int cid, unsigned int port)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = port,
+			.svm_cid = cid,
+		},
+	};
+	unsigned long zc_on = 1;
+	int saved_errno;
+	int fd;
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	vsock_increase_buf_size(fd);
+
+	if (zerocopy_mode != ZEROCOPY_MODE_NO &&
+	    setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_ZEROCOPY,
+		       &zc_on, sizeof(zc_on)))
+		goto err_close;
+
+	if (connect(fd, &addr.sa, sizeof(addr.svm)) < 0)
+		goto err_close;
+
+	return fd;
+
+err_close:
+	/* close() may clobber errno, which callers report via perror(). */
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+	return -1;
+}
+
+/*
+ * Server side: accept one connection and write '-m' bytes of zeroes to
+ * it in chunks of at most '-b' bytes, then report the time spent.
+ */
+static void run_server(void)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = port,
+			.svm_cid = VMADDR_CID_ANY,
+		},
+	};
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} clientaddr;
+	socklen_t clientaddr_len = sizeof(clientaddr.svm);
+	uint64_t tx_begin_ns;
+	size_t total_send;
+	int client_fd;
+	char *data;
+	int fd;
+
+	fprintf(stderr, "Run as server, listen %i, send %lu, tx buf %lu vsock buf %lu\n",
+		port, to_send_bytes, buf_size_bytes,
+		vsock_buf_bytes);
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
+
+	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+		perror("bind");
+		exit(EXIT_FAILURE);
+	}
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		exit(EXIT_FAILURE);
+	}
+
+	client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+	if (client_fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	vsock_increase_buf_size(client_fd);
+	vsock_increase_buf_size(fd);
+
+	data = malloc(buf_size_bytes);
+	if (!data) {
+		fprintf(stderr, "malloc failed\n");
+		close(client_fd);
+		close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Payload content is irrelevant for the benchmark; send zeroes. */
+	memset(data, 0, buf_size_bytes);
+	total_send = 0;
+	tx_begin_ns = current_nsec();
+
+	while (total_send < to_send_bytes) {
+		size_t chunk = to_send_bytes - total_send;
+		ssize_t sent;
+
+		/* Never send more than the total requested with '-m'. */
+		if (chunk > buf_size_bytes)
+			chunk = buf_size_bytes;
+
+		sent = write(client_fd, data, chunk);
+		if (sent <= 0) {
+			perror("write");
+			exit(EXIT_FAILURE);
+		}
+
+		total_send += sent;
+	}
+
+	fprintf(stderr, "Tx loop time %f sec\n",
+		nsec_to_sec(current_nsec() - tx_begin_ns));
+
+	close(client_fd);
+	close(fd);
+
+	free(data);
+}
+
+/*
+ * Client, copy mode: wait for data with poll() and drain the socket with
+ * read() until the peer closes, timing the read() calls.
+ */
+static void run_client(void)
+{
+	unsigned int read_cnt;
+	uint64_t rx_begin_ns;
+	uint64_t in_read_ns;
+	void *data;
+	int fd;
+
+	fprintf(stderr, "Running client, copy, peer %i:%i, rx buf %lu, vsock buf %lu\n",
+		peer_cid, port, buf_size_bytes, vsock_buf_bytes);
+
+	fd = vsock_connect(peer_cid, port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	set_rcvlowat(fd);
+
+	data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE,
+		    MAP_ANONYMOUS | MAP_PRIVATE | MAP_POPULATE, -1, 0);
+	if (data == MAP_FAILED) {
+		perror("mmap");
+		exit(EXIT_FAILURE);
+	}
+
+	in_read_ns = 0;
+	read_cnt = 0;
+	rx_begin_ns = current_nsec();
+
+	while (1) {
+		struct pollfd fds = {
+			.fd = fd,
+			.events = POLLIN | POLLERR | POLLHUP |
+				  POLLRDHUP | POLLNVAL,
+		};
+
+		if (poll(&fds, 1, -1) < 0) {
+			perror("poll");
+			exit(EXIT_FAILURE);
+		}
+
+		if (fds.revents & (POLLERR | POLLNVAL)) {
+			fprintf(stderr, "Poll error\n");
+			break;
+		}
+
+		/*
+		 * Handle POLLIN before hangup: data may still be queued
+		 * when the peer closes, and read() returns 0 once the
+		 * socket is drained.
+		 */
+		if (fds.revents & POLLIN) {
+			ssize_t bytes_read;
+			uint64_t t;
+
+			t = current_nsec();
+			bytes_read = read(fd, data, buf_size_bytes);
+			in_read_ns += (current_nsec() - t);
+			read_cnt++;
+
+			if (!bytes_read)
+				break;
+
+			if (bytes_read < 0) {
+				perror("read");
+				exit(EXIT_FAILURE);
+			}
+
+			continue;
+		}
+
+		if (fds.revents & (POLLHUP | POLLRDHUP))
+			break;
+	}
+
+	fprintf(stderr, "Rx loop time %f sec\n",
+		nsec_to_sec(current_nsec() - rx_begin_ns));
+	fprintf(stderr, "Total in 'read()' %f sec\n", nsec_to_sec(in_read_ns));
+	fprintf(stderr, "POLLIN wakeups: %u\n", read_cnt);
+	fprintf(stderr, "Average in 'read()' %f ns\n",
+		avg_nsec(in_read_ns, read_cnt));
+
+	munmap(data, buf_size_bytes);
+	close(fd);
+}
+
+/*
+ * Client, zerocopy mode: wait for data with poll(), map it with
+ * map_rx() and release the data pages with madvise(MADV_DONTNEED),
+ * timing both calls.
+ */
+static void run_client_zerocopy(void)
+{
+	unsigned int rx_cnt;
+	uint64_t rx_begin_ns;
+	uint64_t in_rx_ns;
+	void *rx_va;
+	bool done;
+	int fd;
+
+	fprintf(stderr, "Running client, zerocopy, peer %i:%i, rx buf %lu, vsock buf %lu\n",
+		peer_cid, port, buf_size_bytes, vsock_buf_bytes);
+
+	/* vsock_connect() creates the socket itself. */
+	fd = vsock_connect(peer_cid, port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	set_rcvlowat(fd);
+
+	rx_va = mmap(NULL, buf_size_bytes, PROT_READ, MAP_SHARED, fd, 0);
+	if (rx_va == MAP_FAILED) {
+		perror("mmap");
+		exit(EXIT_FAILURE);
+	}
+
+	done = false;
+	in_rx_ns = 0;
+	rx_cnt = 0;
+	rx_begin_ns = current_nsec();
+
+	while (1) {
+		struct pollfd fds = {
+			.fd = fd,
+			.events = POLLIN | POLLERR | POLLHUP |
+				  POLLRDHUP | POLLNVAL,
+		};
+
+		if (poll(&fds, 1, -1) < 0) {
+			perror("poll");
+			exit(EXIT_FAILURE);
+		}
+
+		if (fds.revents & (POLLHUP | POLLRDHUP))
+			done = true;
+
+		if (fds.revents & (POLLERR | POLLNVAL)) {
+			fprintf(stderr, "Poll error\n");
+			break;
+		}
+
+		if (fds.revents & POLLIN) {
+			struct virtio_vsock_usr_hdr_pref *hdr_pref;
+			uint64_t t;
+
+			t = current_nsec();
+			map_rx(fd, rx_va);
+			in_rx_ns += (current_nsec() - t);
+
+			/* The header prefix is read from the mapping start. */
+			hdr_pref = (struct virtio_vsock_usr_hdr_pref *)rx_va;
+
+			if (!hdr_pref->hdr_num && done)
+				break;
+
+			t = current_nsec();
+			if (madvise((char *)rx_va + PAGE_SIZE,
+				    buf_size_bytes - PAGE_SIZE,
+				    MADV_DONTNEED)) {
+				perror("madvise");
+				exit(EXIT_FAILURE);
+			}
+			in_rx_ns += (current_nsec() - t);
+			rx_cnt++;
+		}
+
+		if (done)
+			break;
+	}
+
+	fprintf(stderr, "Rx loop time %f sec\n",
+		nsec_to_sec(current_nsec() - rx_begin_ns));
+	fprintf(stderr, "Total in 'getsockopt()' + 'madvise()' %f sec\n",
+		nsec_to_sec(in_rx_ns));
+	fprintf(stderr, "POLLIN wakeups: %u\n", rx_cnt);
+	fprintf(stderr, "Average in 'getsockopt()' + 'madvise()' %f ns\n",
+		avg_nsec(in_rx_ns, rx_cnt));
+
+	munmap(rx_va, buf_size_bytes);
+	close(fd);
+}
+
+/*
+ * Client, zerocopy mode with a userspace busyloop: spin on 'poll_value'
+ * in the mapped header instead of calling poll(), then map and release
+ * data the same way the poll()-based zerocopy client does.
+ */
+static void run_client_user_poll(void)
+{
+	unsigned int rx_cnt;
+	uint64_t rx_begin_ns;
+	uint64_t in_rx_ns;
+	uint32_t poll_value;
+	void *rx_va;
+	int fd;
+
+	fprintf(stderr, "Running client, user poll, peer %i:%i, rx buf %lu, vsock buf %lu\n",
+		peer_cid, port, buf_size_bytes, vsock_buf_bytes);
+
+	fd = vsock_connect(peer_cid, port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	rx_va = mmap(NULL, buf_size_bytes, PROT_READ, MAP_SHARED, fd, 0);
+	if (rx_va == MAP_FAILED) {
+		perror("mmap");
+		exit(EXIT_FAILURE);
+	}
+
+	poll_value = 0;
+	in_rx_ns = 0;
+	rx_cnt = 0;
+	rx_begin_ns = current_nsec();
+
+	while (1) {
+		volatile struct virtio_vsock_usr_hdr_pref *poll_hdr;
+		bool leave_loop = false;
+		uint64_t t;
+
+		poll_hdr = (struct virtio_vsock_usr_hdr_pref *)rx_va;
+
+		/* Spin until 'poll_value' is set; all-ones sticks once seen. */
+		if (poll_value != UINT32_MAX) {
+			do {
+				poll_value = poll_hdr->poll_value;
+			} while (!poll_value);
+		}
+
+		t = current_nsec();
+		map_rx(fd, rx_va);
+		in_rx_ns += (current_nsec() - t);
+
+		/* All-ones with no headers left: finish after this pass. */
+		if (!poll_hdr->hdr_num && poll_value == UINT32_MAX)
+			leave_loop = true;
+
+		t = current_nsec();
+		if (madvise((char *)rx_va + PAGE_SIZE,
+			    buf_size_bytes - PAGE_SIZE,
+			    MADV_DONTNEED)) {
+			perror("madvise");
+			exit(EXIT_FAILURE);
+		}
+		in_rx_ns += (current_nsec() - t);
+		rx_cnt++;
+
+		if (leave_loop)
+			break;
+	}
+
+	fprintf(stderr, "Rx loop time %f sec\n",
+		nsec_to_sec(current_nsec() - rx_begin_ns));
+	fprintf(stderr, "Total in 'getsockopt()' + 'madvise()' %f sec\n",
+		nsec_to_sec(in_rx_ns));
+	fprintf(stderr, "Busyloop wakeups: %u\n", rx_cnt);
+	fprintf(stderr, "Average in 'getsockopt()' + 'madvise()' %f ns\n",
+		avg_nsec(in_rx_ns, rx_cnt));
+
+	munmap(rx_va, buf_size_bytes);
+	close(fd);
+}
+
+/* Print the help text and exit with a failure status. */
+static void usage(void)
+{
+	fprintf(stderr, "Usage: vsock_rx_perf [-h] [-s] [-c <cid>] [-p <port>]\n"
+		"       [-z n|y|u] [-m <bytes to send>] [-b <buffer size>]\n"
+		"       [-r <rcvlowat>] [-v <vsock buffer size>]\n"
+		"\n"
+		"Server: vsock_rx_perf -s\n"
+		"Client: vsock_rx_perf -c <cid>\n"
+		"\n"
+		"This is benchmarking utility, to test vsock receive performance.\n"
+		"It runs in two modes: server or client. In server mode, it waits\n"
+		"connection from client, and when established, server starts data\n"
+		"transmission. Total size of data to send is set by '-m' option.\n"
+		"\n"
+		"Client could read this data in three different modes:\n"
+		"1) Using 'read()' system call. Default mode.\n"
+		"2) Zerocopy mode, use 'poll()' to wait data.\n"
+		"3) Zerocopy mode, use busyloop in userpace to wait data.\n"
+		"\n"
+		"Meaning of '-b' depends of server or client mode. In server\n"
+		"mode, it is size of tx buffer, passed to 'write()'. In client mode,\n"
+		"it is size of rx buffer(without zerocopy) passed to 'read()'. With\n"
+		"zerocopy enabled, it is size of rx mapping.\n"
+		"\n"
+		"Sizes accept K, M, G, T, P and E suffixes (powers of 1024).\n"
+		"\n"
+		"Options:\n"
+		"  -h                       This help message\n"
+		"  -s                       Run as server (client is default)\n"
+		"  -c <cid>                 CID of the peer (client only)\n"
+		"  -p <port>                Port\n"
+		"  -z n|y|u                 Data waiting mode\n"
+		"  -m <bytes to send>       Bytes to send (server only)\n"
+		"  -b <buffer size>         Depends on server/client mode\n"
+		"  -r <rcvlowat>            SO_RCVLOWAT value (client only)\n"
+		"  -v <vsock buffer size>   vsock buffer size\n"
+		"\n");
+	exit(EXIT_FAILURE);
+}
+
+/* Parse the command line and run the selected server or client mode. */
+int main(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "sc:p:m:b:z:r:hv:")) != -1) {
+		switch (c) {
+		case 'v':
+			vsock_buf_bytes = memparse(optarg);
+			break;
+		case 'r':
+			rcvlowat_bytes = memparse(optarg);
+			break;
+		case 's':
+			client_mode = false;
+			break;
+		case 'c':
+			peer_cid = atoi(optarg);
+			break;
+		case 'p':
+			port = atoi(optarg);
+			break;
+		case 'm':
+			to_send_bytes = memparse(optarg);
+			break;
+		case 'b':
+			buf_size_bytes = memparse(optarg);
+			break;
+		case 'z':
+			if (!strcmp(optarg, "n"))
+				zerocopy_mode = ZEROCOPY_MODE_NO;
+			else if (!strcmp(optarg, "y"))
+				zerocopy_mode = ZEROCOPY_MODE_POLL;
+			else if (!strcmp(optarg, "u"))
+				zerocopy_mode = ZEROCOPY_MODE_USER_POLL;
+			else
+				usage();
+			break;
+		case 'h':
+		default:
+			usage();
+			break;
+		}
+	}
+
+	if (!buf_size_bytes) {
+		fprintf(stderr, "Buffer size ('-b') must not be zero\n");
+		exit(EXIT_FAILURE);
+	}
+
+	if (!client_mode) {
+		run_server();
+		return 0;
+	}
+
+	if (peer_cid < 0) {
+		fprintf(stderr, "Client mode needs the peer CID ('-c')\n");
+		usage();
+	}
+
+	/* Zerocopy clients madvise() from PAGE_SIZE up to the mapping end. */
+	if (zerocopy_mode != ZEROCOPY_MODE_NO && buf_size_bytes < PAGE_SIZE) {
+		fprintf(stderr, "Zerocopy needs '-b' of at least %d bytes\n",
+			PAGE_SIZE);
+		exit(EXIT_FAILURE);
+	}
+
+	switch (zerocopy_mode) {
+	case ZEROCOPY_MODE_NO:
+		run_client();
+		break;
+	case ZEROCOPY_MODE_POLL:
+		run_client_zerocopy();
+		break;
+	case ZEROCOPY_MODE_USER_POLL:
+		run_client_user_poll();
+		break;
+	}
+
+	return 0;
+}
-- 
2.35.0