Currently, the hv_sock buffer size is static and can't scale to the bandwidth requirements of the application. This change allows the applications to influence the socket buffer sizes using the SO_SNDBUF and the SO_RCVBUF socket options. Few interesting points to note: 1. Since the VMBUS does not allow a resize operation of the ring size, the socket buffer size option should be set prior to establishing the connection for it to take effect. 2. Setting the socket option comes with the cost of that much memory being reserved/allocated by the kernel, for the lifetime of the connection. Perf data: Total Data Transfer: 1GB Single threaded reader/writer Results below are summarized over 10 iterations. Linux hvsocket writer + Windows hvsocket reader: |---------------------------------------------------------------------------------------------| |Packet size -> | 128B | 1KB | 4KB | 64KB | |---------------------------------------------------------------------------------------------| |SO_SNDBUF size | | Throughput in MB/s (min/max/avg/median): | | v | | |---------------------------------------------------------------------------------------------| | Default | 109/118/114/116 | 636/774/701/700 | 435/507/480/476 | 410/491/462/470 | | 16KB | 110/116/112/111 | 575/705/662/671 | 749/900/854/869 | 592/824/692/676 | | 32KB | 108/120/115/115 | 703/823/767/772 | 718/878/850/866 | 1593/2124/2000/2085 | | 64KB | 108/119/114/114 | 592/732/683/688 | 805/934/903/911 | 1784/1943/1862/1843 | |---------------------------------------------------------------------------------------------| Windows hvsocket writer + Linux hvsocket reader: |---------------------------------------------------------------------------------------------| |Packet size -> | 128B | 1KB | 4KB | 64KB | |---------------------------------------------------------------------------------------------| |SO_RCVBUF size | | Throughput in MB/s (min/max/avg/median): | | v | | |---------------------------------------------------------------------------------------------| | Default | 69/82/75/73 | 313/343/333/336 | 418/477/446/445 | 659/701/676/678 | | 16KB | 69/83/76/77 | 350/401/375/382 | 506/548/517/516 | 602/624/615/615 | | 32KB | 62/83/73/73 | 471/529/496/494 | 830/1046/935/939 | 944/1180/1070/1100 | | 64KB | 64/70/68/69 | 467/533/501/497 | 1260/1590/1430/1431 | 1605/1819/1670/1660 | |---------------------------------------------------------------------------------------------| Signed-off-by: Sunil Muthuswamy <sunilmut@xxxxxxxxxxxxx> --- - The table above exceeds the 75char limit for the patch. If that's a problem, I can try to squeeze it in somehow within the limit. - The patch has been previously submitted to net and reviewed. The feedback was to submit it to net-next. net/vmw_vsock/hyperv_transport.c | 50 ++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 982a8dc..8d3a7b0 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -23,14 +23,14 @@ #include <net/sock.h> #include <net/af_vsock.h> -/* The host side's design of the feature requires 6 exact 4KB pages for - * recv/send rings respectively -- this is suboptimal considering memory - * consumption, however unluckily we have to live with it, before the - * host comes up with a better design in the future. +/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some + * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer + * hosts don't have this limitation; but, keep the defaults the same for compat. */ #define PAGE_SIZE_4K 4096 #define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6) #define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6) +#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64) /* The MTU is 16KB per the host side's design */ #define HVS_MTU_SIZE (1024 * 16) @@ -344,9 +344,12 @@ static void hvs_open_connection(struct vmbus_channel *chan) struct sockaddr_vm addr; struct sock *sk, *new = NULL; - struct vsock_sock *vnew; - struct hvsock *hvs, *hvs_new; + struct vsock_sock *vnew = NULL; + struct hvsock *hvs = NULL; + struct hvsock *hvs_new = NULL; + int rcvbuf; int ret; + int sndbuf; if_type = &chan->offermsg.offer.if_type; if_instance = &chan->offermsg.offer.if_instance; @@ -388,9 +391,34 @@ static void hvs_open_connection(struct vmbus_channel *chan) } set_channel_read_mode(chan, HV_CALL_DIRECT); - ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE, - RINGBUFFER_HVS_RCV_SIZE, NULL, 0, - hvs_channel_cb, conn_from_host ? new : sk); + + /* Use the socket buffer sizes as hints for the VMBUS ring size. For + * server side sockets, 'sk' is the parent socket and thus, this will + * allow the child sockets to inherit the size from the parent. Keep + * the mins to the default value and align to page size as per VMBUS + * requirements. + * For the max, the socket core library will limit the socket buffer + * size that can be set by the user, but, since currently, the hv_sock + * VMBUS ring buffer is physically contiguous allocation, restrict it + * further. + * Older versions of hv_sock host side code cannot handle bigger VMBUS + * ring buffer size. Use the version number to limit the change to newer + * versions. + */ + if (vmbus_proto_version < VERSION_WIN10_V5) { + sndbuf = RINGBUFFER_HVS_SND_SIZE; + rcvbuf = RINGBUFFER_HVS_RCV_SIZE; + } else { + sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE); + sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE); + sndbuf = ALIGN(sndbuf, PAGE_SIZE); + rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE); + rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE); + rcvbuf = ALIGN(rcvbuf, PAGE_SIZE); + } + + ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb, + conn_from_host ? new : sk); if (ret != 0) { if (conn_from_host) { hvs_new->chan = NULL; @@ -441,6 +469,7 @@ static u32 hvs_get_local_cid(void) static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk) { struct hvsock *hvs; + struct sock *sk = sk_vsock(vsk); hvs = kzalloc(sizeof(*hvs), GFP_KERNEL); if (!hvs) @@ -448,7 +477,8 @@ static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->trans = hvs; hvs->vsk = vsk; - + sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE; + sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE; return 0; } -- 2.7.4