We have found cases where single-threaded IO applications really suffer from the CPU-to-connection mapping in NBD. With the OSS nbd server you can run out of threads to meaningfully service all of the IO coming in over one connection, which will cut your throughput in half. The internal FB server uses folly's async socket, which has a single thread reading from and writing to the socket, and that limits throughput. Adding a naive round-robin mode to the nbd driver itself, so that requests are spread across all of its available sockets, gives us a 2-3x performance improvement in the single-threaded IO workload, as we essentially force the user space server to get more balanced traffic. This is OK from an NBD perspective because we don't really care which socket a request goes out on, and in fact if there is a connection failure on one socket we'll re-route requests to other sockets without issue. Signed-off-by: Josef Bacik <josef@xxxxxxxxxxxxxx> --- drivers/block/nbd.c | 14 ++++++++++++++ include/uapi/linux/nbd.h | 2 ++ 2 files changed, 16 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 90ba9f4c03f3..53463217fbe9 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -77,6 +77,7 @@ struct link_dead_args { #define NBD_BOUND 5 #define NBD_DESTROY_ON_DISCONNECT 6 #define NBD_DISCONNECT_ON_CLOSE 7 +#define NBD_ROUND_ROBIN 8 struct nbd_config { u32 flags; @@ -84,6 +85,7 @@ struct nbd_config { u64 dead_conn_timeout; struct nbd_sock **socks; + atomic_t connection_counter; int num_connections; atomic_t live_connections; wait_queue_head_t conn_wait; @@ -830,6 +832,10 @@ static int nbd_handle_cmd(struct nbd_cmd *cmd, int index) blk_mq_start_request(req); return -EINVAL; } + if (test_bit(NBD_ROUND_ROBIN, &config->runtime_flags)) + index = (atomic_inc_return(&config->connection_counter) % + config->num_connections); + cmd->status = BLK_STS_OK; again: nsock = config->socks[index]; @@ -1322,6 +1328,7 @@ static struct nbd_config *nbd_alloc_config(void) 
init_waitqueue_head(&config->conn_wait); config->blksize = 1024; atomic_set(&config->live_connections, 0); + atomic_set(&config->connection_counter, 0); try_module_get(THIS_MODULE); return config; } @@ -1782,6 +1789,8 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) set_bit(NBD_DISCONNECT_ON_CLOSE, &config->runtime_flags); } + if (flags & NBD_CFLAG_ROUND_ROBIN) + set_bit(NBD_ROUND_ROBIN, &config->runtime_flags); } if (info->attrs[NBD_ATTR_SOCKETS]) { @@ -1953,6 +1962,11 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) clear_bit(NBD_DISCONNECT_ON_CLOSE, &config->runtime_flags); } + + if (flags & NBD_CFLAG_ROUND_ROBIN) + set_bit(NBD_ROUND_ROBIN, &config->runtime_flags); + else + clear_bit(NBD_ROUND_ROBIN, &config->runtime_flags); } if (info->attrs[NBD_ATTR_SOCKETS]) { diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 20d6cc91435d..ea74ba420dfa 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -56,6 +56,8 @@ enum { #define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on * close by last opener. */ +#define NBD_CFLAG_ROUND_ROBIN (1 << 2) /* round robin requests on the + * connections for the device. */ /* userspace doesn't need the nbd_device structure */ -- 2.14.3