From: Roman Stratiienko <roman.stratiienko@xxxxxxxxxxxxxxx> Adding support to nbd to use it as a root device. This code essentially provides a minimal nbd-client implementation within the kernel. It opens a socket and makes the negotiation with the server. Afterwards it passes the socket to the normal nbd-code to handle the connection. The arguments for the server are passed via kernel command line. The kernel command line has the format 'nbdroot=[<SERVER_IP>:]<SERVER_PORT>/<EXPORT_NAME>'. SERVER_IP is optional. If it is not available it will use the root_server_addr transmitted through DHCP. Based on those arguments, the connection to the server is established and is connected to the nbd0 device. The rootdevice therefore is root=/dev/nbd0. Patch was initially posted by Markus Pargmann <mpa@xxxxxxxxxxxxxx> and can be found at https://lore.kernel.org/patchwork/patch/532556/ Change-Id: I78f7313918bf31b9dc01a74a42f0f068bede312c Signed-off-by: Roman Stratiienko <roman.stratiienko@xxxxxxxxxxxxxxx> Reviewed-by: Aleksandr Bulyshchenko <A.Bulyshchenko@xxxxxxxxxxxxxxx> --- drivers/block/Kconfig | 19 +++ drivers/block/nbd.c | 294 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 313 insertions(+) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 20bb4bfa4be6..e17f2376de60 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -273,6 +273,25 @@ config BLK_DEV_NBD If unsure, say N. +config BLK_DEV_NBDROOT + bool "Early network block device client support" + depends on BLK_DEV_NBD=y + ---help--- + Saying yes will enable kernel NBD client support. This allows to + connect entire disk with multiple partitions before mounting rootfs. + + The arguments for the server are passed via kernel command line. + The kernel command line has the format + 'nbdroot=[<SERVER_IP>:]<SERVER_PORT>/<EXPORT_NAME>'. + SERVER_IP is optional. If it is not available it will use the + root_server_addr transmitted through DHCP. 
+
+	  Based on those arguments, the connection to the server is established
+	  and is connected to the nbd0 device. The rootdevice therefore is
+	  root=/dev/nbd0.
+
+	  If unsure, say N.
+
 config BLK_DEV_SKD
 	tristate "STEC S1120 Block Driver"
 	depends on PCI
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 63fcfb38e640..cb5e60419e07 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -46,6 +46,53 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/nbd.h>
 
+#include <net/ipconfig.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs.h>
+
+#ifdef CONFIG_BLK_DEV_NBDROOT
+
+/* "No address" marker in network byte order */
+#define ADDR_NONE cpu_to_be32(INADDR_NONE)
+
+/* Magic values exchanged at the start of the handshake */
+static const char nbd_magic[] = "NBDMAGIC";
+static const u64 nbd_opts_magic = 0x49484156454F5054LL;
+
+/* Options used for the kernel driver */
+#define NBD_OPT_EXPORT_NAME 1
+
+/* Bytes of padding that end the reply to NBD_OPT_EXPORT_NAME */
+#define NBD_HANDSHAKE_PAD_LEN 124
+
+#define NBD_DEFAULT_BLOCKSIZE 512 /* bytes */
+
+#define NBD_DEFAULT_TIMEOUT 2 /* seconds */
+
+#define NBD_MAXPATHLEN NFS_MAXPATHLEN
+
+/**
+ * struct nbdroot - settings for one early NBD connection
+ * @bdev: name of the block device to attach to
+ * @server_addr: server IPv4 address (network byte order), or ADDR_NONE
+ * @server_port: server TCP port (network byte order), 0 if unconfigured
+ * @block_size: block size in bytes passed to nbd_size_set()
+ * @timeout: request timeout in seconds
+ * @server_export: name of the export requested from the server
+ */
+struct nbdroot {
+	const char *bdev;
+	__be32 server_addr;
+	__be16 server_port;
+	loff_t block_size;
+	int timeout;
+	char server_export[NBD_MAXPATHLEN + 1];
+};
+
+#endif /* CONFIG_BLK_DEV_NBDROOT */
+
 static DEFINE_IDR(nbd_index_idr);
 static DEFINE_MUTEX(nbd_index_mutex);
 static int nbd_total_devices = 0;
@@ -441,6 +488,25 @@ static int sock_xmit(struct socket *sock, int send,
 	return result;
 }
 
+#ifdef CONFIG_BLK_DEV_NBDROOT
+/*
+ * Wrap the kernel buffer @buf of @size bytes in a one-segment kvec iterator
+ * and pass it to sock_xmit(). A non-zero @send sends the buffer, zero
+ * receives into it. Returns whatever sock_xmit() returns.
+ */
+static int sock_xmit_buf(struct socket *sock, int send,
+			 void *buf, size_t size)
+{
+	struct kvec iov = {.iov_base = buf, .iov_len = size};
+	struct iov_iter iter;
+	/* The iterator direction has to match the transfer direction */
+	int dir = send ? WRITE : READ;
+
+	iov_iter_kvec(&iter, dir | ITER_KVEC, &iov, 1, size);
+	return sock_xmit(sock, send, &iter, 0, NULL);
+}
+#endif /* CONFIG_BLK_DEV_NBDROOT */
+
 static int nbd_xmit(struct nbd_device *nbd, int index, int send,
 		    struct iov_iter *iter, int msg_flags, int *sent)
 {
@@ -2301,6 +2367,365 @@ static void __exit nbd_cleanup(void)
 	unregister_blkdev(NBD_MAJOR, "nbd");
 }
 
+#ifdef CONFIG_BLK_DEV_NBDROOT
+
+/* Settings for nbd0, filled in from the "nbdroot=" kernel parameter */
+static struct nbdroot nbdroot_0 = {
+	.bdev = "nbd0",
+	.server_export = "",
+	.server_addr = ADDR_NONE,
+	.timeout = NBD_DEFAULT_TIMEOUT,
+	.block_size = NBD_DEFAULT_BLOCKSIZE,
+};
+
+/*
+ * Create a TCP socket and connect it to the server given by @nbdroot.
+ *
+ * Returns 0 and stores the connected socket in @socket on success. On
+ * failure a negative errno is returned, the socket is released again and
+ * @socket is not written.
+ */
+static int nbd_connect(struct nbdroot *nbdroot, struct socket **socket)
+{
+	/* Members that are not named here start out as zero */
+	struct sockaddr_in sockaddr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = nbdroot->server_addr,
+		.sin_port = nbdroot->server_port,
+	};
+	struct socket *sock;
+	int val = 1;
+	int err;
+
+	err = sock_create_kern(&init_net, AF_INET, SOCK_STREAM,
+			       IPPROTO_TCP, &sock);
+	if (err < 0)
+		return err;
+
+	/*
+	 * TCP_NODELAY takes an int sized value. It is only an optimisation,
+	 * so a failure is reported but does not abort the connection.
+	 */
+	err = sock->ops->setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
+				    (char *)&val, sizeof(val));
+	if (err < 0)
+		pr_warn("nbdroot: setting TCP_NODELAY failed %d\n", err);
+
+	err = sock->ops->connect(sock, (struct sockaddr *)&sockaddr,
+				 sizeof(sockaddr), 0);
+	if (err < 0) {
+		/* The caller never gets to see this socket, drop it here */
+		sock_release(sock);
+		return err;
+	}
+
+	*socket = sock;
+
+	return 0;
+}
+
+/*
+ * Perform the client side of the handshake on @sock and ask for the export
+ * called @export_name.
+ *
+ * Received from the server: the 8 byte "NBDMAGIC" string, the 64-bit option
+ * magic and 16 bits of handshake flags. Sent in return: 32 bits of client
+ * flags (all zero), the option magic, NBD_OPT_EXPORT_NAME, the length of the
+ * name and the name itself. The server answers with the 64-bit export size,
+ * 16 bits of transmission flags and NBD_HANDSHAKE_PAD_LEN bytes of padding.
+ * All integers are big endian on the wire.
+ *
+ * Returns 0 on success with the export size in bytes stored in @rsize and
+ * the transmission flags in @nflags. Returns -EINVAL if a magic value does
+ * not match, or the negative error from sock_xmit_buf().
+ */
+static int nbd_connection_negotiate(struct socket *sock, char *export_name,
+				    u64 *rsize, u16 *nflags)
+{
+	char buf[NBD_HANDSHAKE_PAD_LEN];
+	/* The name goes on the wire without its terminating NUL */
+	size_t name_len = strlen(export_name);
+	__be32 client_flags = 0;
+	__be32 opt = htonl(NBD_OPT_EXPORT_NAME);
+	__be32 wire_len = htonl(name_len);
+	__be64 magic;
+	__be64 nbd_size;
+	__be16 flags;
+	int ret;
+
+	ret = sock_xmit_buf(sock, 0, buf, sizeof(nbd_magic) - 1);
+	if (ret < 0)
+		return ret;
+
+	/* The greeting is fixed-size binary data, not a C string */
+	if (memcmp(buf, nbd_magic, sizeof(nbd_magic) - 1))
+		return -EINVAL;
+
+	ret = sock_xmit_buf(sock, 0, &magic, sizeof(magic));
+	if (ret < 0)
+		return ret;
+
+	if (be64_to_cpu(magic) != nbd_opts_magic)
+		return -EINVAL;
+
+	/* Handshake flags: read to stay in sync, nothing here uses them */
+	ret = sock_xmit_buf(sock, 0, &flags, sizeof(flags));
+	if (ret < 0)
+		return ret;
+
+	ret = sock_xmit_buf(sock, 1, &client_flags, sizeof(client_flags));
+	if (ret < 0)
+		return ret;
+
+	magic = cpu_to_be64(nbd_opts_magic);
+	ret = sock_xmit_buf(sock, 1, &magic, sizeof(magic));
+	if (ret < 0)
+		return ret;
+
+	ret = sock_xmit_buf(sock, 1, &opt, sizeof(opt));
+	if (ret < 0)
+		return ret;
+
+	ret = sock_xmit_buf(sock, 1, &wire_len, sizeof(wire_len));
+	if (ret < 0)
+		return ret;
+
+	/* An empty name leaves nothing to send after the length */
+	if (name_len) {
+		ret = sock_xmit_buf(sock, 1, export_name, name_len);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = sock_xmit_buf(sock, 0, &nbd_size, sizeof(nbd_size));
+	if (ret < 0)
+		return ret;
+
+	ret = sock_xmit_buf(sock, 0, &flags, sizeof(flags));
+	if (ret < 0)
+		return ret;
+
+	/* Consume the padding that ends the reply */
+	ret = sock_xmit_buf(sock, 0, buf, sizeof(buf));
+	if (ret < 0)
+		return ret;
+
+	*rsize = be64_to_cpu(nbd_size);
+	*nflags = be16_to_cpu(flags);
+
+	return 0;
+}
+
+/*
+ * Attach the negotiated connection to @nbd and start the device.
+ *
+ * @sock points to the connected socket. Once nbd_add_socket() has accepted
+ * it, *@sock is set to NULL so that the caller does not release a socket the
+ * device still uses. @rsize is the export size in bytes and @flags are the
+ * transmission flags reported by the server.
+ *
+ * Returns 0 or a negative errno.
+ *
+ * NOTE(review): this runs without nbd->config_lock and keeps the block
+ * device open once the socket has been added - confirm both against what
+ * nbd_add_socket() and nbd_start_device_ioctl() expect from their callers.
+ */
+static int nbd_bind_connection(struct nbdroot *nbdroot, struct nbd_device *nbd,
+			       struct socket **sock, u64 rsize, u32 flags)
+{
+	const fmode_t mode = FMODE_READ | FMODE_WRITE;
+	struct block_device *bdev;
+	int conn, ret;
+
+	bdev = blkdev_get_by_dev(disk_devt(nbd->disk), mode, NULL);
+	if (IS_ERR(bdev)) {
+		pr_err("nbdroot: blkdev_get_by_dev failed %ld\n",
+		       PTR_ERR(bdev));
+		return PTR_ERR(bdev);
+	}
+
+	conn = nbd->config->num_connections;
+	ret = nbd_add_socket(nbd, *sock, false);
+	if (ret) {
+		pr_err("nbdroot: add socket failed %d\n", ret);
+		/* Nothing got attached, so close the device again */
+		blkdev_put(bdev, mode);
+		return ret;
+	}
+	/* From here on the device refers to the socket */
+	*sock = NULL;
+
+	mutex_lock(&nbd->config->socks[conn]->tx_lock);
+
+	nbd->config->flags = flags;
+
+	nbd_size_set(nbd, nbdroot->block_size,
+		     div_u64(rsize, nbdroot->block_size));
+
+	nbd->tag_set.timeout = nbdroot->timeout * HZ;
+	blk_queue_rq_timeout(nbd->disk->queue, nbdroot->timeout * HZ);
+
+	mutex_unlock(&nbd->config->socks[conn]->tx_lock);
+
+	ret = nbd_start_device_ioctl(nbd, bdev);
+	if (ret) {
+		pr_err("nbdroot: start device ioctl failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Kernel thread body: set up the connection described by @arg, which
+ * points to a struct nbdroot. It looks up the block device, resolves the
+ * server address, connects, negotiates and binds the socket to the device.
+ *
+ * Returns 0 or a negative errno; every failure is logged as well.
+ */
+static int nbdroot_thread(void *arg)
+{
+	struct nbdroot *nbdroot = arg;
+	struct socket *sock = NULL;
+	struct nbd_device *nbd;
+	struct gendisk *disk;
+	dev_t devt;
+	int partno;
+	u64 rsize;
+	u16 nflags;
+	int ret;
+
+	devt = blk_lookup_devt(nbdroot->bdev, 0);
+	disk = devt ? get_gendisk(devt, &partno) : NULL;
+	if (!disk) {
+		pr_err("nbdroot: block device %s not found\n", nbdroot->bdev);
+		ret = -ENODEV;
+		goto err;
+	}
+	nbd = disk->private_data;
+
+	/*
+	 * Without an address on the command line, use the server address
+	 * from IP autoconfiguration. This is resolved here, not while the
+	 * "nbdroot=" option is parsed, because the option is parsed long
+	 * before the network has been configured.
+	 */
+	if (nbdroot->server_addr == ADDR_NONE)
+		nbdroot->server_addr = root_server_addr;
+	if (nbdroot->server_addr == ADDR_NONE) {
+		pr_err("nbdroot: Failed to find server address\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = nbd_connect(nbdroot, &sock);
+	if (ret) {
+		pr_err("nbdroot: connect failed %d\n", ret);
+		goto err;
+	}
+
+	ret = nbd_connection_negotiate(sock, nbdroot->server_export,
+				       &rsize, &nflags);
+	if (ret) {
+		pr_err("nbdroot: negotiation failed %d\n", ret);
+		goto err;
+	}
+
+	ret = nbd_bind_connection(nbdroot, nbd, &sock, rsize, nflags);
+	if (ret) {
+		pr_err("nbdroot: nbd_bind_connection failed %d\n", ret);
+		goto err;
+	}
+	return 0;
+
+err:
+	pr_err("nbdroot: %s init failed, IP: %pI4, port: %i, export: %s\n",
+	       nbdroot->bdev, &nbdroot->server_addr,
+	       ntohs(nbdroot->server_port), nbdroot->server_export);
+
+	/* Still set only if the socket was never handed to the device */
+	if (sock)
+		sock_release(sock);
+
+	return ret;
+}
+
+/*
+ * Start the connection thread if "nbdroot=" supplied a server port. A zero
+ * port means the option was absent or could not be parsed.
+ */
+static int __init nbdroot_init(void)
+{
+	struct task_struct *task;
+
+	if (nbdroot_0.server_port == 0)
+		return 0;
+
+	task = kthread_run(nbdroot_thread, &nbdroot_0, "nbdroot_0");
+	if (IS_ERR(task)) {
+		pr_err("nbdroot: failed to start thread %ld\n", PTR_ERR(task));
+		return PTR_ERR(task);
+	}
+
+	return 0;
+}
+
+/* We need this in late_initcall_sync to be sure that the network is setup */
+late_initcall_sync(nbdroot_init);
+
+/*
+ * Parse format "[<SERVER_IP>:]<SERVER_PORT>/<EXPORT_NAME>"
+ *
+ * The settings in nbdroot_0 are only updated when the whole option is
+ * valid. A missing SERVER_IP is stored as ADDR_NONE and resolved when the
+ * connection is made.
+ *
+ * Always returns 1: a __setup() handler returns non-zero once it has dealt
+ * with its option, and a malformed "nbdroot=" is reported here instead of
+ * being left for anyone else.
+ */
+static int __init nbdroot_setup(char *line)
+{
+	struct nbdroot *nbdroot = &nbdroot_0;
+	char buf[NBD_MAXPATHLEN + 1];
+	__be32 addr;
+	char *export;
+	u16 port;
+
+	/* strlcpy() returns the source length, so this detects truncation */
+	if (strlcpy(buf, line, sizeof(buf)) >= sizeof(buf)) {
+		pr_err("nbdroot: option is too long\n");
+		return 1;
+	}
+
+	/* Expected to consume a leading "<SERVER_IP>:" from buf, if present */
+	addr = root_nfs_parse_addr(buf);
+
+	export = strchr(buf, '/');
+	if (!export) {
+		pr_err("nbdroot: no '/<EXPORT_NAME>' in \"%s\"\n", line);
+		return 1;
+	}
+	*export++ = '\0';
+
+	/* Port 0 is what nbdroot_init() treats as "not configured" */
+	if (kstrtou16(buf, 10, &port) || port == 0) {
+		pr_err("nbdroot: invalid server port \"%s\"\n", buf);
+		return 1;
+	}
+
+	nbdroot->server_addr = addr;
+	nbdroot->server_port = htons(port);
+	strlcpy(nbdroot->server_export, export,
+		sizeof(nbdroot->server_export));
+
+	return 1;
+}
+
+__setup("nbdroot=", nbdroot_setup);
+
+#endif /* CONFIG_BLK_DEV_NBDROOT */
+
 module_init(nbd_init);
 module_exit(nbd_cleanup);
 
-- 
2.17.1