[PATCH 2/2] nbd: add support for nbd as root device

From: roman . stratiienko
Date: Wed Jun 12 2019 - 12:36:37 EST


From: Roman Stratiienko <roman.stratiienko@xxxxxxxxxxxxxxx>

Add support for using nbd as a root device. This code essentially
provides a minimal nbd-client implementation within the kernel. It opens
a socket and negotiates with the server, then passes the socket to the
regular nbd code to handle the connection.

The arguments for the server are passed via kernel command line.
The kernel command line has the format
'nbdroot=[<SERVER_IP>:]<SERVER_PORT>/<EXPORT_NAME>'.
SERVER_IP is optional. If it is omitted, the root_server_addr
obtained through DHCP is used instead.

Based on those arguments, a connection to the server is established
and attached to the nbd0 device. The root device is therefore
root=/dev/nbd0.

The patch was initially posted by Markus Pargmann <mpa@xxxxxxxxxxxxxx>
and can be found at https://lore.kernel.org/patchwork/patch/532556/

Change-Id: I78f7313918bf31b9dc01a74a42f0f068bede312c
Signed-off-by: Roman Stratiienko <roman.stratiienko@xxxxxxxxxxxxxxx>
Reviewed-by: Aleksandr Bulyshchenko <A.Bulyshchenko@xxxxxxxxxxxxxxx>
---
drivers/block/Kconfig | 19 +++
drivers/block/nbd.c | 294 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 313 insertions(+)

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 20bb4bfa4be6..e17f2376de60 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -273,6 +273,25 @@ config BLK_DEV_NBD

If unsure, say N.

+config BLK_DEV_NBDROOT
+	bool "Early network block device client support"
+	# root_nfs_parse_addr() and root_server_addr are provided by the
+	# kernel-level IP autoconfiguration code, so IP_PNP is required.
+	depends on BLK_DEV_NBD=y && IP_PNP
+	---help---
+	  Saying yes enables the in-kernel NBD client. This allows
+	  connecting an entire disk with multiple partitions before the
+	  root filesystem is mounted.
+
+	  The arguments for the server are passed via the kernel command
+	  line, in the format
+	  'nbdroot=[<SERVER_IP>:]<SERVER_PORT>/<EXPORT_NAME>'.
+	  SERVER_IP is optional. If it is omitted, the root_server_addr
+	  obtained through DHCP is used instead.
+
+	  Based on those arguments, a connection to the server is
+	  established and attached to the nbd0 device. The root device is
+	  therefore root=/dev/nbd0.
+
+	  If unsure, say N.
+
+
config BLK_DEV_SKD
tristate "STEC S1120 Block Driver"
depends on PCI
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 63fcfb38e640..cb5e60419e07 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -46,6 +46,35 @@
#define CREATE_TRACE_POINTS
#include <trace/events/nbd.h>

+#include <net/ipconfig.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs.h>
+
+/* "No address" sentinel, converted so it compares against __be32 addresses. */
+#define ADDR_NONE cpu_to_be32(INADDR_NONE)
+
+/* The first 8 bytes read from the server are compared against this string. */
+static const char nbd_magic[] = "NBDMAGIC";
+/* Second 64-bit magic of the handshake; the bytes spell "IHAVEOPT" in ASCII. */
+static const u64 nbd_opts_magic = 0x49484156454F5054LL;
+
+/* Options used for the kernel driver */
+#define NBD_OPT_EXPORT_NAME 1
+
+#define NBD_DEFAULT_BLOCKSIZE 512 /* bytes */
+
+#define NBD_DEFAULT_TIMEOUT 2 /* seconds */
+
+/* Export names reuse the NFS path length limit as their maximum length. */
+#define NBD_MAXPATHLEN NFS_MAXPATHLEN
+
+/*
+ * Connection parameters for one nbd device that is brought up by the
+ * kernel itself from the "nbdroot=" command line argument.
+ */
+struct nbdroot {
+	const char *bdev;	/* name of the block device, e.g. "nbd0" */
+	__be32 server_addr;	/* server IPv4 address, network byte order */
+	/*
+	 * Server TCP port, network byte order. This holds the result of
+	 * htons() and is copied into sockaddr_in.sin_port, so it is a
+	 * 16-bit big-endian value; 0 means "nbdroot not configured".
+	 */
+	__be16 server_port;
+	loff_t block_size;	/* block size in bytes */
+	int timeout;		/* request timeout in seconds */
+	char server_export[NBD_MAXPATHLEN + 1];	/* NUL-terminated export name */
+};
+
static DEFINE_IDR(nbd_index_idr);
static DEFINE_MUTEX(nbd_index_mutex);
static int nbd_total_devices = 0;
@@ -441,6 +470,16 @@ static int sock_xmit(struct socket *sock, int send,
return result;
}

+/*
+ * sock_xmit_buf - send or receive a flat kernel buffer over @sock
+ * @sock: socket to use
+ * @send: non-zero to send @buf, zero to receive into @buf
+ * @buf:  kernel buffer
+ * @size: number of bytes in @buf
+ *
+ * Wraps @buf in a single-segment kvec iterator and hands it to sock_xmit().
+ * The iterator direction follows @send (WRITE when the buffer is the data
+ * source, READ when it is the destination), matching how the rest of this
+ * driver sets up its iterators.
+ *
+ * Returns whatever sock_xmit() returns; callers treat negative values as
+ * errors.
+ */
+static int sock_xmit_buf(struct socket *sock, int send,
+			 void *buf, size_t size)
+{
+	struct kvec iov = {.iov_base = buf, .iov_len = size};
+	struct iov_iter iter;
+
+	iov_iter_kvec(&iter, (send ? WRITE : READ) | ITER_KVEC, &iov, 1, size);
+	return sock_xmit(sock, send, &iter, 0, 0);
+}
+
+
static int nbd_xmit(struct nbd_device *nbd, int index, int send,
struct iov_iter *iter, int msg_flags, int *sent)
{
@@ -2301,6 +2340,261 @@ static void __exit nbd_cleanup(void)
unregister_blkdev(NBD_MAJOR, "nbd");
}

+#ifdef CONFIG_BLK_DEV_NBDROOT
+
+/*
+ * Parameters for nbd0. server_port stays 0 until "nbdroot=" has been parsed
+ * successfully; nbdroot_init() only starts the client when it is non-zero.
+ * Only used inside this file, hence static.
+ */
+static struct nbdroot nbdroot_0 = {
+	.bdev = "nbd0",
+	.server_export = "",
+	.server_addr = ADDR_NONE,
+	.timeout = NBD_DEFAULT_TIMEOUT,
+	.block_size = NBD_DEFAULT_BLOCKSIZE,
+};
+
+/*
+ * nbd_connect - open a TCP connection to the configured NBD server
+ * @nbdroot: connection parameters (server address and port)
+ * @socket:  out: the connected socket; only written on success
+ *
+ * Returns 0 on success or a negative errno. On failure no socket is left
+ * allocated, so the caller has nothing to release.
+ */
+static int nbd_connect(struct nbdroot *nbdroot, struct socket **socket)
+{
+	/* Designated initializer also zeroes the sin_zero padding. */
+	struct sockaddr_in sockaddr = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = nbdroot->server_addr,
+		.sin_port = nbdroot->server_port,
+	};
+	struct socket *sock;
+	/* TCP socket options take an int; a shorter value is rejected. */
+	int val = 1;
+	int err;
+
+	err = sock_create_kern(&init_net, AF_INET, SOCK_STREAM,
+			       IPPROTO_TCP, &sock);
+	if (err < 0)
+		return err;
+
+	/* Best effort: a failure to disable Nagle is not fatal. */
+	sock->ops->setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *)&val,
+			      sizeof(val));
+
+	err = sock->ops->connect(sock, (struct sockaddr *)&sockaddr,
+				 sizeof(sockaddr), 0);
+	if (err < 0) {
+		/* *socket was never set, so nobody else can free this. */
+		sock_release(sock);
+		return err;
+	}
+
+	*socket = sock;
+
+	return 0;
+}
+
+/*
+ * nbd_connection_negotiate - run the client side of the NBD handshake
+ * @sock:        connected socket to the server
+ * @export_name: NUL-terminated name of the export to request
+ * @rsize:       out: export size in bytes as reported by the server
+ * @nflags:      out: 16-bit flags the server sends together with the size
+ *
+ * Sequence on the wire:
+ *   recv  8 bytes  "NBDMAGIC"
+ *   recv  u64      option magic ("IHAVEOPT")
+ *   recv  u16      handshake flags (read, not otherwise used)
+ *   send  u32      client flags (0)
+ *   send  u64      option magic
+ *   send  u32      NBD_OPT_EXPORT_NAME
+ *   send  u32      length of the export name
+ *   send  bytes    export name, without a trailing NUL
+ *   recv  u64      export size
+ *   recv  u16      flags
+ *   recv  124 bytes that are discarded
+ *
+ * Returns 0 on success, -EINVAL on a magic mismatch, -EFBIG if the export
+ * size does not fit into size_t, or the negative error from sock_xmit_buf().
+ * The output parameters are only meaningful when 0 is returned.
+ */
+static int nbd_connection_negotiate(struct socket *sock, char *export_name,
+				    size_t *rsize, u16 *nflags)
+{
+	/* Number of bytes that follow the flags and are thrown away. */
+	enum { NBD_HANDSHAKE_TAIL = 124 };
+	char buf[256];
+	size_t name_len = strlen(export_name);
+	u64 magic;
+	u64 nbd_size;
+	u32 client_flags = 0;
+	u32 opt;
+	u32 opt_len;
+	u16 flags;
+	int ret;
+
+	ret = sock_xmit_buf(sock, 0, buf, 8);
+	if (ret < 0)
+		return ret;
+
+	if (strncmp(buf, nbd_magic, 8))
+		return -EINVAL;
+
+	ret = sock_xmit_buf(sock, 0, &magic, sizeof(magic));
+	if (ret < 0)
+		return ret;
+
+	if (be64_to_cpu(magic) != nbd_opts_magic)
+		return -EINVAL;
+
+	/* Handshake flags: consumed from the stream but not needed here. */
+	ret = sock_xmit_buf(sock, 0, &flags, sizeof(flags));
+	if (ret < 0)
+		return ret;
+
+	ret = sock_xmit_buf(sock, 1, &client_flags, sizeof(client_flags));
+	if (ret < 0)
+		return ret;
+
+	magic = cpu_to_be64(nbd_opts_magic);
+	ret = sock_xmit_buf(sock, 1, &magic, sizeof(magic));
+	if (ret < 0)
+		return ret;
+
+	opt = htonl(NBD_OPT_EXPORT_NAME);
+	ret = sock_xmit_buf(sock, 1, &opt, sizeof(opt));
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Strings in the NBD protocol are not NUL-terminated: the length
+	 * field alone delimits the name, so the terminator is not sent.
+	 */
+	opt_len = htonl(name_len);
+	ret = sock_xmit_buf(sock, 1, &opt_len, sizeof(opt_len));
+	if (ret < 0)
+		return ret;
+
+	/* An empty name has no payload; do not issue a zero-length send. */
+	if (name_len) {
+		ret = sock_xmit_buf(sock, 1, export_name, name_len);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = sock_xmit_buf(sock, 0, &nbd_size, sizeof(nbd_size));
+	if (ret < 0)
+		return ret;
+	nbd_size = be64_to_cpu(nbd_size);
+
+	ret = sock_xmit_buf(sock, 0, &flags, sizeof(flags));
+	if (ret < 0)
+		return ret;
+
+	ret = sock_xmit_buf(sock, 0, buf, NBD_HANDSHAKE_TAIL);
+	if (ret < 0)
+		return ret;
+
+	/* size_t may be 32 bits wide; refuse instead of truncating. */
+	if (nbd_size != (size_t)nbd_size)
+		return -EFBIG;
+
+	*rsize = nbd_size;
+	*nflags = ntohs(flags);
+
+	return 0;
+}
+
+/*
+ * nbd_bind_connection - attach a negotiated socket to @nbd and start it
+ * @nbdroot: connection parameters (block size and timeout are used here)
+ * @nbd:     nbd device to configure
+ * @sock:    connected, negotiated socket
+ * @rsize:   export size in bytes
+ * @flags:   flags reported by the server, stored in the device config
+ *
+ * Opens the block device, adds @sock to the device, applies size, flags and
+ * timeout, then calls nbd_start_device_ioctl().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int nbd_bind_connection(struct nbdroot *nbdroot, struct nbd_device *nbd,
+			       struct socket *sock, size_t rsize, u32 flags)
+{
+	const fmode_t mode = FMODE_READ | FMODE_WRITE;
+	struct block_device *bdev;
+	int conn, ret;
+
+	bdev = blkdev_get_by_dev(disk_devt(nbd->disk), mode, NULL);
+	if (IS_ERR(bdev)) {
+		pr_err("nbdroot: blkdev_get_by_dev failed %ld\n",
+		       PTR_ERR(bdev));
+		return PTR_ERR(bdev);
+	}
+
+	/* Index the new socket will get in config->socks[]. */
+	conn = nbd->config->num_connections;
+	ret = nbd_add_socket(nbd, sock, false);
+	if (ret) {
+		pr_err("nbdroot: add socket failed %d\n", ret);
+		/* Nothing was attached; drop the reference taken above. */
+		blkdev_put(bdev, mode);
+		return ret;
+	}
+
+	mutex_lock(&nbd->config->socks[conn]->tx_lock);
+
+	nbd->config->flags = flags;
+
+	nbd_size_set(nbd, nbdroot->block_size,
+		     div_s64(rsize, nbdroot->block_size));
+
+	nbd->tag_set.timeout = nbdroot->timeout * HZ;
+	blk_queue_rq_timeout(nbd->disk->queue, nbdroot->timeout * HZ);
+
+	mutex_unlock(&nbd->config->socks[conn]->tx_lock);
+
+	ret = nbd_start_device_ioctl(nbd, bdev);
+	if (ret) {
+		/*
+		 * NOTE(review): bdev is still held here and @sock has already
+		 * been handed to the device, while the caller releases @sock
+		 * on error. Who owns the socket after nbd_add_socket() needs
+		 * to be confirmed before adding a blkdev_put() on this path.
+		 */
+		pr_err("nbdroot: start device ioctl failed %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * nbdroot_thread - kthread body that brings up one nbdroot device
+ * @arg: pointer to the struct nbdroot describing the connection
+ *
+ * Looks up the nbd device named in @arg, connects to the server, runs the
+ * handshake and binds the socket to the device.
+ *
+ * Returns 0 on success or a negative errno; on failure the parameters are
+ * logged and the socket, if one was created, is released.
+ */
+static int nbdroot_thread(void *arg)
+{
+	struct nbdroot *nbdroot = arg;
+	struct socket *sock = NULL;
+	struct nbd_device *nbd;
+	struct gendisk *disk;
+	size_t rsize;
+	u16 nflags;
+	dev_t devt;
+	int ret;
+
+	/*
+	 * The device may not exist (e.g. no nbd devices were created), in
+	 * which case the lookup yields no disk; bail out instead of
+	 * dereferencing a NULL gendisk.
+	 */
+	devt = blk_lookup_devt(nbdroot->bdev, 0);
+	disk = devt ? get_gendisk(devt, &ret) : NULL;
+	if (!disk) {
+		pr_err("nbdroot: device %s not found\n", nbdroot->bdev);
+		ret = -ENODEV;
+		goto err;
+	}
+	nbd = disk->private_data;
+
+	ret = nbd_connect(nbdroot, &sock);
+	if (ret) {
+		pr_err("nbdroot: connect failed %d\n", ret);
+		goto err;
+	}
+
+	ret = nbd_connection_negotiate(sock, nbdroot->server_export,
+				       &rsize, &nflags);
+	if (ret) {
+		pr_err("nbdroot: negotiation failed %d\n", ret);
+		goto err;
+	}
+
+	ret = nbd_bind_connection(nbdroot, nbd, sock, rsize, nflags);
+	if (ret) {
+		pr_err("nbdroot: nbd_bind_connection failed %d\n", ret);
+		goto err;
+	}
+	return 0;
+
+err:
+	pr_err("nbdroot: %s init failed, IP: %pI4, port: %i, export: %s\n",
+	       nbdroot->bdev, &nbdroot->server_addr,
+	       ntohs(nbdroot->server_port), nbdroot->server_export);
+
+	if (sock)
+		sock_release(sock);
+
+	return ret;
+}
+
+/*
+ * nbdroot_init - start the in-kernel NBD client if "nbdroot=" was given
+ *
+ * A zero server_port means no valid "nbdroot=" argument was parsed, in which
+ * case nothing is started.
+ *
+ * Returns 0, or the error from kthread_run() if the thread cannot be created.
+ */
+static int __init nbdroot_init(void)
+{
+	struct task_struct *task;
+
+	if (nbdroot_0.server_port == 0)
+		return 0;
+
+	task = kthread_run(nbdroot_thread, &nbdroot_0, "nbdroot_0");
+	if (IS_ERR(task)) {
+		pr_err("nbdroot: failed to start nbdroot_0 thread %ld\n",
+		       PTR_ERR(task));
+		return PTR_ERR(task);
+	}
+
+	return 0;
+}
+
+/* We need this in late_initcall_sync to be sure that the network is setup */
+late_initcall_sync(nbdroot_init);
+
+/*
+ * nbdroot_setup - parse the "nbdroot=" kernel command line argument
+ * @line: text after "nbdroot=", in the format
+ *        "[<SERVER_IP>:]<SERVER_PORT>/<EXPORT_NAME>"
+ *
+ * Fills nbdroot_0 with the server address, port and export name. The port
+ * is stored last and only on success, so a rejected argument leaves it at 0
+ * and the client is not started.
+ *
+ * Returns 1 when the argument was consumed successfully (the __setup
+ * convention for "handled") or a negative errno for a malformed argument.
+ */
+static int __init nbdroot_setup(char *line)
+{
+	struct nbdroot *nbdroot = &nbdroot_0;
+	char buf[NBD_MAXPATHLEN + 1];
+	char *export;
+	u16 port;
+	int ret;
+
+	/* strlcpy() reserves room for the NUL itself and returns strlen(line). */
+	if (strlcpy(buf, line, sizeof(buf)) >= sizeof(buf)) {
+		pr_err("nbdroot: Argument too long\n");
+		return -ENAMETOOLONG;
+	}
+
+	/* Strips an optional leading "<SERVER_IP>:" from buf. */
+	nbdroot->server_addr = root_nfs_parse_addr(buf);
+
+	if (*buf == '\0')
+		return -EINVAL;
+
+	if (nbdroot->server_addr == ADDR_NONE) {
+		/*
+		 * NOTE(review): this runs while the command line is parsed;
+		 * confirm that root_server_addr can already hold a
+		 * DHCP-provided address at this point.
+		 */
+		if (root_server_addr == ADDR_NONE) {
+			pr_err("nbdroot: Failed to find server address\n");
+			return -EINVAL;
+		}
+		nbdroot->server_addr = root_server_addr;
+	}
+
+	export = strchr(buf, '/');
+	if (!export) {
+		pr_err("nbdroot: Missing '/<EXPORT_NAME>' in argument\n");
+		return -EINVAL;
+	}
+	/* Split "<port>/<export>" in place. */
+	*export++ = '\0';
+
+	ret = kstrtou16(buf, 10, &port);
+	if (ret)
+		return ret;
+
+	strlcpy(nbdroot->server_export, export,
+		sizeof(nbdroot->server_export));
+	nbdroot->server_port = htons(port);
+
+	return 1;
+}
+
+__setup("nbdroot=", nbdroot_setup);
+
+#endif /* CONFIG_BLK_DEV_NBDROOT */
+
module_init(nbd_init);
module_exit(nbd_cleanup);

--
2.17.1