[PATCH v5 3/3] init: add support to directly boot to a mapped device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This change adds a dm= kernel parameter modelled after the md= parameter
from do_mounts_md. It allows for simple device-mapper targets to be
configured at boot time for use early in the boot process (as the root
device or otherwise).

The format is as follows:
dm="count name uuid|none ro|rw num-tables, table line 1, table line 2, ..."

It relies on the ability to create a device mapper device programmatically
and bind that device to the ioctl-style name/uuid.

Based on work by Will Drewry and Paul Taysom.

Signed-off-by: Kees Cook <keescook@xxxxxxxxxxxx>
---
v5: resurrection, multiple devices, cleanups, error reporting improvements
v4: https://patchwork.kernel.org/patch/104861/
---
 Documentation/device-mapper/boot.txt |  64 ++++++
 Documentation/kernel-parameters.txt  |   4 +
 init/Makefile                        |   1 +
 init/do_mounts.c                     |   1 +
 init/do_mounts.h                     |  10 +
 init/do_mounts_dm.c                  | 433 +++++++++++++++++++++++++++++++++++
 6 files changed, 513 insertions(+)
 create mode 100644 Documentation/device-mapper/boot.txt
 create mode 100644 init/do_mounts_dm.c

diff --git a/Documentation/device-mapper/boot.txt b/Documentation/device-mapper/boot.txt
new file mode 100644
index 000000000000..9dd04b37b3b1
--- /dev/null
+++ b/Documentation/device-mapper/boot.txt
@@ -0,0 +1,64 @@
+Boot time creation of mapped devices
+====================================
+It is possible to configure a device mapper device to act as the root
+device for your system in two ways.
+
+The first is to build an initial ramdisk which boots to a minimal
+userspace which configures the device, then pivot_root(8) in to it.
+
+The second is to possible when the device-mapper and any targets are
+compiled into the kernel (not a module), one or more device-mappers may
+be created and used as the root device at boot time with the parameters
+given with the boot line dm=...
+
+Multiple device-mappers can be stacked by specifying the number of
+devices. A device can have multiple tables if the the number of tables
+is specified.
+
+	<dm>		::= <num-mappers> <device-mapper>+
+	<device-mapper>	::= <head> "," <table>+
+	<head>		::= <name> <uuid> <mode> [<num-tables>]
+	<table>		::= <start> <length> <type> <options> ","
+	<mode>		::= "ro" | "rw"
+	<uuid>		::= xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx | "none"
+	<type>		::= "verity" | "bootcache" | ...
+
+Each tables line may be as normal when using the dmsetup tool except for
+two variations:
+1. Any use of commas will be interpreted as a newline
+2. Quotation marks cannot be escaped and cannot be used without
+   terminating the dm= argument.
+
+Unless renamed by udev, the device node created will be dm-0 as the
+first minor number for the device-mapper is used during early creation.
+
+The <num-tables> field is optional and assumed to be 1.
+
+Examples
+========
+An example of booting to a linear array made up of user-mode linux block
+devices:
+
+  dm="1 lroot none rw 2, 0 4096 linear 98:16 0, 4096 4096 linear 98:32 0" \
+  root=/dev/dm-0
+
+This will boot to a rw dm-linear target of 8192 sectors split across two
+block devices identified by their major:minor numbers.  After boot, udev
+will rename this target to /dev/mapper/lroot (depending on the rules).
+No uuid was assigned.
+
+An example of multiple device-mappers, with the dm="..." contents shown
+here split on multiple lines for readability:
+
+  3 vboot none ro,
+      0 1768000 bootcache
+        device=aa55b119-2a47-8c45-946a-5ac57765011f+1
+        signature=76e9be054b15884a9fa85973e9cb274c93afadb6
+        cache_start=1768000 max_blocks=100000 size_limit=23 max_trace=20000,
+    vroot none ro,
+      0 1740800 verity payload=254:0 hashtree=254:0 hashstart=1740800 alg=sha1
+        root_hexdigest=76e9be054b15884a9fa85973e9cb274c93afadb6
+        salt=5b3549d54d6c7a3837b9b81ed72e49463a64c03680c47835bef94d768e5646fe,
+    vram none rw 2,
+      0 32768 linear 1:0 0,
+      32768 32768 linear 1:1 0,
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9a53c929f017..ba3a49bc1229 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -56,6 +56,7 @@ parameter is applicable:
 	BLACKFIN Blackfin architecture is enabled.
 	CLK	Common clock infrastructure is enabled.
 	CMA	Contiguous Memory Area support is enabled.
+	DM	Device mapper support is enabled.
 	DRM	Direct Rendering Management support is enabled.
 	DYNAMIC_DEBUG Build in debug messages and enable them at runtime
 	EDD	BIOS Enhanced Disk Drive Services (EDD) is enabled
@@ -930,6 +931,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 
 	dis_ucode_ldr	[X86] Disable the microcode loader.
 
+	dm=		[DM] Allows early creation of a device-mapper device.
+			See Documentation/device-mapper/boot.txt.
+
 	dma_debug=off	If the kernel is compiled with DMA_API_DEBUG support,
 			this option disables the debugging code at boot.
 
diff --git a/init/Makefile b/init/Makefile
index 7bc47ee31c36..90410fd7b658 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -18,6 +18,7 @@ mounts-y			:= do_mounts.o
 mounts-$(CONFIG_BLK_DEV_RAM)	+= do_mounts_rd.o
 mounts-$(CONFIG_BLK_DEV_INITRD)	+= do_mounts_initrd.o
 mounts-$(CONFIG_BLK_DEV_MD)	+= do_mounts_md.o
+mounts-$(CONFIG_BLK_DEV_DM)	+= do_mounts_dm.o
 
 # dependencies on generated files need to be listed explicitly
 $(obj)/version.o: include/generated/compile.h
diff --git a/init/do_mounts.c b/init/do_mounts.c
index dea5de95c2dd..1902a1c80831 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -566,6 +566,7 @@ void __init prepare_namespace(void)
 	wait_for_device_probe();
 
 	md_run_setup();
+	dm_run_setup();
 
 	if (saved_root_name[0]) {
 		root_device_name = saved_root_name;
diff --git a/init/do_mounts.h b/init/do_mounts.h
index 067af1d9e8b6..ecb275782c03 100644
--- a/init/do_mounts.h
+++ b/init/do_mounts.h
@@ -74,3 +74,13 @@ void md_run_setup(void);
 static inline void md_run_setup(void) {}
 
 #endif
+
+#ifdef CONFIG_BLK_DEV_DM
+
+void dm_run_setup(void);
+
+#else
+
+static inline void dm_run_setup(void) {}
+
+#endif
diff --git a/init/do_mounts_dm.c b/init/do_mounts_dm.c
new file mode 100644
index 000000000000..331a0d551a34
--- /dev/null
+++ b/init/do_mounts_dm.c
@@ -0,0 +1,433 @@
+/*
+ * do_mounts_dm.c
+ * Copyright (C) 2010 The Chromium OS Authors <chromium-os-dev@xxxxxxxxxxxx>
+ * Based on do_mounts_md.c
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/async.h>
+#include <linux/ctype.h>
+#include <linux/device-mapper.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+
+#include "do_mounts.h"
+
+#define DM_MAX_DEVICES 256
+#define DM_MAX_TARGETS 256
+#define DM_MAX_NAME 32
+#define DM_MAX_UUID 129
+#define DM_NO_UUID "none"
+
+#define DM_MSG_PREFIX "init"
+#define DMERR_PARSE(fmt, args...) \
+	DMERR("failed to parse " fmt " for device %s<%lu>", args)
+
+/* Separators used for parsing the dm= argument. */
+#define DM_FIELD_SEP " "
+#define DM_LINE_SEP ","
+#define DM_ANY_SEP DM_FIELD_SEP DM_LINE_SEP
+
+/* See Documentation/device-mapper/boot.txt for dm="..." format details. */
+
+struct dm_setup_table {
+	sector_t begin;
+	sector_t length;
+	char *type;
+	char *params;
+	/* simple singly linked list */
+	struct dm_setup_table *next;
+};
+
+struct dm_device {
+	int minor;
+	int ro;
+	char name[DM_MAX_NAME];
+	char uuid[DM_MAX_UUID];
+	unsigned long num_tables;
+	struct dm_setup_table *table;
+	int table_count;
+	struct dm_device *next;
+};
+
+struct dm_option {
+	char *start;
+	char *next;
+	size_t len;
+	char delim;
+};
+
+static struct {
+	unsigned long num_devices;
+	char *str;
+} dm_setup_args __initdata;
+
+static int dm_early_setup __initdata;
+
+static int __init get_dm_option(struct dm_option *opt, const char *accept)
+{
+	char *str = opt->next;
+	char *endp;
+
+	if (!str)
+		return 0;
+
+	str = skip_spaces(str);
+	opt->start = str;
+	endp = strpbrk(str, accept);
+	if (!endp) {  /* act like strchrnul */
+		opt->len = strlen(str);
+		endp = str + opt->len;
+	} else {
+		opt->len = endp - str;
+	}
+	opt->delim = *endp;
+	if (*endp == 0) {
+		/* Don't advance past the nul. */
+		opt->next = endp;
+	} else {
+		opt->next = endp + 1;
+	}
+	return opt->len != 0;
+}
+
+static int __init get_dm_option_u64(struct dm_option *opt, const char *sep,
+				    unsigned long long *result)
+{
+	char buf[32];
+
+	if (!get_dm_option(opt, sep))
+		return -EINVAL;
+
+	strlcpy(buf, opt->start, min(sizeof(buf), opt->len + 1));
+	return kstrtoull(buf, 0, result);
+}
+
+static void __init dm_setup_cleanup(struct dm_device *devices)
+{
+	struct dm_device *dev = devices;
+
+	while (dev) {
+		struct dm_device *old_dev = dev;
+		struct dm_setup_table *table = dev->table;
+
+		while (table) {
+			struct dm_setup_table *old_table = table;
+
+			kfree(table->type);
+			kfree(table->params);
+			table = table->next;
+			kfree(old_table);
+			dev->table_count--;
+		}
+		WARN_ON(dev->table_count);
+		dev = dev->next;
+		kfree(old_dev);
+	}
+}
+
+static char * __init dm_parse_device(struct dm_device *dev, char *str,
+				     unsigned long idx)
+{
+	struct dm_option opt;
+	size_t len;
+	unsigned long long num_tables;
+
+	/* Grab the logical name of the device to be exported to udev */
+	opt.next = str;
+	if (!get_dm_option(&opt, DM_FIELD_SEP)) {
+		DMERR_PARSE("name", "", idx);
+		goto parse_fail;
+	}
+	len = min(opt.len + 1, sizeof(dev->name));
+	strlcpy(dev->name, opt.start, len);  /* includes nul */
+
+	/* Grab the UUID value or "none" */
+	if (!get_dm_option(&opt, DM_FIELD_SEP)) {
+		DMERR_PARSE("uuid", dev->name, idx);
+		goto parse_fail;
+	}
+	len = min(opt.len + 1, sizeof(dev->uuid));
+	strlcpy(dev->uuid, opt.start, len);
+
+	/* Determine if the table/device will be read only or read-write */
+	get_dm_option(&opt, DM_ANY_SEP);
+	if (!strncmp("ro", opt.start, opt.len)) {
+		dev->ro = 1;
+	} else if (!strncmp("rw", opt.start, opt.len)) {
+		dev->ro = 0;
+	} else {
+		DMERR_PARSE("table mode", dev->name, idx);
+		goto parse_fail;
+	}
+
+	/* Optional number field */
+	if (opt.delim == DM_FIELD_SEP[0]) {
+		if (get_dm_option_u64(&opt, DM_LINE_SEP, &num_tables)) {
+			DMERR_PARSE("number of tables", dev->name, idx);
+			goto parse_fail;
+		}
+	} else {
+		num_tables = 1;
+	}
+	if (num_tables > DM_MAX_TARGETS) {
+		DMERR_PARSE("too many tables (%llu > %d)", num_tables,
+			    DM_MAX_TARGETS, dev->name, idx);
+	}
+	dev->num_tables = num_tables;
+
+	return opt.next;
+
+parse_fail:
+	return NULL;
+}
+
+static char * __init dm_parse_tables(struct dm_device *dev, char *str,
+				     unsigned long idx)
+{
+	struct dm_option opt;
+	struct dm_setup_table **table = &dev->table;
+	unsigned long num_tables = dev->num_tables;
+	unsigned long i;
+	unsigned long long value;
+
+	/*
+	 * Tables are defined as per the normal table format but with a
+	 * comma as a newline separator.
+	 */
+	opt.next = str;
+	for (i = 0; i < num_tables; i++) {
+		*table = kzalloc(sizeof(struct dm_setup_table), GFP_KERNEL);
+		if (!*table) {
+			DMERR_PARSE("table %lu (out of memory)", i, dev->name,
+				    idx);
+			goto parse_fail;
+		}
+		dev->table_count++;
+
+		if (get_dm_option_u64(&opt, DM_FIELD_SEP, &value)) {
+			DMERR_PARSE("starting sector for table %lu", i,
+				    dev->name, idx);
+			goto parse_fail;
+		}
+		(*table)->begin = value;
+
+		if (get_dm_option_u64(&opt, DM_FIELD_SEP, &value)) {
+			DMERR_PARSE("length for table %lu", i, dev->name, idx);
+			goto parse_fail;
+		}
+		(*table)->length = value;
+
+		if (get_dm_option(&opt, DM_FIELD_SEP))
+			(*table)->type = kstrndup(opt.start, opt.len,
+							GFP_KERNEL);
+		if (!((*table)->type)) {
+			DMERR_PARSE("type for table %lu", i, dev->name, idx);
+			goto parse_fail;
+		}
+		if (get_dm_option(&opt, DM_LINE_SEP))
+			(*table)->params = kstrndup(opt.start, opt.len,
+						    GFP_KERNEL);
+		if (!((*table)->params)) {
+			DMERR_PARSE("params for table %lu", i, dev->name, idx);
+			goto parse_fail;
+		}
+		table = &((*table)->next);
+	}
+	DMDEBUG("tables parsed: %d", dev->table_count);
+
+	return opt.next;
+
+parse_fail:
+	return NULL;
+}
+
+static struct dm_device *dm_parse_args(void)
+{
+	struct dm_device *devices = NULL;
+	struct dm_device **tail = &devices;
+	struct dm_device *dev;
+	char *str = dm_setup_args.str;
+	unsigned long num_devices = dm_setup_args.num_devices;
+	unsigned long i;
+
+	if (!str)
+		return NULL;
+	for (i = 0; i < num_devices; i++) {
+		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+		if (!dev) {
+			DMERR("failed to allocated memory for device %lu", i);
+			goto error;
+		}
+		*tail = dev;
+		tail = &dev->next;
+		/*
+		 * devices are given minor numbers 0 - n-1
+		 * in the order they are found in the arg
+		 * string.
+		 */
+		dev->minor = i;
+		str = dm_parse_device(dev, str, i);
+		if (!str)	/* NULL indicates error in parsing, bail */
+			goto error;
+
+		str = dm_parse_tables(dev, str, i);
+		if (!str)
+			goto error;
+	}
+	return devices;
+error:
+	dm_setup_cleanup(devices);
+	return NULL;
+}
+
+/*
+ * Parse the command-line parameters given our kernel, but do not
+ * actually try to invoke the DM device now; that is handled by
+ * dm_setup_drives after the low-level disk drivers have initialised.
+ * dm format is described at the top of the file.
+ *
+ * Because dm minor numbers are assigned in assending order starting with 0,
+ * You can assume the first device is /dev/dm-0, the next device is /dev/dm-1,
+ * and so forth.
+ */
+static int __init dm_setup(char *str)
+{
+	struct dm_option opt;
+	unsigned long long num_devices;
+
+	if (!str) {
+		DMERR("setup str is NULL");
+		goto parse_fail;
+	}
+
+	DMDEBUG("Want to parse \"%s\"", str);
+	opt.next = str;
+	if (get_dm_option_u64(&opt, DM_FIELD_SEP, &num_devices))
+		goto parse_fail;
+	str = opt.next;
+	if (num_devices > DM_MAX_DEVICES) {
+		DMERR("too many devices %llu > %d", num_devices,
+		      DM_MAX_DEVICES);
+	}
+	dm_setup_args.num_devices = num_devices;
+	dm_setup_args.str = str;
+
+	DMINFO("will configure %lu device%s", dm_setup_args.num_devices,
+	       dm_setup_args.num_devices == 1 ? "" : "s");
+	dm_early_setup = 1;
+	return 1;
+
+parse_fail:
+	DMWARN("Invalid arguments supplied to dm=.");
+	return 0;
+}
+
+static void __init dm_setup_drives(void)
+{
+	struct mapped_device *md = NULL;
+	struct dm_table *tables = NULL;
+	struct dm_setup_table *table;
+	struct dm_device *dev;
+	char *uuid;
+	fmode_t fmode = FMODE_READ;
+	struct dm_device *devices;
+
+	devices = dm_parse_args();
+
+	for (dev = devices; dev; dev = dev->next) {
+		if (dm_create(dev->minor, &md)) {
+			DMERR("failed to create device %s", dev->name);
+			goto fail;
+		}
+		DMDEBUG("created device '%s'", dm_device_name(md));
+
+		/*
+		 * In addition to flagging the table below, the disk must be
+		 * set explicitly ro/rw.
+		 */
+		set_disk_ro(dm_disk(md), dev->ro);
+
+		if (!dev->ro)
+			fmode |= FMODE_WRITE;
+		if (dm_table_create(&tables, fmode, dev->table_count, md)) {
+			DMERR("failed to create device %s tables", dev->name);
+			goto fail_put;
+		}
+		for (table = dev->table; table; table = table->next) {
+			DMINFO("device %s adding table '%llu %llu %s %s'",
+			       dev->name,
+			       (unsigned long long) table->begin,
+			       (unsigned long long) table->length,
+			       table->type, table->params);
+			if (dm_table_add_target(tables, table->type,
+						table->begin,
+						table->length,
+						table->params)) {
+				DMERR("failed to add table to device %s",
+					dev->name);
+				goto fail_put;
+			}
+		}
+		dm_lock_md_type(md);
+		if (dm_table_complete(tables)) {
+			DMERR("failed to complete device %s tables",
+				dev->name);
+			dm_unlock_md_type(md);
+			goto fail_put;
+		}
+		dm_unlock_md_type(md);
+
+		/* Suspend the device so that we can bind it to the tables. */
+		if (dm_suspend(md, 0)) {
+			DMERR("failed to suspend device %s pre-bind",
+				dev->name);
+			goto fail_put;
+		}
+
+		/*
+		 * Bind the tables to the device. This is the only way
+		 * to associate md->map with the tables and set the disk
+		 * capacity directly.
+		 */
+		if (dm_swap_table(md, tables)) {  /* should return NULL. */
+			DMERR("failed to bind device %s to tables",
+				dev->name);
+			goto fail_put;
+		}
+
+		/* Finally, resume and the device should be ready. */
+		if (dm_resume(md)) {
+			DMERR("failed to resume device %s", dev->name);
+			goto fail_put;
+		}
+
+		/* Export the dm device via the ioctl interface */
+		if (!strcmp(DM_NO_UUID, dev->uuid))
+			uuid = NULL;
+		if (dm_ioctl_export(md, dev->name, uuid)) {
+			DMERR("failed to export device %s", dev->name);
+			goto fail_put;
+		}
+		DMINFO("dm-%d (%s) is ready", dev->minor, dev->name);
+	}
+	dm_setup_cleanup(devices);
+	return;
+
+fail_put:
+	dm_put(md);
+fail:
+	DMERR("starting dm-%d (%s) failed", dev->minor, dev->name);
+	dm_setup_cleanup(devices);
+}
+
+__setup("dm=", dm_setup);
+
+void __init dm_run_setup(void)
+{
+	if (!dm_early_setup)
+		return;
+	DMINFO("attempting early device configuration.");
+	dm_setup_drives();
+}
-- 
2.6.3

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux