[PATCH 5/6] bcache-tools: write nvm namespace super block on nvdimm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds the following option to "bcache make",
        -M, --mdev              Format a cache nvmdimm-meta device

This option is used to specify an nvdimm device to store bcache
metadata. Once one or more nvdimm devices are specified as nvdimm-meta
devices, the routine write_nvm_namespace_sb() writes their super blocks
(defined by struct bch_nvm_pages_sb) to the corresponding location on
each nvdimm-meta device.

Signed-off-by: Coly Li <colyli@xxxxxxx>
---
 bcache.c |   1 +
 make.c   | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 174 insertions(+), 11 deletions(-)

diff --git a/bcache.c b/bcache.c
index 5558914..def1e93 100644
--- a/bcache.c
+++ b/bcache.c
@@ -19,6 +19,7 @@
 #include <assert.h>
 
 #include "features.h"
+#include "nvm_pages.h"
 #include "show.h"
 
 #define BCACHE_TOOLS_VERSION	"1.1"
diff --git a/make.c b/make.c
index 92fe2a2..79ecada 100644
--- a/make.c
+++ b/make.c
@@ -13,6 +13,7 @@
 #define _FILE_OFFSET_BITS	64
 #define __USE_FILE_OFFSET64
 #define _XOPEN_SOURCE 600
+#define _DEFAULT_SOURCE
 
 #include <blkid/blkid.h>
 #include <ctype.h>
@@ -29,6 +30,7 @@
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/mman.h>
 #include <unistd.h>
 #include <uuid/uuid.h>
 
@@ -36,6 +38,7 @@
 #include "lib.h"
 #include "bitwise.h"
 #include "zoned.h"
+#include "nvm_pages.h"
 
 struct sb_context {
 	unsigned int	block_size;
@@ -175,6 +178,7 @@ void usage(void)
 	fprintf(stderr,
 		   "Usage: make-bcache [options] device\n"
 	       "	-C, --cache		Format a cache device\n"
+	       "	-M, --mdev		Format a cache nvmdimm-meta device\n"
 	       "	-B, --bdev		Format a backing device\n"
 	       "	-b, --bucket		bucket size\n"
 	       "	-w, --block		block size (hard sector size of SSD, often 2k)\n"
@@ -409,6 +413,8 @@ static void write_sb(char *dev, struct sb_context *sbc, bool bdev, bool force)
 		sb.nr_in_set		= 1;
 		/* 23 is (SB_SECTOR + SB_SIZE) - 1 sectors */
 		sb.first_bucket		= (23 / sb.bucket_size) + 1;
+		if (nvdimm_meta)
+			sb.first_bucket += SB_JOURNAL_BUCKETS;
 
 		if (sb.nbuckets < 1 << 7) {
 			fprintf(stderr, "Not enough buckets: %llu, need %u\n",
@@ -477,6 +483,139 @@ static void write_sb(char *dev, struct sb_context *sbc, bool bdev, bool force)
 	close(fd);
 }
 
+/*
+ * Format @dev as an nvdimm-meta namespace: zero its first BCH_NVM_PAGES_OFFSET
+ * bytes, then store the super block, owner list head and system pgalloc recs.
+ * Exits on any failure, or if @dev is already formatted and @force is unset.
+ */
+static void write_nvm_namespace_sb(char *dev,
+				   int this_namespace_nr, int total_namespace_nr,
+				   struct sb_context *sbc, bool force)
+{
+	int fd;
+	struct bch_nvm_pages_sb *nvm_sb = NULL;
+	struct bch_owner_list_head owner_list_head;
+	struct bch_nvm_pages_owner_head system_owner_head;
+	struct bch_nvm_pgalloc_recs system_pgalloc_recs;
+	char uuid_str[40], nvm_pages_set_uuid_str[40];
+	int page_size = getpagesize();
+	void *start_addr = NULL;
+
+	memset(&owner_list_head, 0, sizeof(struct bch_owner_list_head));
+	memset(&system_owner_head, 0, sizeof(struct bch_nvm_pages_owner_head));
+	memset(&system_pgalloc_recs, 0, sizeof(struct bch_nvm_pgalloc_recs));
+
+	fd = open(dev, O_RDWR|O_EXCL);
+	if (fd < 0) {
+		printf("open %s failed: %s\n", dev, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	start_addr = mmap(NULL, BCH_NVM_PAGES_OFFSET, PROT_READ | PROT_WRITE,
+			  MAP_SHARED, fd, 0);
+	if (start_addr == MAP_FAILED) {
+		printf("mmap to %s failed: %s\n", dev, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	nvm_sb = (struct bch_nvm_pages_sb *)
+		(start_addr + BCH_NVM_PAGES_SB_OFFSET);
+
+	if (!memcmp(nvm_sb->magic, bch_nvm_pages_magic, 16) && !force) {
+		fprintf(stderr, "Already a nvdimm meta device on %s,", dev);
+		fprintf(stderr, " overwrite with --force\n");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(start_addr, 0, BCH_NVM_PAGES_OFFSET);
+
+	/* Initialize super block */
+	nvm_sb->sb_offset		= BCH_NVM_PAGES_SB_OFFSET;
+	nvm_sb->version			= BCH_NVM_PAGES_SB_VERSION;
+	memcpy(nvm_sb->magic,		bch_nvm_pages_magic, 16);
+	uuid_generate(nvm_sb->uuid);
+	/* Right now there is only one namespace in the nvm_pages set */
+	uuid_generate(nvm_sb->set_uuid);
+	nvm_sb->page_size		= page_size;
+	nvm_sb->total_namespaces_nr	= total_namespace_nr;
+	nvm_sb->this_namespace_nr	= this_namespace_nr;
+	nvm_sb->flags			= 0;
+	nvm_sb->seq			= 0;
+	nvm_sb->feature_compat		= 0;
+	nvm_sb->feature_incompat	= 0;
+	nvm_sb->feature_ro_compat	= 0;
+	nvm_sb->pages_offset		= BCH_NVM_PAGES_OFFSET;
+	nvm_sb->pages_total		= getblocks(fd) * 512 / page_size;
+
+	if (this_namespace_nr == 0)
+		nvm_sb->owner_list_head	= (struct bch_owner_list_head *)
+					BCH_NVM_PAGES_OWNER_LIST_HEAD_OFFSET;
+	else
+		nvm_sb->owner_list_head	= NULL;
+
+	/* Set checksum last; nvm_sb aliases the mapping, no copy-back needed */
+	nvm_sb->csum = csum_set(nvm_sb);
+
+	uuid_unparse(nvm_sb->uuid, uuid_str);
+	uuid_unparse(nvm_sb->set_uuid, nvm_pages_set_uuid_str);
+
+	printf("Name			%s\n", dev);
+	printf("Type			nvdimm-meta\n");
+	printf("UUID:			%s\n"
+	       "NVM Set UUID:		%s\n"
+	       "version:		%u\n"
+	       "seq:			%u\n"
+	       "total_namespaces_nr:	%u\n"
+	       "this_namespace_nr:	%u\n"
+	       "ns_start:		N/A\n"
+	       "page_size:		%u\n"
+	       "pages_offset:		%llu\n"
+	       "pages_total:		%llu\n",
+	       uuid_str, nvm_pages_set_uuid_str,
+	       (unsigned int) nvm_sb->version,
+	       (unsigned int) nvm_sb->seq,
+	       nvm_sb->total_namespaces_nr,
+	       nvm_sb->this_namespace_nr,
+	       nvm_sb->page_size,
+	       nvm_sb->pages_offset,
+	       nvm_sb->pages_total);
+
+	/* Initialize bch_owner_list_head and store the whole structure */
+	owner_list_head.size = (sizeof(struct bch_owner_list_head) -
+				offsetof(struct bch_owner_list_head, heads)) /
+			       sizeof(struct bch_nvm_pages_owner_head);
+	memcpy(system_owner_head.uuid, nvm_sb->set_uuid, sizeof(uuid_t));
+	snprintf(system_owner_head.label, BCH_NVM_PAGES_LABEL_SIZE - 1,
+		 "nvm_pages_internal");
+	system_owner_head.recs[0] = (struct bch_nvm_pgalloc_recs *)
+				    BCH_NVM_PAGES_SYS_RECS_HEAD_OFFSET;
+	owner_list_head.heads[0] = system_owner_head;
+	owner_list_head.used = 1;
+	memcpy(start_addr + BCH_NVM_PAGES_OWNER_LIST_HEAD_OFFSET,
+	       &owner_list_head, sizeof(struct bch_owner_list_head));
+
+	/* Initialize heads[0].recs[0], the system internal owner's records */
+	system_pgalloc_recs.owner = (struct bch_nvm_pages_owner_head *)
+			(BCH_NVM_PAGES_OWNER_LIST_HEAD_OFFSET +
+			 offsetof(struct bch_owner_list_head, heads));
+	system_pgalloc_recs.next = NULL;
+	memcpy(system_pgalloc_recs.magic, bch_nvm_pages_pgalloc_magic, 16);
+	memcpy(system_pgalloc_recs.owner_uuid, system_owner_head.uuid, sizeof(uuid_t));
+	system_pgalloc_recs.size = (sizeof(struct bch_nvm_pgalloc_recs) -
+				    offsetof(struct bch_nvm_pgalloc_recs, recs)) /
+				   sizeof(struct bch_nvm_pgalloc_rec);
+	system_pgalloc_recs.used = 0;
+	memcpy(start_addr + BCH_NVM_PAGES_SYS_RECS_HEAD_OFFSET,
+	       &system_pgalloc_recs, sizeof(struct bch_nvm_pgalloc_recs));
+
+	if (msync(start_addr, BCH_NVM_PAGES_OFFSET, MS_SYNC) < 0) {
+		printf("msync to %s failed: %s\n", dev, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	munmap(start_addr, BCH_NVM_PAGES_OFFSET);
+	close(fd);
+}
+
 static unsigned int get_blocksize(const char *path)
 {
 	struct stat statbuf;
@@ -521,9 +660,13 @@ static unsigned int get_blocksize(const char *path)
 
 int make_bcache(int argc, char **argv)
 {
-	int c, bdev = -1;
-	unsigned int i, ncache_devices = 0, nbacking_devices = 0;
+	int c;
+	unsigned int i;
+	int cdev = -1, bdev = -1, mdev = -1;
+	unsigned int ncache_devices = 0, ncache_nvm_devices = 0;
+	unsigned int nbacking_devices = 0;
 	char *cache_devices[argc];
+	char *cache_nvm_devices[argc];
 	char *backing_devices[argc];
 	char label[SB_LABEL_SIZE] = { 0 };
 	unsigned int block_size = 0, bucket_size = 1024;
@@ -538,6 +681,7 @@ int make_bcache(int argc, char **argv)
 	struct option opts[] = {
 		{ "cache",		0, NULL,	'C' },
 		{ "bdev",		0, NULL,	'B' },
+		{ "nvdimm-meta",	0, NULL,	'M'},
 		{ "bucket",		1, NULL,	'b' },
 		{ "block",		1, NULL,	'w' },
 		{ "writeback",		0, &writeback,	1 },
@@ -554,16 +698,19 @@ int make_bcache(int argc, char **argv)
 		{ NULL,			0, NULL,	0 },
 	};
 
-	while ((c = getopt_long(argc, argv,
-				"-hCBUo:w:b:l:",
-				opts, NULL)) != -1)
+	while ((c = getopt_long(argc, argv, "-hCBMUo:w:b:l:",
+				opts, NULL)) != -1) {
+
 		switch (c) {
 		case 'C':
-			bdev = 0;
+			cdev = 1;
 			break;
 		case 'B':
 			bdev = 1;
 			break;
+		case 'M':
+			mdev = 1;
+			break;
 		case 'b':
 			bucket_size =
 				hatoi_validate(optarg, "bucket size", UINT_MAX);
@@ -610,19 +757,28 @@ int make_bcache(int argc, char **argv)
 			usage();
 			break;
 		case 1:
-			if (bdev == -1) {
-				fprintf(stderr, "Please specify -C or -B\n");
+			if (cdev == -1 && bdev == -1 && mdev == -1) {
+				fprintf(stderr, "Please specify -C, -B or -M\n");
 				exit(EXIT_FAILURE);
 			}
 
-			if (bdev)
+			if (bdev > 0) {
 				backing_devices[nbacking_devices++] = optarg;
-			else
+				printf("backing_devices[%d]: %s\n", nbacking_devices - 1, optarg);
+				bdev = -1;
+			} else if (cdev > 0) {
 				cache_devices[ncache_devices++] = optarg;
+				printf("cache_devices[%d]: %s\n", ncache_devices - 1, optarg);
+				cdev = -1;
+			} else if (mdev > 0) {
+				cache_nvm_devices[ncache_nvm_devices++] = optarg;
+				mdev = -1;
+			}
 			break;
 		}
+	} /* while */
 
-	if (!ncache_devices && !nbacking_devices) {
+	if (!ncache_devices && !ncache_nvm_devices && !nbacking_devices) {
 		fprintf(stderr, "Please supply a device\n");
 		usage();
 	}
@@ -657,6 +813,7 @@ int make_bcache(int argc, char **argv)
 	sbc.data_offset = data_offset;
 	memcpy(sbc.set_uuid, set_uuid, sizeof(sbc.set_uuid));
 	sbc.label = label;
+	sbc.nvdimm_meta = (ncache_nvm_devices > 0) ? true : false;
 
 	for (i = 0; i < ncache_devices; i++)
 		write_sb(cache_devices[i], &sbc, false, force);
@@ -668,5 +825,10 @@ int make_bcache(int argc, char **argv)
 		write_sb(backing_devices[i], &sbc, true, force);
 	}
 
+	for (i = 0; i < ncache_nvm_devices; i++) {
+		write_nvm_namespace_sb(cache_nvm_devices[i], i,
+				       ncache_nvm_devices, &sbc,
+				       force);
+	}
 	return 0;
 }
-- 
2.26.2




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux