[RFC mdadm PATCH 10/11] imsm: assemble cache volumes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Teach load_super to examine the passed-in fd and determine whether it is a
cache or cache-target md device.

Generate info to allow the two halves of the cache to be assembled.

XXX: what are the rules we need for compare_super to determine stale
cache associations?

Create a LEVEL_ISRT md device.

Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
 isrt-intel.h  |    2 +
 maps.c        |    1 
 mdadm.h       |    1 
 super-intel.c |  191 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 sysfs.c       |    8 ++
 util.c        |    1 
 6 files changed, 198 insertions(+), 6 deletions(-)

diff --git a/isrt-intel.h b/isrt-intel.h
index 6d7e92f4da37..ea106c5ac02c 100644
--- a/isrt-intel.h
+++ b/isrt-intel.h
@@ -28,6 +28,8 @@ enum {
 	NVC_SIG_LEN = 32,
 	ISRT_DEV_IDX = 0,
 	ISRT_TARGET_DEV_IDX = 1,
+	ISRT_ROLE_CACHE = 0,
+	ISRT_ROLE_TARGET = 1,
 
 	NV_CACHE_MODE_OFF          = 0,
 	NV_CACHE_MODE_OFF_TO_SAFE  = 1, /* powerfail recovery state */
diff --git a/maps.c b/maps.c
index 64f1df2c42c3..28c010fdf9bf 100644
--- a/maps.c
+++ b/maps.c
@@ -93,6 +93,7 @@ mapping_t pers[] = {
 	{ "10", 10},
 	{ "faulty", LEVEL_FAULTY},
 	{ "container", LEVEL_CONTAINER},
+	{ "isrt", LEVEL_ISRT },
 	{ NULL, 0}
 };
 
diff --git a/mdadm.h b/mdadm.h
index 111f90f599af..e613d3866d8b 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -1457,6 +1457,7 @@ char *xstrdup(const char *str);
 #define	LEVEL_MULTIPATH		(-4)
 #define	LEVEL_LINEAR		(-1)
 #define	LEVEL_FAULTY		(-5)
+#define	LEVEL_ISRT		(-12)
 
 /* kernel module doesn't know about these */
 #define LEVEL_CONTAINER		(-100)
diff --git a/super-intel.c b/super-intel.c
index 7a7a48e9e6d7..e69d2a044e92 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -379,6 +379,8 @@ struct intel_super {
 	int updates_pending; /* count of pending updates for mdmon */
 	int current_vol; /* index of raid device undergoing creation */
 	unsigned long long create_offset; /* common start for 'current_vol' */
+	int load_cache; /* flag to indicate we are operating on the cache metadata */
+	int cache_dev; /* subarray/volume index of the cache volume */
 	__u32 random; /* random data for seeding new family numbers */
 	struct intel_dev *devlist;
 	struct dl {
@@ -1246,6 +1248,9 @@ static void print_imsm_dev(struct intel_super *super, struct intel_dev *dv,
 	struct imsm_map *map = get_imsm_map(dev, MAP_0);
 	struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
 
+	if (super->load_cache)
+		examine_cache(super, dv);
+
 	printf("\n");
 	printf("[%.16s]:\n", dev->volume);
 	__fname_from_uuid(info->uuid, 0, uuid_str, ':');
@@ -1339,7 +1344,7 @@ static void print_imsm_dev(struct intel_super *super, struct intel_dev *dv,
 	printf("\n");
 	printf("    Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
 
-	if (is_isrt_leg(dv)) {
+	if (!super->load_cache) {
 		printf("\n");
 		examine_cache(super, dv);
 	}
@@ -1514,6 +1519,8 @@ static int imsm_check_attributes(__u32 attributes)
 
 #ifndef MDASSEMBLE
 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
+static void getinfo_super_imsm_cache(struct intel_super *super, struct intel_dev *dv,
+				     struct mdinfo *info, char *map);
 
 static void examine_super_imsm(struct supertype *st, char *homehost)
 {
@@ -1527,6 +1534,18 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
 	__u32 reserved = imsm_reserved_sectors(super, super->disks);
 	struct dl *dl;
 
+	if (super->load_cache) {
+		struct intel_dev *dv = get_intel_dev(super, super->cache_dev);
+		struct mdinfo info;
+
+		super->load_cache = 0;
+		super->current_vol = super->cache_dev;
+		getinfo_super_imsm(st, &info, NULL);
+		super->load_cache = 1;
+		print_imsm_dev(super, dv, &info, super->disks->index);
+		return;
+	}
+
 	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
 	printf("          Magic : %s\n", str);
 	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
@@ -1595,21 +1614,24 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose)
 
 	getinfo_super_imsm(st, &info, NULL);
 	fname_from_uuid(st, &info, nbuf, ':');
-	printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
+	if (super->load_cache)
+		printf("ARRAY UUID=%s\n", nbuf + 5);
+	else
+		printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
 }
 
 static void brief_examine_cache_imsm(struct supertype *st, int cache_leg)
 {
-	int uuid[4];
 	char nbuf[64];
+	struct mdinfo info;
 	struct intel_super *super = st->sb;
 	struct intel_dev *dv = get_isrt_leg(super, cache_leg);
 
 	if (!dv)
 		return;
 
-	cache_volume_uuid(super, dv, uuid);
-	__fname_from_uuid(uuid, 0, nbuf, ':');
+	getinfo_super_imsm_cache(super, dv, &info, NULL);
+	fname_from_uuid(st, &info, nbuf, ':');
 	printf("ARRAY UUID=%s\n", nbuf + 5);
 }
 
@@ -1621,6 +1643,10 @@ static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
 	struct mdinfo info;
 	struct intel_super *super = st->sb;
 
+	/* don't re-report container metadata info */
+	if (super->load_cache)
+		return;
+
 	if (!super->anchor->num_raid_devs)
 		return;
 
@@ -2956,6 +2982,71 @@ static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
 	return NULL;
 }
 
+static void getinfo_super_imsm_cache(struct intel_super *super, struct intel_dev *dv,
+				     struct mdinfo *info, char *dmap)
+{
+	/* Describe one leg of an isrt cache as a member of a two-disk
+	 * LEVEL_ISRT array.  A leg with nvc metadata attached takes the
+	 * cache role, any other leg takes the cache-target role.
+	 */
+	struct imsm_dev *vol = dv->dev;
+	struct imsm_map *vol_map = get_imsm_map(vol, MAP_X);
+	int slot = dv->nvc ? ISRT_ROLE_CACHE : ISRT_ROLE_TARGET;
+	__u16 mode;
+	int failed;
+
+	memset(info, 0, sizeof(*info));
+
+	/* each role records its cache mode in a different structure */
+	if (slot == ISRT_ROLE_CACHE)
+		mode = dv->nvc->hdr.vol_config_md[0].nv_cache_mode;
+	else
+		mode = vol->nv_cache_mode;
+	/* events = 1 on the target leg makes Assemble choose the cache target */
+	info->events = (slot == ISRT_ROLE_CACHE) ? 0 : 1;
+	failed = vol_map->map_state == IMSM_T_STATE_FAILED ||
+		 mode == NV_CACHE_MODE_IS_FAILING ||
+		 mode == NV_CACHE_MODE_HAS_FAILED;
+
+	/* array-wide description */
+	info->array.level         = LEVEL_ISRT;
+	info->array.raid_disks    = 2;
+	info->array.md_minor      = -1;
+	info->array.major_version = -1;
+	info->array.minor_version = -2;
+	info->array.layout        = 0;
+	info->array.chunk_size    = 0;
+	info->array.ctime         = 0;
+	info->array.utime         = 0;
+	strcpy(info->text_version, "isrt");
+	info->name[0] = 0;
+	info->reshape_active = 0;
+	info->safe_mode_delay = 0;
+	info->recovery_start = MaxSector;
+	info->data_offset = 0;
+	cache_volume_uuid(super, dv, info->uuid);
+	/* sizes: combine the split 32-bit size fields of the volume */
+	info->custom_array_size = __le32_to_cpu(vol->size_high);
+	info->custom_array_size <<= 32;
+	info->custom_array_size |= __le32_to_cpu(vol->size_low);
+	info->component_size = info->custom_array_size;
+
+	/* this leg's slot and state within the array */
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	info->disk.number = slot;
+	info->disk.raid_disk = slot;
+	info->disk.state = failed ? (1 << MD_DISK_FAULTY) :
+		((1 << MD_DISK_ACTIVE) | (1 << MD_DISK_SYNC));
+
+	if (dmap) {
+		/* we can only report self-state */
+		dmap[1 - slot] = 1;
+		dmap[slot] = failed ? 0 : 1;
+	}
+}
+
+
 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
 {
 	int i;
@@ -2965,6 +3056,20 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *
 	int max_enough = -1, cache_legs = 0;
 	int map_disks = info->array.raid_disks;
 
+	if (super->load_cache || st->cache_leg) {
+		struct intel_dev *dv;
+
+		if (st->cache_leg) {
+			dv = get_isrt_leg(super, st->cache_leg);
+			if (!dv)
+				return;
+		} else
+			dv = get_intel_dev(super, super->cache_dev);
+
+		getinfo_super_imsm_cache(super, dv, info, map);
+		return;
+	}
+
 	if (super->current_vol >= 0) {
 		getinfo_super_imsm_volume(st, info, map);
 		return;
@@ -3266,6 +3371,27 @@ static int compare_super_imsm(struct supertype *st, struct supertype *tst)
 		}
 	}
 
+	/* cache configuration metadata lives on member arrays; as long
+	 * as they mutually agree on the volume-uuid, consider them a match
+	 * XXX: sufficient? we do have the failure checks in
+	 * getinfo_super_imsm_cache() to mitigate
+	 */
+	if (first->load_cache != sec->load_cache)
+		return 3;
+	else if (first->load_cache) {
+		struct intel_dev *first_dv, *sec_dv;
+		int first_uuid[4], sec_uuid[4];
+
+		first_dv = get_intel_dev(first, first->cache_dev);
+		sec_dv = get_intel_dev(sec, sec->cache_dev);
+		cache_volume_uuid(first, first_dv, first_uuid);
+		cache_volume_uuid(sec, sec_dv, sec_uuid);
+		if (memcmp(first_uuid, sec_uuid, sizeof(first_uuid)))
+			return 3;
+		else
+			return 0;
+	}
+
 	/* if an anchor does not have num_raid_devs set then it is a free
 	 * floating spare
 	 */
@@ -4621,6 +4747,52 @@ static int load_container_imsm(struct supertype *st, int fd, char *devname)
 {
 	return load_super_imsm_all(st, fd, &st->sb, devname, NULL, 1);
 }
+
+/* If fd is a container member, load the container metadata and mark the
+ * member as the cache volume.  Returns 0 on success, 2 if the member is not
+ * an isrt leg, else 1 or load_container_imsm()'s error. */
+static int load_super_cache(struct supertype *st, int fd, char *devname)
+{
+	struct mdinfo *sra = sysfs_read(fd, 0, GET_VERSION);
+	char *subarray, *devnm, *ep;
+	int cfd, cache_dev, err = 1;
+	struct intel_super *super;
+	struct intel_dev *dv;
+
+	if (!sra || sra->array.major_version != -1 ||
+	    !is_subarray(sra->text_version))
+		goto out;
+
+	/* cut text_version (in place) at the '/' after the container name */
+	ep = strchr(sra->text_version+1, '/');
+	if (!ep)
+		goto out; /* no separator: nothing names a subarray */
+	*ep = '\0';
+	devnm = sra->text_version+1;
+	subarray = ep+1;
+	cfd = open_dev(devnm);
+	if (cfd < 0)
+		goto out;
+	err = load_container_imsm(st, cfd, devname);
+	close(cfd);
+	if (err)
+		goto out;
+
+	super = st->sb;
+	cache_dev = strtoul(subarray, &ep, 10);
+	/* validate this volume is a cache or cache-target; an empty or
+	 * partly numeric subarray name is rejected as well */
+	if (ep == subarray || *ep != '\0' ||
+	    !(dv = get_intel_dev(super, cache_dev)) || !is_isrt_leg(dv)) {
+		free_super_imsm(st);
+		err = 2;
+		goto out;
+	}
+	super->load_cache = 1;
+	super->cache_dev = cache_dev;
+ out:
+	sysfs_free(sra);
+	return err;
+}
 #endif
 
 static int load_super_imsm(struct supertype *st, int fd, char *devname)
@@ -4634,6 +4806,15 @@ static int load_super_imsm(struct supertype *st, int fd, char *devname)
 
 	free_super_imsm(st);
 
+#ifndef MDASSEMBLE
+	/* check if this is a component leg of a cache array and load
+	 * the cache metadata from the parent container
+	 */
+	rv = load_super_cache(st, fd, devname);
+	if (rv == 0)
+		return rv;
+#endif
+
 	super = alloc_super();
 	/* Load hba and capabilities if they exist.
 	 * But do not preclude loading metadata in case capabilities or hba are
diff --git a/sysfs.c b/sysfs.c
index 4cbd4e5d051b..898edde49392 100644
--- a/sysfs.c
+++ b/sysfs.c
@@ -638,7 +638,13 @@ int sysfs_set_array(struct mdinfo *info, int vers)
 	rv |= sysfs_set_num(info, NULL, "raid_disks", raid_disks);
 	rv |= sysfs_set_num(info, NULL, "chunk_size", info->array.chunk_size);
 	rv |= sysfs_set_num(info, NULL, "layout", info->array.layout);
-	rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2);
+	if (info->array.level == LEVEL_ISRT) {
+		/* FIXME: how do we support asymmetric component sizes for
+		 * external metadata?
+		 */
+		rv |= sysfs_set_num(info, NULL, "component_size", 0);
+	} else
+		rv |= sysfs_set_num(info, NULL, "component_size", info->component_size/2);
 	if (info->custom_array_size) {
 		int rc;
 
diff --git a/util.c b/util.c
index 93f9200fa4c7..c9c4dec0fac1 100644
--- a/util.c
+++ b/util.c
@@ -362,6 +362,7 @@ int enough(int level, int raid_disks, int layout, int clean, char *avail)
 
 	case LEVEL_MULTIPATH:
 		return avail_disks>= 1;
+	case LEVEL_ISRT:
 	case LEVEL_LINEAR:
 	case 0:
 		return avail_disks == raid_disks;

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux