[PATCH 29/31] mdadm: migration restart for external meta

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add support for assembling partially migrated arrays with external metadata.
Note that if RAID0 was in use during the migration, it has to be changed to
RAID4 during assembly (see convert_raid_personalities and switch_raid0_configuration).
Arrays that used the migration optimization area are rejected (see check_mpb_migr_compatibility).

getinfo_super_imsm_volume() reads the migration record and initializes the mdadm reshape-specific structures.

Signed-off-by: Maciej Trela <maciej.trela@xxxxxxxxx>
Signed-off-by: Adam Kwolek <adam.kwolek@xxxxxxxxx>
---

 mdadm/mdadm/Assemble.c    |    8 ++
 mdadm/mdadm/super-intel.c |  199 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 204 insertions(+), 3 deletions(-)

diff --git a/mdadm/mdadm/Assemble.c b/mdadm/mdadm/Assemble.c index 409f0d7..c34c109 100644
--- a/mdadm/mdadm/Assemble.c
+++ b/mdadm/mdadm/Assemble.c
@@ -1313,6 +1313,14 @@ int assemble_container_content(struct supertype *st, int mdfd,
 			close(mdfd);
 			return 1;
 		}
+
+	if (content->reshape_active) {
+		sysfs_set_num(sra, NULL, "reshape_position", content->reshape_progress);
+		sysfs_set_num(sra, NULL, "chunk_size", content->new_chunk);
+		sysfs_set_num(sra, NULL, "layout", content->new_layout);
+		sysfs_set_num(sra, NULL, "raid_disks", content->array.raid_disks + content->delta_disks);
+	}
+
 	if (sra)
 		sysfs_free(sra);
 
diff --git a/mdadm/mdadm/super-intel.c b/mdadm/mdadm/super-intel.c index 538fc9f..ec73f7e 100644
--- a/mdadm/mdadm/super-intel.c
+++ b/mdadm/mdadm/super-intel.c
@@ -850,6 +850,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
 	printf("    Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
 	printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
 	printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
+	info.devs = NULL;
 	getinfo_super_imsm(st, &info);
 	fname_from_uuid(st, &info, nbuf, ':');
 	printf("           UUID : %s\n", nbuf + 5);
@@ -877,6 +878,7 @@ static void examine_super_imsm(struct supertype *st, char *homehost)
 		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
 
 		super->current_vol = i;
+		info.devs = NULL;
 		getinfo_super_imsm(st, &info);
 		fname_from_uuid(st, &info, nbuf, ':');
 		print_imsm_dev(dev, nbuf + 5, super->disks->index); @@ -900,6 +902,7 @@ static void brief_examine_super_imsm(struct supertype *st, int verbose)
 		return;
 	}
 
+	info.devs = NULL;
 	getinfo_super_imsm(st, &info);
 	fname_from_uuid(st, &info, nbuf, ':');
 	printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5); @@ -917,12 +920,14 @@ static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
 	if (!super->anchor->num_raid_devs)
 		return;
 
+	info.devs = NULL;
 	getinfo_super_imsm(st, &info);
 	fname_from_uuid(st, &info, nbuf, ':');
 	for (i = 0; i < super->anchor->num_raid_devs; i++) {
 		struct imsm_dev *dev = get_imsm_dev(super, i);
 
 		super->current_vol = i;
+		info.devs = NULL;
 		getinfo_super_imsm(st, &info);
 		fname_from_uuid(st, &info, nbuf1, ':');
 		printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n", @@ -937,6 +942,7 @@ static void export_examine_super_imsm(struct supertype *st)
 	struct mdinfo info;
 	char nbuf[64];
 
+	info.devs = NULL;
 	getinfo_super_imsm(st, &info);
 	fname_from_uuid(st, &info, nbuf, ':');
 	printf("MD_METADATA=imsm\n");
@@ -950,6 +956,7 @@ static void detail_super_imsm(struct supertype *st, char *homehost)
 	struct mdinfo info;
 	char nbuf[64];
 
+	info.devs = NULL;
 	getinfo_super_imsm(st, &info);
 	fname_from_uuid(st, &info, nbuf, ':');
 	printf("\n           UUID : %s\n", nbuf + 5);
@@ -959,6 +966,7 @@ static void brief_detail_super_imsm(struct supertype *st)  {
 	struct mdinfo info;
 	char nbuf[64];
+	info.devs = NULL;
 	getinfo_super_imsm(st, &info);
 	fname_from_uuid(st, &info, nbuf, ':');
 	printf(" UUID=%s", nbuf + 5);
@@ -1624,6 +1632,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
 	struct dl *dl;
 	char *devname;
 	int minor;
+	__u32 blocks_per_member;
+	__u32 blocks_per_strip;
 
 	for (dl = super->disks; dl; dl = dl->next)
 		if (dl->raiddisk == info->disk.raid_disk) @@ -1631,7 +1641,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
 	info->container_member	  = super->current_vol;
 	info->array.raid_disks    = map->num_members;
 	info->array.level	  = get_imsm_raid_level(map);
-	info->array.layout	  = imsm_level_to_layout(info->array.level);
+	if (info->array.level == 4) {
+		map->raid_level = 5;
+		info->array.level = 5;
+		info->array.layout = ALGORITHM_PARITY_N;
+	} else {
+		info->array.layout	  = imsm_level_to_layout(info->array.level);
+	}
 	info->array.md_minor	  = -1;
 	info->array.ctime	  = 0;
 	info->array.utime	  = 0;
@@ -1649,7 +1665,15 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
 	}
 
 	info->data_offset	  = __le32_to_cpu(map->pba_of_lba0);
-	info->component_size	  = __le32_to_cpu(map->blocks_per_member);
+	/* FIXME: For some unknown reason sometimes in a volume created by
+	 * IMSM blocks_per_member is not a multiple of blocks_per strip.
+	 * Fix blocks_per_member here:
+	 */
+	blocks_per_member = __le32_to_cpu(map->blocks_per_member);
+	blocks_per_strip = __le16_to_cpu(map->blocks_per_strip);
+	blocks_per_member &= ~(blocks_per_strip - 1);
+	info->component_size = blocks_per_member;
+
 	memset(info->uuid, 0, sizeof(info->uuid));
 	info->recovery_start = MaxSector;
 	info->reshape_active = 0;
@@ -1673,7 +1697,43 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info)
 			 */
 		case MIGR_REBUILD:
 			/* this is handled by container_content_imsm() */
-		case MIGR_GEN_MIGR:
+		case MIGR_GEN_MIGR: {
+			struct imsm_map *prev_map;
+			int data_members;
+
+			load_imsm_migr_rec(super, info);
+
+			info->reshape_progress = (unsigned long long)
+			  __le32_to_cpu(super->migr_rec->blocks_per_unit) *
+			  __le32_to_cpu(super->migr_rec->curr_migr_unit);
+
+			/* set previous and new map configurations */
+			prev_map = get_imsm_map(dev, 1);
+			info->reshape_active = 1;
+			info->array.raid_disks = prev_map->num_members;
+			info->delta_disks = map->num_members - prev_map->num_members;
+			info->new_level = info->array.level;
+			info->array.level = get_imsm_raid_level(prev_map);
+			info->new_layout = info->array.layout;
+			info->array.layout = imsm_level_to_layout(info->array.level);
+			info->array.chunk_size = __le16_to_cpu(prev_map->blocks_per_strip) << 9;
+			info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
+
+			if (info->array.level == 4) {
+				prev_map->raid_level = 5;
+				info->array.level = 5;
+				info->array.layout = ALGORITHM_PARITY_N;
+			}
+
+			/* IMSM FIX for blocks_per_member */
+			blocks_per_strip = __le16_to_cpu(prev_map->blocks_per_strip);
+			blocks_per_member &= ~(blocks_per_strip - 1);
+			info->component_size = blocks_per_member;
+
+			/* Calculate previous array size */
+			data_members = imsm_num_data_members(dev, 1);
+			info->custom_array_size = blocks_per_member * data_members;
+		}
 		case MIGR_STATE_CHANGE:
 			/* FIXME handle other migrations */
 		default:
@@ -2445,6 +2505,117 @@ struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
 	return ptr;
 }
 
+/* Switches an N-disk Raid0 map configuration to an (N+1)-disk Raid4 one.
+ *
+ * mpb: anchor whose mpb_size and num_disks fields are read; mpb_size is
+ *      increased by sizeof(__u32) before returning.
+ * map: map that is modified in place: raid_level is set to 4 and
+ *      num_members is incremented.
+ *
+ * The bytes between the old end of the map and mpb_size are moved with
+ * memmove() to the new end of the map, and the new last slot is set to
+ * mpb->num_disks | IMSM_ORD_REBUILD.
+ * NOTE(review): nothing here checks that the buffer behind mpb has room
+ * for the extra bytes - confirm that the caller guarantees it.
+ * NOTE(review): mpb_size is used without __le32_to_cpu(), unlike e.g.
+ * mpb->family_num in examine_super_imsm() - confirm on big-endian hosts.
+ */ void 
+switch_raid0_configuration(struct imsm_super *mpb, struct imsm_map
+*map) {
+	__u8 *src, *dst;
+	int bytes_to_copy;
+
+	/* remember where the metadata following this map starts now,
+	 * i.e. before num_members is changed below
+	 */
+	src = (__u8 *)map + sizeof_imsm_map(map);
+
+	/* change the level and disk number to be compatible with IMSM */
+	map->raid_level = 4;
+	map->num_members++;
+
+	/* get the updated pointer to the rest of the metadata; presumably
+	 * sizeof_imsm_map() grows with num_members - TODO confirm
+	 */
+	dst = (__u8 *)map + sizeof_imsm_map(map);
+	/* Now move the rest of the metadata to be properly aligned */
+	bytes_to_copy = mpb->mpb_size - (src - (__u8 *)mpb);
+	if (bytes_to_copy > 0)
+		memmove(dst, src, bytes_to_copy);
+	/* Now insert the new entry into the last slot of the map; its disk
+	 * index is mpb->num_disks, flagged with IMSM_ORD_REBUILD
+	 */
+	set_imsm_ord_tbl_ent(map, map->num_members - 1/*slot*/,
+			     mpb->num_disks | IMSM_ORD_REBUILD);
+	/* update size: account for the one added __u32 */
+	mpb->mpb_size += sizeof(__u32);
+}
+
+/* Make sure that in case of migration in progress we'll convert raid
+ * personalities so we could continue migrating.
+ *
+ * Every volume with migr_state == 1 and migr_type == MIGR_GEN_MIGR is
+ * inspected; each of its two maps that has raid_level 0 is passed to
+ * switch_raid0_configuration().  If at least one map of any volume was
+ * converted, one extra imsm_disk entry ("MISSING DISK", FAILED_DISK) is
+ * appended to the mpb disk table and mpb_size/num_disks are updated.
+ *
+ * super: container whose anchor (mpb) is modified in place.
+ * NOTE(review): nothing here checks that the anchor buffer has room for
+ * the added entry - confirm that the caller guarantees it.
+ */
+void convert_raid_personalities(struct intel_super *super)
+{
+	struct imsm_super *mpb = super->anchor;
+	struct imsm_map *map;
+	struct imsm_disk *newMissing;
+	int i, map_modified = 0;
+	int bytes_to_copy;
+	__u8 *src, *dst;
+
+	for (i = 0; i < super->anchor->num_raid_devs; i++) {
+		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
+
+		/* map_modified must accumulate over all volumes: resetting it
+		 * here would skip adding the missing disk whenever the last
+		 * volume needs no conversion but an earlier one did
+		 */
+		if (dev_iter &&
+		    dev_iter->vol.migr_state == 1 &&
+		    dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
+			/* This device is migrating, check for raid0 levels */
+			map = get_imsm_map(dev_iter, 0);
+			if (map->raid_level == 0) {
+				/* Map0: Migrating raid0 detected - lets switch it to level4 */
+				switch_raid0_configuration(mpb, map);
+				map_modified++;
+			}
+			map = get_imsm_map(dev_iter, 1);
+			if (map->raid_level == 0) {
+				/* Map1: Migrating raid0 detected - lets switch it to level4 */
+				switch_raid0_configuration(mpb, map);
+				map_modified++;
+			}
+		}
+	}
+
+	if (map_modified > 0) {
+		/* Add missing device to the MPB disk table; src is the end of
+		 * the current table, dst the end of the table once it holds
+		 * one more entry
+		 */
+		src = (__u8 *)mpb->disk + sizeof(struct imsm_disk) * mpb->num_disks;
+		mpb->num_disks++;
+		dst = (__u8 *)mpb->disk + sizeof(struct imsm_disk) * mpb->num_disks;
+
+		/* Now move the rest of the metadata to be properly aligned */
+		bytes_to_copy = mpb->mpb_size - (src - (__u8 *)mpb);
+		if (bytes_to_copy > 0)
+			memmove(dst, src, bytes_to_copy);
+
+		/* Update mpb size */
+		mpb->mpb_size += sizeof(struct imsm_disk);
+
+		/* Now fill in the new missing disk fields; the entry lives in
+		 * the gap opened at src by the move above
+		 */
+		newMissing = (struct imsm_disk *)src;
+		sprintf((char *)newMissing->serial, "%s", "MISSING DISK");
+		/* copy the device size from the first disk */
+		newMissing->total_blocks = mpb->disk[0].total_blocks;
+		newMissing->scsi_id = 0x0;
+		newMissing->status = FAILED_DISK;
+	}
+}
+
+/* Reject metadata that relies on migration features we cannot handle.
+ * The only feature checked for is the migration optimization area.
+ *
+ * Returns 0 when no volume trips the check, -1 as soon as a volume with
+ * migr_state == 1 and migr_type == MIGR_GEN_MIGR has different
+ * pba_of_lba0 values in its two maps.
+ */
+int check_mpb_migr_compatibility(struct intel_super *super)
+{
+	int vol;
+
+	for (vol = 0; vol < super->anchor->num_raid_devs; vol++) {
+		struct imsm_dev *dev = __get_imsm_dev(super->anchor, vol);
+		struct imsm_map *cur, *prev;
+
+		/* only volumes that are currently migrating are of interest */
+		if (!dev)
+			continue;
+		if (dev->vol.migr_state != 1 ||
+		    dev->vol.migr_type != MIGR_GEN_MIGR)
+			continue;
+
+		cur = get_imsm_map(dev, 0);
+		prev = get_imsm_map(dev, 1);
+		if (cur->pba_of_lba0 != prev->pba_of_lba0)
+			return -1; /* migration optimization area was used */
+	}
+
+	return 0;
+}
+
 static void __free_imsm(struct intel_super *super, int free_disks);
 
 /* load_imsm_mpb - read matrix metadata @@ -2556,6 +2727,21 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
 		return 3;
 	}
 
+	/* Check for unsupported migration features */
+	if (check_mpb_migr_compatibility(super) != 0) {
+		if (devname)
+			fprintf(stderr,
+				Name ": Unsupported migration detected on %s\n",
+				devname);
+
+		return 4;
+	}
+
+	/* Now make sure that in case of migration
+	 * we'll convert raid personalities
+	 */
+	convert_raid_personalities(super);
+
 	/* FIXME the BBM log is disk specific so we cannot use this global
 	 * buffer for all disks.  Ok for now since we only look at the global
 	 * bbm_log_size parameter to gate assembly @@ -4601,6 +4787,8 @@ static void update_recovery_start(struct imsm_dev *dev, struct mdinfo *array)
 	rebuild->recovery_start = units * blocks_per_migr_unit(dev);  }
 
+int recover_backup_imsm(struct supertype *st, struct mdinfo *info,
+			void *ptr, int length);
 
 static struct mdinfo *container_content_imsm(struct supertype *st)  { @@ -4720,6 +4908,11 @@ static struct mdinfo *container_content_imsm(struct supertype *st)
 		}
 		/* now that the disk list is up-to-date fixup recovery_start */
 		update_recovery_start(dev, this);
+
+		/* check for reshape */
+		if (this && this->reshape_active == 1)
+			recover_backup_imsm(st, this, NULL, 0);
+
 		rest = this;
 	}
 

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux