[PATCH 1/2] mdadm: Add takeover support for external meta

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When performing a 0->10 or 10->0 takeover, mdmon should update the
external metadata (because the disk slots change).
To achieve that, after changing the level in md,
mdadm calls update_super() with the "update_level" type.
update_super() allocates a new imsm_dev with updated
disk slot numbers, to be processed by mdmon in process_update().
process_update() discovers missing disks and adds them to the imsm
metadata.

Signed-off-by: Maciej Trela <maciej.trela@xxxxxxxxx>
---
 Grow.c        |   42 ++++++++++-
 managemon.c   |   13 +++
 mdadm.h       |    1 
 mdstat.c      |   24 ++++++
 monitor.c     |    2 -
 super-intel.c |  212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 288 insertions(+), 6 deletions(-)

diff --git a/Grow.c b/Grow.c
index 6264996..ff04fc0 100644
--- a/Grow.c
+++ b/Grow.c
@@ -503,7 +503,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 	int ndata, odata;
 	int orig_level = UnSet;
 	char alt_layout[40];
-	int *fdlist;
+	int *fdlist, container_fd, dn;
 	unsigned long long *offsets;
 	int d, i;
 	int nrdisks;
@@ -515,7 +515,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 	int changed = 0;
 	int done;
 
-	struct mdinfo *sra;
+	struct mdinfo *sra, ext_sra;
 	struct mdinfo *sd;
 
 	if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) {
@@ -540,7 +540,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 			"       Please use a newer kernel\n");
 		return 1;
 	}
-	sra = sysfs_read(fd, 0, GET_LEVEL);
+	sra = sysfs_read(fd, 0, GET_VERSION | GET_LEVEL);
 	frozen = freeze_array(sra);
 	if (frozen < 0) {
 		fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
@@ -690,6 +690,42 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
 				fprintf(stderr, Name " level of %s changed to %s\n",
 					devname, c);
 			changed = 1;
+
+			st = super_by_fd(fd);
+			if (st && st->ss->external) {
+				sysfs_free(sra);
+				sra = sysfs_read(fd, 0,
+						 GET_VERSION | GET_LEVEL |
+						 GET_LAYOUT | GET_DISKS | GET_DEVS);
+
+				dn = devname2devnum(sra->text_version + 1);
+				container_fd = open_dev_excl(dn);
+				if (container_fd < 0) {
+					fprintf(stderr, Name ": Cannot get exclusive access "
+						"to container.\n");
+					rv = 1;
+					goto release;
+				}
+				st->ss->load_super(st, container_fd, NULL);
+				close(container_fd);
+				st->ss->getinfo_super(st, &ext_sra);
+				st->update_tail = &st->updates;
+				err = st->ss->update_super(st, sra, "update_level",
+							   ext_sra.name, 0, 0, NULL);
+				if (err != 0) {
+					/* no need to perform any updates with mdmon */
+					rv = 0;
+					goto release;
+				}
+				/* if raid0 was takeovered by any other
+				 * personality start mdmon */
+				if (level > 0 && orig.level == 0) {
+					if (!mdmon_running(dn))
+						start_mdmon(dn);
+					ping_monitor(devnum2devname(dn));
+				}
+				flush_metadata_updates(st);
+			}
 		}
 	}
 
diff --git a/managemon.c b/managemon.c
index 037406f..c279664 100644
--- a/managemon.c
+++ b/managemon.c
@@ -364,6 +364,9 @@ static void manage_container(struct mdstat_ent *mdstat,
 static void manage_member(struct mdstat_ent *mdstat,
 			  struct active_array *a)
 {
+	struct active_array *newa;
+	int level;
+
 	/* Compare mdstat info with known state of member array.
 	 * We do not need to look for device state changes here, that
 	 * is dealt with by the monitor.
@@ -382,6 +385,16 @@ static void manage_member(struct mdstat_ent *mdstat,
 	a->info.array.chunk_size = mdstat->chunk_size;
 	// MORE
 
+	level = mdstat_to_level(mdstat);
+	if (a->info.array.level != level && level >= 0) {
+		newa = duplicate_aa(a);
+		if (!newa)
+			goto out;
+		newa->info.array.level = level;
+		replace_array(a->container, a, newa);
+		a = newa;
+	}
+
 	if (a->check_degraded) {
 		struct metadata_update *updates = NULL;
 		struct mdinfo *newdev = NULL;
diff --git a/mdadm.h b/mdadm.h
index 362b66b..68c15ab 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -358,6 +358,7 @@ extern void free_mdstat(struct mdstat_ent *ms);
 extern void mdstat_wait(int seconds);
 extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
 extern int mddev_busy(int devnum);
+int mdstat_to_level(struct mdstat_ent *ms);
 
 struct map_ent {
 	struct map_ent *next;
diff --git a/mdstat.c b/mdstat.c
index 4a9f370..af1cae4 100644
--- a/mdstat.c
+++ b/mdstat.c
@@ -251,6 +251,30 @@ struct mdstat_ent *mdstat_read(int hold, int start)
 	return rv;
 }
 
+int mdstat_to_level(struct mdstat_ent *ms) /* map the name prefix in ms->level to a numeric level */
+{
+	if (strncmp(ms->level, "raid0", 5) == 0) /* NOTE(review): ms->level is used unchecked; confirm it is never NULL */
+		return 0;
+	else if (strncmp(ms->level, "raid10", 6) == 0) /* checked before "raid1", whose 5-char prefix test would also match */
+		return 10;
+	else if (strncmp(ms->level, "raid1", 5) == 0)
+		return 1;
+	else if (strncmp(ms->level, "raid4", 5) == 0)
+		return 4;
+	else if (strncmp(ms->level, "raid5", 5) == 0)
+		return 5;
+	else if (strncmp(ms->level, "raid6", 5) == 0)
+		return 6;
+	else if (strncmp(ms->level, "linear", 6) == 0)
+		return LEVEL_LINEAR;
+	else if (strncmp(ms->level, "faulty", 6) == 0)
+		return LEVEL_FAULTY;
+	else if (strncmp(ms->level, "multipath", 9) == 0)
+		return LEVEL_MULTIPATH;
+	/* no known level-name prefix matched */
+	return LEVEL_UNSUPPORTED;
+}
+
 void mdstat_wait(int seconds)
 {
 	fd_set fds;
diff --git a/monitor.c b/monitor.c
index e43e545..4578718 100644
--- a/monitor.c
+++ b/monitor.c
@@ -447,7 +447,7 @@ static int wait_and_act(struct supertype *container, int nowait)
 		/* once an array has been deactivated we want to
 		 * ask the manager to discard it.
 		 */
-		if (!a->container) {
+		if (!a->container || a->info.array.level == 0) {
 			if (discard_this) {
 				ap = &(*ap)->next;
 				continue;
diff --git a/super-intel.c b/super-intel.c
index a196ca3..b0bd0e6 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -283,6 +283,7 @@ enum imsm_update_type {
 	update_activate_spare,
 	update_create_array,
 	update_add_disk,
+	update_level,
 };
 
 struct imsm_update_activate_spare {
@@ -307,6 +308,13 @@ struct imsm_update_add_disk {
 	enum imsm_update_type type;
 };
 
+struct imsm_update_level {	/* update record queued by update_super_imsm() for "update_level" */
+	enum imsm_update_type type;	/* set to update_level by the producer */
+	int delta_disks;	/* new raid_disks minus the current map's num_members */
+	int container_member;	/* copied from info->container_member */
+	struct imsm_dev dev;	/* copy of the volume's imsm_dev with new level, member count and ord table; producer allocates extra trailing __u32 slots */
+};
+
 static struct supertype *match_metadata_desc_imsm(char *arg)
 {
 	struct supertype *st;
@@ -1588,6 +1596,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
 	}
 }
 
+static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks);
+static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src);
+
 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
 			     char *update, char *devname, int verbose,
 			     int uuid_set, char *homehost)
@@ -1620,12 +1631,98 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
 	struct intel_super *super = st->sb;
 	struct imsm_super *mpb;
 
-	/* we can only update container info */
-	if (!super || super->current_vol >= 0 || !super->anchor)
+	if (!super || !super->anchor)
 		return 1;
 
 	mpb = super->anchor;
 
+	if (strcmp(update, "update_level") == 0) {
+		struct imsm_update_level *u;
+		struct imsm_dev *dev_new, *dev = NULL;
+		struct imsm_map *map_new, *map;
+		struct mdinfo *newdi;
+		struct dl *dl;
+		int *tmp_ord_tbl;
+		int i, slot, idx;
+		int len, disks;
+
+		if (!is_raid_level_supported(super->orom,
+					     info->array.level,
+					     info->array.raid_disks))
+			return 1;
+
+		for (i = 0; i < mpb->num_raid_devs; i++) {
+			dev = get_imsm_dev(super, i);
+			if (strcmp(devname, (char *)dev->volume) == 0)
+				break;
+		}
+		if (dev == NULL)
+			return 1;
+
+		if (i == super->anchor->num_raid_devs)
+			return 1;
+
+		map = get_imsm_map(dev, 0);
+
+		/* update level is needed only for 0->10 and 10->0 transitions */
+		if ((info->array.level != 10 || map->raid_level != 0) &&
+		    (info->array.level != 0 || map->raid_level != 10))
+			return 1;
+
+		disks = (info->array.raid_disks > map->num_members) ?
+			info->array.raid_disks : map->num_members;
+		len = sizeof(struct imsm_update_level) +
+			((disks - 1) * sizeof(__u32));
+
+		u = malloc(len);
+		if (u == NULL)
+			return 1;
+
+		dev_new = &u->dev;
+		imsm_copy_dev(dev_new, dev);
+		map_new = get_imsm_map(dev_new, 0);
+
+		tmp_ord_tbl = malloc(sizeof(int) * disks);
+		if (tmp_ord_tbl == NULL) {
+			free(u);
+			return 1;
+		}
+
+		for (i = 0; i < disks; i++)
+			tmp_ord_tbl[i] = -1;
+
+		/* iterate through devices to detect slot changes */
+		for (dl = super->disks; dl; dl = dl->next)
+			for (newdi = info->devs; newdi; newdi = newdi->next) {
+
+				if ((dl->major != newdi->disk.major) ||
+				    (dl->minor != newdi->disk.minor))
+					continue;
+				slot = get_imsm_disk_slot(map, dl->index);
+				idx = get_imsm_ord_tbl_ent(dev_new, slot);
+				tmp_ord_tbl[newdi->disk.raid_disk] = idx;
+				break;
+			}
+
+		for (i = 0; i < disks; i++)
+			set_imsm_ord_tbl_ent(map_new, i, tmp_ord_tbl[i]);
+		free(tmp_ord_tbl);
+
+		map_new->raid_level = info->array.level;
+		map_new->num_members = info->array.raid_disks;
+
+		u->type = update_level;
+		u->delta_disks = info->array.raid_disks - map->num_members;
+		u->container_member = info->container_member;
+		append_metadata_update(st, u, len);
+		rv = 0;
+		goto completed;
+	}
+
+	/* we can only update container info */
+	if (super->current_vol >= 0)
+		return 1;
+
 	if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
 		fprintf(stderr,
 			Name ": '--uuid' not supported for imsm metadata\n");
@@ -1653,6 +1750,7 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
 			Name ": '--update=%s' not supported for imsm metadata\n",
 			update);
 
+ completed:
 	/* successful update? recompute checksum */
 	if (rv == 0)
 		mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
@@ -1700,6 +1798,45 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
 	memcpy(dest, src, sizeof_imsm_dev(src, 0));
 }
 
+struct imsm_dev *reallocate_imsm_dev(struct intel_super *super, /* replace one array's imsm_dev with a freshly malloc'd copy */
+					 int array_index, /* matched against dv->index in super->devlist */
+					 int map_num_members) /* member count used to size both maps of the new buffer */
+{
+	struct imsm_dev *newdev = NULL;
+	struct imsm_dev *retval = NULL;
+	struct intel_dev *dv = NULL;
+	struct imsm_dev *dv_free = NULL;
+	int memNeeded;
+	/* Returns the new imsm_dev, or NULL if super is NULL, malloc() fails or no array matches. */
+	if (!super)
+		return NULL;
+
+	/* Calculate space needed for imsm_dev with a double map */
+	memNeeded = sizeof(struct imsm_dev) + sizeof(__u32) * (map_num_members - 1) +
+		sizeof(struct imsm_map) + sizeof(__u32) * (map_num_members - 1);
+	/* NOTE(review): map_num_members is not range-checked before this size is used -- confirm callers pass >= 1. */
+	newdev = malloc(memNeeded);
+	if (!newdev) {
+		fprintf(stderr, "error: imsm meta update not possible due to no memory conditions\n");
+		return NULL;
+	}
+	/* Find our device */
+	for (dv = super->devlist; dv; dv = dv->next)
+		if (dv->index == array_index) {
+			/* Copy imsm_dev into the new buffer. NOTE(review): imsm_copy_dev() copies sizeof_imsm_dev(src, 0) bytes; confirm that never exceeds memNeeded. */
+			imsm_copy_dev(newdev, dv->dev);
+			dv_free = dv->dev;
+			dv->dev = newdev;
+			retval = newdev;
+			free(dv_free); /* the old buffer is released once the list entry points at the copy */
+			break;
+		}
+	if (retval == NULL) /* no array matched: drop the unused buffer */
+		free(newdev);
+
+	return retval;
+}
+
 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
 {
 	/*
@@ -4843,6 +4980,57 @@ static void imsm_process_update(struct supertype *st,
 	mpb = super->anchor;
 
 	switch (type) {
+	case update_level: {
+		struct imsm_update_level *u = (void *) update->buf;
+		struct imsm_dev *dev_new, *dev = NULL;
+		struct imsm_map *map;
+		struct dl *d;
+		int i;
+
+		dev_new = &u->dev;
+		for (i = 0; i < mpb->num_raid_devs; i++) {
+			dev = get_imsm_dev(super, i);
+			if (strcmp((char *)dev_new->volume, (char *)dev->volume) == 0)
+				break;
+		}
+		if (i == super->anchor->num_raid_devs)
+			return;
+
+		if (dev == NULL)
+			return;
+
+		imsm_copy_dev(dev, dev_new);
+		map = get_imsm_map(dev, 0);
+		mpb->num_disks += u->delta_disks;
+
+		/* clear missing disks list */
+		while (super->missing) {
+			d = super->missing;
+			super->missing = d->next;
+			__free_imsm_disk(d);
+		}
+		find_missing(super);
+
+		/* clear new disk entries if number of disks increased*/
+		d = super->missing;
+		for (i = 0; i < map->num_members; i++) {
+			if (map->disk_ord_tbl[i] != -1)
+				continue;
+			assert(d != NULL);
+			if (!d)
+				break;
+			memset(&d->disk, 0, sizeof(d->disk));
+			strcpy((char *)d->disk.serial, "MISSING");
+			d->disk.total_blocks = map->blocks_per_member;
+			/* Set slot for missing disk */
+			set_imsm_ord_tbl_ent(map, i, d->index | IMSM_ORD_REBUILD);
+			d->raiddisk = i;
+			d = d->next;
+		}
+
+		super->updates_pending++;
+		break;
+	}
 	case update_activate_spare: {
 		struct imsm_update_activate_spare *u = (void *) update->buf; 
 		struct imsm_dev *dev = get_imsm_dev(super, u->array);
@@ -5098,6 +5286,26 @@ static void imsm_prepare_update(struct supertype *st,
 	size_t len = 0;
 
 	switch (type) {
+	case update_level: {
+		struct imsm_update_level *u = (void *) update->buf;
+		struct active_array *a;
+
+		dprintf("prepare_update(): update level\n");
+		len += u->delta_disks * sizeof(struct imsm_disk) +
+			u->delta_disks * sizeof(__u32);
+
+		for (a = st->arrays; a; a = a->next)
+			if (a->info.container_member == u->container_member)
+				break;
+		if (a == NULL)
+			break; /* what else we can do here? */
+
+		/* we'll add new disks to imsm_dev */
+		if (u->delta_disks > 0)
+			reallocate_imsm_dev(super, u->container_member,
+					    a->info.array.raid_disks);
+		break;
+	}
 	case update_create_array: {
 		struct imsm_update_create_array *u = (void *) update->buf;
 		struct intel_dev *dv;

��.n��������+%������w��{.n�����{����w��ܨ}���Ơz�j:+v�����w����ޙ��&�)ߡ�a����z�ޗ���ݢj��w�f



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux