[AUTOREBUILD 6/8] Monitor: autorebuild functionality added

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



>From f45f97933fddce7d7fcf370e4a74e9281c7c0a38 Mon Sep 17 00:00:00 2001
From: Anna Czarnowska <anna.czarnowska@xxxxxxxxx>
Date: Tue, 28 Sep 2010 06:26:51 +0200
Subject: [AUTOREBUILD 6/8] Monitor: autorebuild functionality added

For each volume we check its state, report any changes, note the minimum
size of its disks and link it with its parent container. After all
information is updated we call spare_sharing. spare_sharing searches for
suitable spares in other arrays and moves them, using move_spare, to the
arrays that need them. move_spare removes a spare from one array/container
and adds it to another. If the add fails we add the spare back to the
original container. The Manage_subdevs function is used to perform the
spare relocation.

Signed-off-by: Marcin Labun <marcin.labun@xxxxxxxxx>
Signed-off-by: Anna Czarnowska <anna.czarnowska@xxxxxxxxx>
---
 Monitor.c |  312 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 287 insertions(+), 25 deletions(-)

diff --git a/Monitor.c b/Monitor.c
index 93dd15d..62cbe98 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -30,6 +30,13 @@
 #include	<limits.h>
 #include	<syslog.h>
 
+/* VERBOSE is the 4th argument given to Manage_subdevs() when moving spares: 1 in DEBUG builds, -1 otherwise */
+#ifdef DEBUG
+#define VERBOSE 1
+#else
+#define VERBOSE (-1)
+#endif
+
 static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom,
 		  char *cmd, int dosyslog);
 
@@ -47,6 +54,7 @@ struct state {
 	int expected_spares;
 	int devstate[MaxDisks];
 	unsigned devid[MaxDisks];
+	unsigned long long min_size;
 	int percent;
 	char *metadata_version;
 	struct state *volumes;/* for a container it is a link its all volumes */
@@ -54,7 +62,8 @@ struct state {
 	struct state *next;
 };
 
-
+static void spare_sharing(struct state *statelist, char *mailaddr,
+			  char *mailfrom, char *alert_cmd, int dosyslog);
 
 static void add_to_cont(struct state *cont, struct state *vol)
 {
@@ -106,6 +115,10 @@ int Monitor(mddev_dev_t devlist,
 	 *    DeviceDisappeared
 	 *      Couldn't access a device which was previously visible
 	 *
+	 * If we detect an array with active<raid and spare==0
+	 * we look at other arrays that have a spare
+	 * and are in the same domain and subset
+	 * Then we hot-remove and hot-add to the other array
 	 *
 	 * If devlist is NULL, then we can monitor everything because --scan
 	 * was given.  We get an initial list from config file and add anything
@@ -113,6 +126,7 @@ int Monitor(mddev_dev_t devlist,
 	 */
 
 	int finished = 0;
+	int anydegraded;
 	struct mdstat_ent *mdstat = NULL;
 	char *mailfrom = NULL;
 	struct state *statelist = NULL;
@@ -222,6 +236,9 @@ int Monitor(mddev_dev_t devlist,
 			st->parent = NULL;
 			st->volumes = NULL;
 			st->total = 0;
+			st->min_size = 0;
+			memset(st->devid, 0, MaxDisks*sizeof(int));
+			memset(st->devstate, 0, MaxDisks*sizeof(int));
 			statelist = st;
 		}
 	} else {
@@ -242,6 +259,9 @@ int Monitor(mddev_dev_t devlist,
 			st->parent = NULL;
 			st->volumes = NULL;
 			st->total = 0;
+			st->min_size = 0;
+			memset(st->devid, 0, MaxDisks*sizeof(int));
+			memset(st->devstate, 0, MaxDisks*sizeof(int));
 			if (mdlist) {
 				st->expected_spares = mdlist->spare_disks;
 			}
@@ -254,6 +274,7 @@ int Monitor(mddev_dev_t devlist,
 		int new_found = 0;
 		struct state *st;
 
+		anydegraded = 0;
 		if (mdstat)
 			free_mdstat(mdstat);
 		mdstat = mdstat_read(oneshot?0:1, 0);
@@ -334,18 +355,17 @@ int Monitor(mddev_dev_t devlist,
 			 * metadata, so treat utime for external
 			 * metadata as different
 			 */
-			if ((st->utime == array.utime &&
-			   ((st->metadata_version == NULL) ||
-			     !is_external(st->metadata_version))) &&
+			if  (st->utime == array.utime &&
+			    (st->metadata_version &&
+			     !is_external(st->metadata_version)) &&
 			     st->failed == array.failed_disks &&
 			     st->working == array.working_disks &&
 			     st->spare == array.spare_disks &&
-			    (mse == NULL  || (mse->percent == st->percent))) {
-				close(fd);
+			    (mse->percent == st->percent)) {
 				st->err = 0;
+				close(fd);
 				continue;
 			}
-
 			if (st->utime == 0 && /* new array */
 			    mse->pattern && strchr(mse->pattern, '_') /* degraded */
 				)
@@ -409,6 +429,7 @@ int Monitor(mddev_dev_t devlist,
 				int newstate=0;
 				int change;
 				char *dv = NULL;
+				unsigned long long dsize;
 				disc.number = i;
 				if (i > array.raid_disks + array.nr_disks) {
 					newstate = 0;
@@ -453,6 +474,19 @@ int Monitor(mddev_dev_t devlist,
 				}
 				st->devstate[i] = newstate;
 				st->devid[i] = makedev(disc.major, disc.minor);
+
+				if (!share)
+					continue;
+				/* for volumes only we get minimum disk size
+				 * (only active disks) */
+				fd = open(dv, O_RDONLY);
+				if (dv && newstate & (1<<MD_DISK_ACTIVE) &&
+				    array.raid_disks && fd >= 0 &&
+				    get_dev_size(fd, dv, &dsize) &&
+				    (st->min_size == 0 || dsize < st->min_size))
+					st->min_size = dsize;
+				if (fd >= 0)
+					close(fd);
 			}
 			st->active = array.active_disks;
 			st->working = array.working_disks;
@@ -462,6 +496,8 @@ int Monitor(mddev_dev_t devlist,
 			st->raid = array.raid_disks;
 			st->total = array.raid_disks + array.nr_disks;
 			st->err = 0;
+			if ((st->active < st->raid) && st->spare == 0)
+				anydegraded = 1;
 			if (mse->metadata_version) {
 				if (!st->metadata_version)
 					st->metadata_version = strdup(mse->metadata_version);
@@ -515,27 +551,26 @@ int Monitor(mddev_dev_t devlist,
 					new_found = 1;
 				}
 		}
-
-		/* search the statelist to connect external
-		 * metadata volumes with their containers
-		 */
-		for (st = statelist; st; st = st->next) {
-			if (st->metadata_version &&
-			    is_external(st->metadata_version) &&
-			    is_subarray(st->metadata_version+9)) {
-				struct state *cont = NULL;
-
-				for (cont = statelist; cont; cont = cont->next) {
-					if (!cont->err &&
-					    cont->parent == NULL &&
-					    cont->metadata_version &&
-					    devname2devnum(st->metadata_version+10)
-					    == cont->devnum) {
-						add_to_cont(cont, st);
-						break;
+		if (share && anydegraded) {
+			/* parent-volume linking only needed when sharing spares */
+			for (st = statelist; st; st = st->next) {
+				if (!st->err &&
+				    st->metadata_version &&
+				    is_external(st->metadata_version) &&
+				    is_subarray(st->metadata_version+9)) {
+					struct state *cont = NULL;
+					for (cont = statelist; cont; cont = cont->next) {
+						if (!cont->err &&
+						cont->parent == NULL &&
+						cont->metadata_version &&
+						devname2devnum(st->metadata_version+10)	== cont->devnum) {
+							add_to_cont(cont, st);
+							break;
+						}
 					}
 				}
 			}
+			spare_sharing(statelist, mailaddr, mailfrom, alert_cmd, dosyslog);
 		}
 		if (!new_found) {
 			if (oneshot)
@@ -550,6 +585,233 @@ int Monitor(mddev_dev_t devlist,
 	return 0;
 }
 
+
+
+/* Refresh st->devstate[] for a native array or a container.
+ * The member list is read from the metadata handler (external metadata)
+ * or from sysfs (native metadata).  Every slot of st->devid[] that is set
+ * gets the state reported for that device, or 1<<MD_DISK_FAULTY if the
+ * device is no longer listed.
+ * Returns the mdinfo list that was read, which the caller must release
+ * with sysfs_free(), or NULL if st is in error, is a subarray, or the
+ * information could not be read.
+ */
+static struct mdinfo *get_raid_disk_info(struct state *st)
+{
+	struct supertype *sty = NULL;
+	struct mdinfo *infolist = NULL, *info;
+	int fd = -1, i, ext;
+	unsigned id;
+
+	/* Monitor() NULL-checks metadata_version before is_external();
+	 * do the same here, an array without a version string is native
+	 */
+	ext = st->metadata_version && is_external(st->metadata_version);
+
+	/* nothing to do for arrays in error or for subarrays */
+	if (st->err || (ext && is_subarray(st->metadata_version+9)))
+		return NULL;
+
+	if (ext) {
+		fd = open(st->devname, O_RDONLY);
+		if (fd < 0)
+			return NULL;
+		sty = guess_super(fd);
+		if (!sty) {
+			close(fd);
+			return NULL;
+		}
+		/* on failure infolist is still NULL, so NULL is returned */
+		if (sty->ss->load_super(sty, fd, st->devname))
+			goto cleanup;
+		infolist = sty->ss->getinfo_super_disks(sty);
+	} else
+		infolist = sysfs_read(-1, st->devnum,
+				      GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE|
+				      GET_DEGRADED|GET_COMPONENT|GET_VERSION);
+
+	if (!infolist)
+		goto cleanup;
+
+	/* devid[] and devstate[] only hold MaxDisks entries */
+	for (i = 0; i < st->total && i < MaxDisks; i++) {
+		if (st->devid[i] == 0)
+			continue;
+		for (info = infolist->devs; info; info = info->next) {
+			id = makedev(info->disk.major, info->disk.minor);
+			if (st->devid[i] == id) {
+				st->devstate[i] = info->disk.state;
+				break;
+			}
+		}
+		if (!info)
+			st->devstate[i] = 1<<MD_DISK_FAULTY;
+	}
+
+ cleanup:
+	if (fd >= 0)
+		close(fd);
+	if (sty) {
+		sty->ss->free_super(sty);
+		free(sty);
+	}
+	return infolist;
+}
+
+/* Move the spare *devid from array/container st2 to array/container st1.
+ * The device is removed from st2 and added to st1 with Manage_subdevs();
+ * if the add fails we try to add it back to st2.
+ * Returns 1 and clears *devid when the spare was moved, 0 otherwise.
+ */
+int move_spare(struct state *st2, struct state *st1, unsigned *devid,
+	       char *mailaddr, char *mailfrom, char *alert_cmd,
+	       int dosyslog)
+{
+	struct mddev_dev_s devlist;
+	char devname[20];
+	char *dv;
+	int from_fd, to_fd, n, moved = 0;
+
+	if (!st1 || !st2 || !devid || (*devid) == 0)
+		return 0;
+	from_fd = open(st2->devname, O_RDONLY);
+	if (from_fd < 0)
+		return 0;
+	to_fd = open(st1->devname, O_RDONLY);
+	if (to_fd < 0)
+		goto out_from;
+	dv = map_dev(major(*devid), minor(*devid), 1);
+	if (!dv)
+		goto out;
+	/* a truncated name would no longer refer to the spare: give up */
+	n = snprintf(devname, sizeof(devname), "%s", dv);
+	if (n < 0 || n >= (int)sizeof(devname))
+		goto out;
+	devlist.next = NULL;
+	devlist.used = 0;
+	devlist.re_add = 0;
+	devlist.writemostly = 0;
+	devlist.devname = devname;
+	devlist.disposition = 'r';
+	if (Manage_subdevs(st2->devname, from_fd, &devlist, VERBOSE, 0) != 0)
+		goto out;
+	devlist.disposition = 'a';
+	if (Manage_subdevs(st1->devname, to_fd, &devlist, VERBOSE, 0) == 0) {
+		*devid = 0;
+		ping_manager(st2->devname);
+		ping_manager(st1->devname);
+		alert("MoveSpare", st1->devname, st2->devname,
+		      mailaddr, mailfrom, alert_cmd, dosyslog);
+		moved = 1;
+	} else if (Manage_subdevs(st2->devname, from_fd, &devlist, VERBOSE, 0))
+		fprintf(stderr, "Error: Adding back spare device "
+			"%s to container %s failed!\n", devname, st2->devname);
+ out:
+	close(to_fd);
+ out_from:
+	close(from_fd);
+	return moved;
+}
+
+
+/* Return 1 if devid is set, devstate is 0 (not used in volumes, not failed)
+ * and the device is at least 'size' big, otherwise 0. */
+static int dev_suitable(unsigned devid, int devstate, unsigned long long size)
+{
+	unsigned long long ssize;
+	if (devid == 0 || devstate != 0 || !dev_size_from_id(devid, &ssize))
+		return 0;
+	return ssize >= size;
+}
+
+
+/* If an array has active < raid && spare == 0
+ * look for another array/container with an unused, unfailed spare
+ * and the same metadata version (domains are not compared yet);
+ * if found, hotremove/hotadd the spare (to parent container in external)
+ */
+static void spare_sharing(struct state *statelist, char *mailaddr,
+			  char *mailfrom, char *alert_cmd, int dosyslog)
+{
+	struct state *st, *stp, *vol, *st2 = NULL;
+	int i, ext, found;
+	struct mdinfo *sra = NULL;
+
+	for (st = statelist; st; st = st->next) {
+		if (st->err || st->active == st->raid || st->spare > 0)
+			continue;
+
+		found = 0;
+		/* metadata_version can be NULL: Monitor() checks it as well */
+		ext = st->metadata_version &&
+			is_external(st->metadata_version);
+		/* for external metadata spare will be moved to parent container */
+		if (ext) {
+			stp = st->parent;
+			if (!stp)
+				continue;
+		} else {
+			stp = st;
+		}
+		/* get member device state updated */
+		sra = get_raid_disk_info(stp);
+		if (!sra) {
+			dprintf("no sra for device: %s\n", stp->devname);
+			continue;
+		}
+		sysfs_free(sra);
+		/* devid[] and devstate[] only hold MaxDisks entries */
+		for (i = 0; i < stp->total && i < MaxDisks; i++)
+			if (dev_suitable(stp->devid[i], stp->devstate[i],
+					 st->min_size))
+				break;
+		if (i < stp->total && i < MaxDisks)
+			/* there is a spare in array/parent container,
+			 * it was probably just added
+			 * but mdmon has not started recovery yet
+			 * we will not add any more spares for now */
+			continue;
+
+		/* search for an array/container with unused spare */
+		for (st2 = statelist; st2; st2 = st2->next) {
+			if (st2->err || st2 == stp)
+				continue;
+			/* versions that cannot be compared never match */
+			if ((ext && st2->parent != NULL) ||
+			    !stp->metadata_version || !st2->metadata_version ||
+			    strcmp(stp->metadata_version,
+				   st2->metadata_version) != 0)
+				continue;
+			if (ext) {
+				/* if container has degraded volume
+				 * we can't remove spares */
+				for (vol = st2->volumes; vol; vol = vol->volumes)
+					if (vol->active < vol->raid)
+						break;
+				if (vol)
+					continue;
+			} else if (st2->active < st2->raid)
+				continue;
+			/* support for domain comparison needed */
+			for (i = 0; i < st2->total && i < MaxDisks; i++) {
+				if (!dev_suitable(st2->devid[i],
+						  st2->devstate[i],
+						  st->min_size))
+					continue;
+				if (move_spare(st2, stp, &st2->devid[i],
+					       mailaddr, mailfrom, alert_cmd,
+					       dosyslog)) {
+					found = 1;
+					/* stop searching disks */
+					break;
+				}
+			}
+			if (found)
+				break; /* stop searching arrays */
+		}
+	}
+}
+
 static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom, char *cmd,
 		  int dosyslog)
 {
-- 
1.6.4.2

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux