[PATCH 23/33] Monitor: Spare sharing with domain/subset support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Czarnowska, Anna 
Sent: Monday, July 05, 2010 11:38 AM
To: Neil Brown
Cc: linux-raid@xxxxxxxxxxxxxxx; Czarnowska, Anna; Hawrylewicz Czarnowski, Przemyslaw; Labun, Marcin; Neubauer, Wojciech; Williams, Dan J; Ciechanowski, Ed; dledford@xxxxxxxxxx
Subject: [PATCH 23/33] Monitor: Spare sharing with domain/subset support

Works for both native and external metadata.
Moves spares between arrays/containers with matching domain and subset to use for rebuild.

Signed-off-by: Anna Czarnowska <anna.czarnowska@xxxxxxxxx>
---
 Monitor.c |  209 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 202 insertions(+), 7 deletions(-)

diff --git a/Monitor.c b/Monitor.c
index ed57af0..11f3758 100644
--- a/Monitor.c
+++ b/Monitor.c
@@ -49,6 +49,7 @@ struct state {
      int devstate[MaxDisks];
      int devid[MaxDisks];
      int percent;
+     unsigned long long min_size;
      char *metadata_version;
      struct state *next;
      struct state *volumes;
@@ -56,6 +57,9 @@ struct state {
      struct state *missing;
 };
 
+static void spare_sharing(struct state *statelist, char *mailaddr,
+                   char *mailfrom, char *alert_cmd, int dosyslog);
+
 int Monitor(mddev_dev_t devlist,
          char *mailaddr, char *alert_cmd,
          int period, int daemonise, int scan, int oneshot,
@@ -92,11 +96,10 @@ int Monitor(mddev_dev_t devlist,
       *    DeviceDisappeared
       *      Couldn't access a device which was previously visible
       *
-     * if we detect an array with active<raid and spare==0
-     * we look at other arrays that have same spare-group
-     * If we find one with active==raid and spare>0,
-     *  and if we can get_disk_info and find a name
-     *  Then we hot-remove and hot-add to the other array
+     * If we detect an array with active<raid and spare==0
+     * we look at other arrays that have a spare
+     * and are in the same domain and subset
+     * Then we hot-remove and hot-add to the other array
       *
       * If devlist is NULL, then we can monitor everything because --scan
       * was given.  We get an initial list from config file and add anything
@@ -185,6 +188,9 @@ int Monitor(mddev_dev_t devlist,
                  st->parent = NULL;
                  st->volumes = NULL;
                  st->total = 0;
+                 st->min_size = 0;
+                 memset(st->devid, 0, MaxDisks*sizeof(int));
+                 memset(st->devstate, 0, MaxDisks*sizeof(int));
                  if (mdlist->spare_group)
                        st->spare_group = strdup(mdlist->spare_group);
                  else
@@ -211,6 +217,9 @@ int Monitor(mddev_dev_t devlist,
                  st->parent = NULL;
                  st->volumes = NULL;
                  st->total = 0;
+                 st->min_size = 0;
+                 memset(st->devid, 0, MaxDisks*sizeof(int));
+                 memset(st->devstate, 0, MaxDisks*sizeof(int));
                  if (mdlist) {
                        st->expected_spares = mdlist->spare_disks;
                        if (mdlist->spare_group)
@@ -371,6 +380,7 @@ int Monitor(mddev_dev_t devlist,
                        int newstate=0;
                        int change;
                        char *dv = NULL;
+                       unsigned long long dsize;
                        disc.number = i;
                        if (i > array.raid_disks + array.nr_disks) {
                              newstate = 0;
@@ -415,6 +425,17 @@ int Monitor(mddev_dev_t devlist,
                        }
                        st->devstate[i] = newstate;
                        st->devid[i] = makedev(disc.major, disc.minor);
+
+                       /* for volumes only we get minimum disk size
+                       * (only active disks) */
+                       fd = open(dv, O_RDONLY);
+                       if (dv && newstate & (1<<MD_DISK_ACTIVE) &&
+                           array.raid_disks && fd >= 0 &&
+                           get_dev_size(fd, dv, &dsize) &&
+                           (st->min_size == 0 || dsize < st->min_size))
+                             st->min_size = dsize;
+                       if (fd >= 0)
+                             close(fd);
                  }
                  st->active = array.active_disks;
                  st->working = array.working_disks;
@@ -506,6 +527,7 @@ int Monitor(mddev_dev_t devlist,
                              new_found = 1;
                        }
            }
+           spare_sharing(statelist, mailaddr, mailfrom, alert_cmd, dosyslog);
            /* If an array has active < raid && spare == 0 && spare_group != NULL
             * Look for another array with spare > 0 and active == raid and same spare_group
             *  if found, choose a device and hotremove/hotadd
@@ -577,8 +599,63 @@ int Monitor(mddev_dev_t devlist,
      return 0;
 }
 
+/* Move spare disk i from donor st2 to recipient st1: hot-remove it from st2,
+ * hot-add it to st1 and raise a "MoveSpare" alert.  If the add fails the disk
+ * is offered back to st2.  Returns 1 when the spare was moved, 0 otherwise. */
+static int move_spare(struct state *st2, struct state *st1, int i, char *mailaddr,
+                 char *mailfrom, char *alert_cmd, int dosyslog)
+{
+     struct mddev_dev_s devlist;
+     char devname[PATH_MAX];
+     char *dv;
+     int from_fd, to_fd, n, moved = 0;
+
+     if (!st1 || !st2 || i < 0 || i >= MaxDisks || st2->devid[i] == 0)
+           return 0;
+     from_fd = open(st2->devname, O_RDONLY);
+     if (from_fd < 0)
+           return 0;
+     to_fd = open(st1->devname, O_RDONLY);
+     if (to_fd < 0)
+           goto out_from;
+
+     dv = map_dev(major(st2->devid[i]), minor(st2->devid[i]), 1);
+     if (!dv)
+           goto out_to;
+     /* a truncated name would not identify the spare: give up instead */
+     n = snprintf(devname, sizeof(devname), "%s", dv);
+     if (n < 0 || (size_t)n >= sizeof(devname))
+           goto out_to;
+
+     devlist.next = NULL;
+     devlist.used = 0;
+     devlist.re_add = 0;
+     devlist.writemostly = 0;
+     devlist.devname = devname;
+     devlist.disposition = 'r';
+     if (Manage_subdevs(st2->devname, from_fd, &devlist, -1) != 0)
+           goto out_to;
+     devlist.disposition = 'a';
+     if (Manage_subdevs(st1->devname, to_fd, &devlist, -1) == 0) {
+           st2->devid[i] = 0;
+           ping_manager(st2->devname);
+           ping_manager(st1->devname);
+           alert("MoveSpare", st1->devname, st2->devname, mailaddr,
+                 mailfrom, alert_cmd, dosyslog);
+           moved = 1;
+     } else {
+           /* recipient refused the disk: add it back to the donor */
+           Manage_subdevs(st2->devname, from_fd, &devlist, -1);
+     }
+out_to:
+     close(to_fd);
+out_from:
+     close(from_fd);
+     return moved;
+}
+
 /* check if disk is used in donor array (native) or any volume in donor container (external)*/
-int check_disk_is_free(struct state *donor, int disk_idx, int ext)
+static int check_disk_is_free(struct state *donor, int disk_idx, int ext)
 {
      struct state *vol = NULL;
      int vol_disk;
@@ -601,7 +678,7 @@ int check_disk_is_free(struct state *donor, int disk_idx, int ext)
      return disk_idx;
 }
 
-int get_disk_domain_and_subset(int devid, char *metadata_version, struct domain_ent **domain,
+static int get_disk_domain_and_subset(int devid, char *metadata_version, struct domain_ent **domain,
                              struct subset **subset, int fcheck)
 {
      struct supertype *sty;
@@ -650,6 +727,124 @@ fail:
      return 0;
 }
 
+/*
+ * For every array with active < raid and spare == 0, look for another
+ * array/container holding an unused, unfailed spare that is in the same
+ * domain and subset and whose size is at least st->min_size.
+ * If one is found, hotremove/hotadd it (into the parent container
+ * for external metadata) and stop searching for that array.
+ */
+static void spare_sharing(struct state *statelist, char *mailaddr,
+                    char *mailfrom, char *alert_cmd, int dosyslog)
+{
+     struct state *st, *stp, *vol, *st2 = NULL;
+     struct domain_ent *domain, *spare_domain;
+     struct subset *subset, *spare_subset;
+     int i, ext, found;
+     unsigned long long ssize;
+
+     for (st = statelist; st; st = st->next) {
+           if (st->err || st->active == st->raid || st->spare > 0)
+                 continue;
+
+           found = 0;
+           ext = is_external(st->metadata_version);
+
+           /* for external metadata spare will be moved to parent container */
+           if (ext) {
+                 stp = st->parent;
+                 if (!stp)
+                       continue;
+           } else {
+                 stp = st;
+           }
+           /* check if there is a spare in this array/parent container */
+           for (i = 0; i < stp->total; i++)
+                 if ((check_disk_is_free(stp, i, ext) == i) &&
+                     (!ext || !disk_faulty_from_id(stp->devid[i])))
+                       break;
+           if (i < stp->total) {
+                 /* there is a spare in array/parent container,
+                 * if it is big enough it was probably just added
+                 * but mdmon has not started recovery yet
+                 * we will not add any more spares for now */
+                 if (dev_size_from_id(stp->devid[i], &ssize) &&
+                     ssize >= st->min_size)
+                       continue;
+           }
+
+           /* get any good disk from array to check domain and subset */
+           for (i = 0; i < st->total; i++)
+                 if (st->devid[i] > 0 &&
+                     (st->devstate[i] & (1 << MD_DISK_ACTIVE)))
+                       break;
+
+           if (i == st->total)
+                 continue;
+
+           if (!get_disk_domain_and_subset(st->devid[i],
+                       stp->metadata_version, &domain, &subset, 0))
+                 continue;
+
+           /* search for an array/container with unused spare */
+           for (st2 = statelist; st2; st2 = st2->next) {
+                 if (st2->err || st2 == stp)
+                       continue;
+
+                 /* external: donors are containers, not member volumes */
+                 if (ext && st2->parent != NULL)
+                       continue;
+
+                 /* same metadata required; never strcmp() a NULL version */
+                 if (!stp->metadata_version || !st2->metadata_version ||
+                     strcmp(stp->metadata_version, st2->metadata_version))
+                       continue;
+
+                 if (ext) {
+                       /* if container has degraded volume
+                       * we can't remove spares */
+                       for (vol = st2->volumes; vol; vol = vol->volumes)
+                             if (vol->active < vol->raid)
+                                   break;
+                       if (vol)
+                             continue;
+                 } else {
+                       if (st2->active < st2->raid)
+                             continue;
+                 }
+
+                 for (i = 0; i < st2->total; i++) { /* find a spare */
+                       if (check_disk_is_free(st2, i, ext) == INT_MAX)
+                             continue;
+
+                       if (!get_disk_domain_and_subset(st2->devid[i],
+                                   st2->metadata_version,
+                                   &spare_domain, &spare_subset, ext))
+                             continue;
+
+                       /* domain and subset differ from st's:
+                       * no point looking in that array/container */
+                       if (domain != spare_domain ||
+                           subset != spare_subset)
+                             break;
+
+                       /* skip spares whose size is unknown or too small */
+                       if (!dev_size_from_id(st2->devid[i], &ssize) ||
+                           ssize < st->min_size)
+                             continue;
+                       if (move_spare(st2, stp, i, mailaddr,
+                                    mailfrom, alert_cmd, dosyslog)) {
+                             found = 1;
+                             break; /* stop searching disks */
+                       }
+                 }
+                 if (found)
+                       break; /* stop searching arrays */
+           }
+     }
+}
+
 static void alert(char *event, char *dev, char *disc, char *mailaddr, char *mailfrom, char *cmd,
              int dosyslog)
 {
--
1.6.4.2


--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux