[PATCH 01/31] Add takeover support for external meta

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



When performing a 0->10 or 10->0 takeover, mdmon should update the external metadata (due to disk slot changes).
To achieve that, after changing the level in md, mdadm calls update_super with the "update_level" type.
update_super() allocates a new imsm_dev with updated disk slot numbers to be processed by mdmon in process_update().
process_update() discovers missing disks and adds them to imsm metadata.

Signed-off-by: Maciej Trela <maciej.trela@xxxxxxxxx>
Signed-off-by: Adam Kwolek <adam.kwolek@xxxxxxxxx>
---

 mdadm/mdadm/Grow.c        |   14 ++
 mdadm/mdadm/managemon.c   |   15 ++
 mdadm/mdadm/mdadm.h       |    1
 mdadm/mdadm/mdstat.c      |   24 ++++
 mdadm/mdadm/monitor.c     |    2
 mdadm/mdadm/super-intel.c |  304 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 355 insertions(+), 5 deletions(-)

diff --git a/mdadm/mdadm/Grow.c b/mdadm/mdadm/Grow.c
index ae60f64..1054979 100644
--- a/mdadm/mdadm/Grow.c
+++ b/mdadm/mdadm/Grow.c
@@ -821,7 +821,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        st->update_tail = &st->updates;
        }

-       sra = sysfs_read(fd, 0, GET_LEVEL);
+       sra = sysfs_read(fd, 0, GET_VERSION | GET_LEVEL);
        if (sra) {
                if (st->ss->external && st->subarray[0] == 0) {
                        array.level = LEVEL_CONTAINER;
@@ -1003,6 +1003,18 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                fprintf(stderr, Name " level of %s changed to %s\n",
                                        devname, c);
                        changed = 1;
+
+                       st = super_by_fd(fd);
+                       if (!st) {
+                               fprintf(stderr, Name ": cannot handle this array\n");
+                               return 1;
+                       } else {
+                               if (st && reshape_super(st, -1, level, UnSet, 0, 0, NULL, devname, !quiet)) {
+                                       rv = 1;
+                                       goto release;
+                               }
+                               sync_metadata(st);
+                       }
                }
        }

diff --git a/mdadm/mdadm/managemon.c b/mdadm/mdadm/managemon.c
index 164e4f8..493c96e 100644
--- a/mdadm/mdadm/managemon.c
+++ b/mdadm/mdadm/managemon.c
@@ -381,6 +381,9 @@ static int disk_init_and_add(struct mdinfo *disk, struct mdinfo *clone,
 static void manage_member(struct mdstat_ent *mdstat,
                          struct active_array *a)
 {
+       struct active_array *newa;
+       int level;
+
        /* Compare mdstat info with known state of member array.
         * We do not need to look for device state changes here, that
         * is dealt with by the monitor.
@@ -408,6 +411,16 @@ static void manage_member(struct mdstat_ent *mdstat,
        else
                frozen = 1; /* can't read metadata_version assume the worst */

+       level = mdstat_to_level(mdstat);
+       if (a->info.array.level != level && level >= 0) {
+               newa = duplicate_aa(a);
+               if (newa) {
+                       newa->info.array.level = level;
+                       replace_array(a->container, a, newa);
+                       a = newa;
+               }
+       }
+
        if (a->check_degraded && !frozen) {
                struct metadata_update *updates = NULL;
                struct mdinfo *newdev = NULL;
@@ -615,7 +628,7 @@ static void handle_message(struct supertype *container, struct metadata_update *

        if (msg->len == 0) { /* ping_monitor */
                int cnt;
-
+
                cnt = monitor_loop_cnt;
                if (cnt & 1)
                        cnt += 2; /* wait until next pselect */
diff --git a/mdadm/mdadm/mdadm.h b/mdadm/mdadm/mdadm.h
index 64b32cc..fa800f9 100644
--- a/mdadm/mdadm/mdadm.h
+++ b/mdadm/mdadm/mdadm.h
@@ -373,6 +373,7 @@ extern void mdstat_wait(int seconds);
 extern void mdstat_wait_fd(int fd, const sigset_t *sigmask);
 extern int mddev_busy(int devnum);
 extern struct mdstat_ent *mdstat_by_component(char *name);
+int mdstat_to_level(struct mdstat_ent *ms);

 struct map_ent {
        struct map_ent *next;
diff --git a/mdadm/mdadm/mdstat.c b/mdadm/mdadm/mdstat.c
index 47be2bb..47f54cb 100644
--- a/mdadm/mdadm/mdstat.c
+++ b/mdadm/mdadm/mdstat.c
@@ -282,6 +282,30 @@ struct mdstat_ent *mdstat_read(int hold, int start)
        return rv;
 }

+/*
+ * Translate the personality name of an mdstat entry into a numeric level.
+ *
+ * ms->level is matched by prefix against the personality names listed
+ * below.  "raid10" is tested before "raid1" because "raid1" is a prefix
+ * of "raid10".
+ *
+ * Returns 0, 1, 4, 5, 6, 10, LEVEL_LINEAR, LEVEL_FAULTY or
+ * LEVEL_MULTIPATH for a recognised name, and LEVEL_UNSUPPORTED when ms
+ * is NULL, when it carries no level string, or when the name is not one
+ * of the above.
+ */
+int mdstat_to_level(struct mdstat_ent *ms)
+{
+       static const struct {
+               const char *name;
+               int level;
+       } personalities[] = {
+               { "raid0",     0 },
+               { "raid10",    10 },
+               { "raid1",     1 },
+               { "raid4",     4 },
+               { "raid5",     5 },
+               { "raid6",     6 },
+               { "linear",    LEVEL_LINEAR },
+               { "faulty",    LEVEL_FAULTY },
+               { "multipath", LEVEL_MULTIPATH },
+       };
+       size_t i;
+
+       /* an entry without a level string cannot be classified */
+       if (ms == NULL || ms->level == NULL)
+               return LEVEL_UNSUPPORTED;
+
+       for (i = 0; i < sizeof(personalities) / sizeof(personalities[0]); i++)
+               if (strncmp(ms->level, personalities[i].name,
+                           strlen(personalities[i].name)) == 0)
+                       return personalities[i].level;
+
+       return LEVEL_UNSUPPORTED;
+}
+
 void mdstat_wait(int seconds)
 {
        fd_set fds;
diff --git a/mdadm/mdadm/monitor.c b/mdadm/mdadm/monitor.c
index 23ff9ab..46c044e 100644
--- a/mdadm/mdadm/monitor.c
+++ b/mdadm/mdadm/monitor.c
@@ -487,7 +487,7 @@ static int wait_and_act(struct supertype *container, int nowait)
                /* once an array has been deactivated we want to
                 * ask the manager to discard it.
                 */
-               if (!a->container) {
+               if (!a->container || a->info.array.level == 0) {
                        if (discard_this) {
                                ap = &(*ap)->next;
                                continue;
diff --git a/mdadm/mdadm/super-intel.c b/mdadm/mdadm/super-intel.c
index 7c5fcc4..5caa8fa 100644
--- a/mdadm/mdadm/super-intel.c
+++ b/mdadm/mdadm/super-intel.c
@@ -285,6 +285,7 @@ enum imsm_update_type {
        update_kill_array,
        update_rename_array,
        update_add_disk,
+       update_level,
 };

 struct imsm_update_activate_spare {
@@ -320,6 +321,13 @@ struct imsm_update_add_disk {
        enum imsm_update_type type;
 };

+struct imsm_update_level { /* update record built by update_level_imsm() for a raid level change */
+       enum imsm_update_type type; /* update_level for this record */
+       int delta_disks; /* new raid_disks minus the current map's num_members */
+       int container_member; /* container_member of the volume being changed */
+       struct imsm_dev dev; /* copy of the volume's imsm_dev with the new level and disk order; allocated with extra room after it for (disks - 1) more order-table entries */
+};
+
 static struct supertype *match_metadata_desc_imsm(char *arg)
 {
        struct supertype *st;
@@ -1666,6 +1674,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info)
        }
 }

+static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks);
+static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src);
+
 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
                             char *update, char *devname, int verbose,
                             int uuid_set, char *homehost)
@@ -1698,12 +1709,15 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info,
        struct intel_super *super = st->sb;
        struct imsm_super *mpb;

-       /* we can only update container info */
-       if (!super || super->current_vol >= 0 || !super->anchor)
+       if (!super || !super->anchor)
                return 1;

        mpb = super->anchor;

+       /* we can only update container info */
+       if (super->current_vol >= 0)
+               return 1;
+
        if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
                fprintf(stderr,
                        Name ": '--uuid' not supported for imsm metadata\n");
@@ -1778,6 +1792,45 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
        memcpy(dest, src, sizeof_imsm_dev(src, 0));
 }

+/*
+ * Replace the imsm_dev buffer of one volume with a larger allocation.
+ *
+ * super:           container metadata; NULL is rejected.
+ * array_index:     index of the volume, matched against the index of
+ *                  the entries on super->devlist.
+ * map_num_members: number of members each map must be able to hold.
+ *
+ * The new buffer is sized for an imsm_dev with a double map of
+ * map_num_members entries each, but never smaller than the number of
+ * bytes imsm_copy_dev() copies from the current imsm_dev, so the copy
+ * cannot run past the end of the allocation.
+ *
+ * Returns the new imsm_dev, which has replaced (and freed) the old one
+ * in the devlist entry.  Returns NULL when super is NULL, the volume is
+ * not on the list, or memory cannot be allocated; the devlist is left
+ * untouched in those cases.
+ */
+struct imsm_dev *reallocate_imsm_dev(struct intel_super *super,
+                                        unsigned int array_index,
+                                        int map_num_members)
+{
+       struct imsm_dev *newdev;
+       struct intel_dev *dv;
+       size_t members;
+       size_t needed;
+       size_t copied;
+
+       if (!super)
+               return NULL;
+
+       /* Find our device first, nothing is allocated for an unknown index */
+       for (dv = super->devlist; dv; dv = dv->next)
+               if (dv->index == array_index)
+                       break;
+       if (!dv)
+               return NULL;
+
+       /* the size formula below subtracts one member per map, so do not
+        * let a count below one shrink the buffer
+        */
+       members = map_num_members > 1 ? (size_t)map_num_members : 1;
+
+       /* Calculate space needed for imsm_dev with a double map */
+       needed = sizeof(struct imsm_dev) + sizeof(__u32) * (members - 1) +
+               sizeof(struct imsm_map) + sizeof(__u32) * (members - 1);
+
+       /* imsm_copy_dev() copies sizeof_imsm_dev(src, 0) bytes */
+       copied = sizeof_imsm_dev(dv->dev, 0);
+       if (needed < copied)
+               needed = copied;
+
+       newdev = malloc(needed);
+       if (!newdev) {
+               fprintf(stderr, "error: imsm meta update not possible due to no memory conditions\n");
+               return NULL;
+       }
+
+       /* Copy imsm_dev into the new buffer and hand it to the devlist */
+       imsm_copy_dev(newdev, dv->dev);
+       free(dv->dev);
+       dv->dev = newdev;
+
+       return newdev;
+}
+
 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
 {
        /*
@@ -5123,6 +5176,57 @@ static void imsm_process_update(struct supertype *st,
        mpb = super->anchor;

        switch (type) {
+       case update_level: {
+               struct imsm_update_level *u = (void *)update->buf;
+               struct imsm_dev *dev_new, *dev = NULL;
+               struct imsm_map *map;
+               struct dl *d;
+               int i;
+               int start_disk;
+
+               dev_new = &u->dev;
+               for (i = 0; i < mpb->num_raid_devs; i++) {
+                       dev = get_imsm_dev(super, i);
+                       if (strcmp((char *)dev_new->volume, (char *)dev->volume) == 0)
+                               break;
+               }
+               if (i == super->anchor->num_raid_devs)
+                       return;
+
+               if (dev == NULL)
+                       return;
+
+               imsm_copy_dev(dev, dev_new);
+               map = get_imsm_map(dev, 0);
+               start_disk = mpb->num_disks;
+               mpb->num_disks += u->delta_disks;
+
+               /* clear missing disks list */
+               while (super->missing) {
+                       d = super->missing;
+                       super->missing = d->next;
+                       __free_imsm_disk(d);
+               }
+               find_missing(super);
+
+               /* clear new disk entries if number of disks increased*/
+               d = super->missing;
+               for (i = start_disk; i < map->num_members; i++) {
+                       assert(d != NULL);
+                       if (!d)
+                               break;
+                       memset(&d->disk, 0, sizeof(d->disk));
+                       strcpy((char *)d->disk.serial, "MISSING");
+                       d->disk.total_blocks = map->blocks_per_member;
+                       /* Set slot for missing disk */
+                       set_imsm_ord_tbl_ent(map, i, d->index | IMSM_ORD_REBUILD);
+                       d->raiddisk = i;
+                       d = d->next;
+               }
+
+               super->updates_pending++;
+               break;
+       }
        case update_activate_spare: {
                struct imsm_update_activate_spare *u = (void *) update->buf;
                struct imsm_dev *dev = get_imsm_dev(super, u->array);
@@ -5442,6 +5546,26 @@ static void imsm_prepare_update(struct supertype *st,
        size_t len = 0;

        switch (type) {
+       case update_level: {
+               struct imsm_update_level *u = (void *) update->buf;
+               struct active_array *a;
+
+               dprintf("prepare_update(): update level\n");
+               len += u->delta_disks * sizeof(struct imsm_disk) +
+                       u->delta_disks * sizeof(__u32);
+
+               for (a = st->arrays; a; a = a->next)
+                       if (a->info.container_member == u->container_member)
+                               break;
+               if (a == NULL)
+                       break; /* what else we can do here? */
+
+               /* we'll add new disks to imsm_dev */
+               if (u->delta_disks > 0)
+                       reallocate_imsm_dev(super, u->container_member,
+                                           a->info.array.raid_disks);
+               break;
+       }
        case update_create_array: {
                struct imsm_update_create_array *u = (void *) update->buf;
                struct intel_dev *dv;
@@ -5561,6 +5685,181 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
 }
 #endif /* MDASSEMBLE */

+/*
+ * Queue an update_level metadata update for a raid0 <-> raid10 takeover.
+ *
+ * st:      supertype whose st->sb holds the loaded imsm metadata.
+ * info:    array.level and array.raid_disks give the new geometry,
+ *          container_member is copied into the update and info->devs
+ *          lists the member devices with their raid_disk slot.
+ * devname: name of the imsm volume, compared against dev->volume.
+ * verbose, uuid_set, homehost: not used.
+ *
+ * The update carries a copy of the volume's imsm_dev whose first map
+ * gets the new raid level, the new member count and a disk order table
+ * rebuilt from info->devs.  Slots for which no device was matched are
+ * written as -1.
+ *
+ * Returns 0 once the update was passed to append_metadata_update().
+ * Returns 1 without queueing anything when the level/disk count is not
+ * supported, the volume is not found, the change is neither 0->10 nor
+ * 10->0, or memory cannot be allocated.
+ */
+static int update_level_imsm(struct supertype *st, struct mdinfo *info,
+                            char *devname, int verbose,
+                            int uuid_set, char *homehost)
+{
+       struct intel_super *super = st->sb;
+       struct imsm_super *mpb = super->anchor;
+       struct imsm_update_level *u;
+       struct imsm_dev *dev_new, *dev = NULL;
+       struct imsm_map *map_new, *map;
+       struct mdinfo *newdi;
+       struct dl *dl;
+       int *tmp_ord_tbl;
+       int i, slot, idx;
+       int len, disks;
+       size_t min_len;
+
+       if (!is_raid_level_supported(super->orom,
+                                    info->array.level,
+                                    info->array.raid_disks))
+               return 1;
+
+       for (i = 0; i < mpb->num_raid_devs; i++) {
+               dev = get_imsm_dev(super, i);
+               if (strcmp(devname, (char *)dev->volume) == 0)
+                       break;
+       }
+       /* no volumes at all, or none carrying this name */
+       if (dev == NULL || i == mpb->num_raid_devs)
+               return 1;
+
+       map = get_imsm_map(dev, 0);
+
+       /* update level is needed only for 0->10 and 10->0 transitions */
+       if ((info->array.level != 10 || map->raid_level != 0) &&
+           (info->array.level != 0 || map->raid_level != 10))
+               return 1;
+
+       disks = (info->array.raid_disks > map->num_members) ?
+               info->array.raid_disks : map->num_members;
+       len = sizeof(struct imsm_update_level) +
+               ((disks - 1) * sizeof(__u32));
+
+       /* imsm_copy_dev() below writes sizeof_imsm_dev(dev, 0) bytes into
+        * u->dev; never allocate less than that
+        */
+       min_len = sizeof(struct imsm_update_level) - sizeof(struct imsm_dev) +
+               sizeof_imsm_dev(dev, 0);
+       if ((size_t)len < min_len)
+               len = (int)min_len;
+
+       u = malloc(len);
+       if (u == NULL)
+               return 1;
+
+       dev_new = &u->dev;
+       imsm_copy_dev(dev_new, dev);
+       map_new = get_imsm_map(dev_new, 0);
+
+       tmp_ord_tbl = malloc(sizeof(int) * disks);
+       if (tmp_ord_tbl == NULL) {
+               free(u);
+               return 1;
+       }
+
+       for (i = 0; i < disks; i++)
+               tmp_ord_tbl[i] = -1;
+
+       /* iterate through devices to detect slot changes */
+       for (dl = super->disks; dl; dl = dl->next)
+               for (newdi = info->devs; newdi; newdi = newdi->next) {
+                       if ((dl->major != newdi->disk.major) ||
+                           (dl->minor != newdi->disk.minor))
+                               continue;
+                       slot = get_imsm_disk_slot(map, dl->index);
+                       /* Both values are used as array indexes: ignore a
+                        * negative slot (presumably a disk that is not in
+                        * this map - TODO confirm) and a raid_disk outside
+                        * of the temporary table.
+                        */
+                       if (slot >= 0 &&
+                           newdi->disk.raid_disk >= 0 &&
+                           newdi->disk.raid_disk < disks) {
+                               idx = get_imsm_ord_tbl_ent(dev_new, slot);
+                               tmp_ord_tbl[newdi->disk.raid_disk] = idx;
+                       }
+                       break;
+               }
+
+       for (i = 0; i < disks; i++)
+               set_imsm_ord_tbl_ent(map_new, i, tmp_ord_tbl[i]);
+       free(tmp_ord_tbl);
+       map_new->raid_level = info->array.level;
+       map_new->num_members = info->array.raid_disks;
+       u->type = update_level;
+       u->delta_disks = info->array.raid_disks - map->num_members;
+       u->container_member = info->container_member;
+       append_metadata_update(st, u, len);
+
+       return 0;
+}
+
+
+/*
+ * reshape_super handler for imsm metadata.
+ *
+ * Only a pure level change (takeover) is acted upon, i.e. size == -1,
+ * layout == UnSet, raid_disks == 0 and level != UnSet.  For such a
+ * request the container metadata is loaded and, when the change
+ * involves raid10 or starts from raid0, an update_level metadata update
+ * is queued through update_level_imsm().  If the array was raid0,
+ * mdmon is started for the container when it is not running yet and
+ * then pinged.
+ *
+ * chunksize, backup, dev and verbouse are not used.
+ *
+ * Returns 0 when the update was queued or update_level_imsm() reported
+ * that nothing has to be sent to mdmon; returns 1 in every other case,
+ * including requests that are not a takeover.
+ */
+int imsm_reshape_super(struct supertype *st, long long size, int level,
+                      int layout, int chunksize, int raid_disks,
+                      char *backup, char*dev, int verbouse)
+{
+       int ret_val = 1;
+       struct mdinfo *sra = NULL;
+       int fd = -1;
+       char buf[PATH_MAX];
+       char *devname = NULL;
+
+       devname = devnum2devname(st->devnum);
+       if (devname == NULL) {
+               dprintf("imsm: Error: imsm_reshape_super(): cannot get device name.\n");
+               return ret_val;
+       }
+
+       snprintf(buf, PATH_MAX, "/dev/%s", devname);
+       fd = open(buf , O_RDONLY | O_DIRECT);
+       if (fd < 0) {
+               dprintf("imsm: cannot open device\n");
+               goto imsm_reshape_super_exit;
+       }
+
+       if ((size == -1) && (layout == UnSet) && (raid_disks == 0) && (level != UnSet)) {
+               /* ok - this is takeover */
+               int container_fd;
+               int dn;
+               int err;
+
+               sra = sysfs_read(fd, 0,  GET_VERSION | GET_LEVEL |
+                                GET_LAYOUT | GET_DISKS | GET_DEVS);
+               if (sra == NULL) {
+                       fprintf(stderr, Name ": Cannot read sysfs info (imsm)\n");
+                       goto imsm_reshape_super_exit;
+               }
+               dn = devname2devnum(sra->text_version + 1);
+               container_fd = open_dev_excl(dn);
+               if (container_fd < 0) {
+                       fprintf(stderr, Name ": Cannot get exclusive access "
+                               "to container (imsm).\n");
+                       goto imsm_reshape_super_exit;
+               }
+               /* everything below works on st->sb, so do not go on
+                * when the container metadata could not be loaded
+                */
+               err = st->ss->load_super(st, container_fd, NULL);
+               close(container_fd);
+               if (err != 0) {
+                       fprintf(stderr, Name ": Cannot load container "
+                               "metadata (imsm).\n");
+                       goto imsm_reshape_super_exit;
+               }
+               st->ss->getinfo_super(st, sra);
+
+               /* send metadata update for raid10 takeover
+                * this means we are going from/to raid10
+                * to/from a level different than raid10;
+                * if the source level is raid0, mdmon is started as well
+                *
+                * NOTE(review): "level > 0" keeps a takeover to raid0 out
+                * of this branch, so such a request returns 1 - confirm
+                * that this is intended.
+                */
+               if (((level == 10) || (sra->array.level == 10) || (sra->array.level == 0)) &&
+                    (level != sra->array.level) &&
+                    (level > 0)) {
+                       st->update_tail = &st->updates;
+                       err = update_level_imsm(st, sra, sra->name, 0, 0, NULL);
+                       if (err != 0) {
+                               /* no need to perform any updates with mdmon */
+                               ret_val = 0;
+                               goto imsm_reshape_super_exit;
+                       }
+                       /* if raid0 was taken over by any other
+                        * personality start mdmon */
+                       if (sra->array.level == 0) {
+                               char *dname = devnum2devname(dn);
+                               if (!mdmon_running(dn))
+                                       start_mdmon(dn);
+                               if (dname) {
+                                       ping_monitor(dname);
+                                       free(dname);
+                               }
+                       }
+                       ret_val = 0;
+               }
+       }
+
+imsm_reshape_super_exit:
+       /* sra is still NULL on the early error paths above */
+       sysfs_free(sra);
+       if (fd >= 0)
+               close(fd);
+       free(devname);
+
+       dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
+       return ret_val;
+}
+
 struct superswitch super_imsm = {
 #ifndef        MDASSEMBLE
        .examine_super  = examine_super_imsm,
@@ -5592,6 +5891,7 @@ struct superswitch super_imsm = {
        .match_metadata_desc = match_metadata_desc_imsm,
        .container_content = container_content_imsm,
        .default_geometry = default_geometry_imsm,
+       .reshape_super  = imsm_reshape_super,

        .external       = 1,
        .name = "imsm",

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux