[PATCH 1/3] mdadm: Enable takeover for external metadata.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Signed-off-by: Maciej Trela <maciej.trela@xxxxxxxxx>
---
 Grow.c        |   17 +++-
 managemon.c   |   16 +++-
 mdadm.h       |    3 +
 mdmon.h       |    4 +
 monitor.c     |   51 ++++++++-
 super-intel.c |  344 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 6 files changed, 426 insertions(+), 9 deletions(-)

diff --git a/Grow.c b/Grow.c
index d8d91cb..d00b980 100644
--- a/Grow.c
+++ b/Grow.c
@@ -540,7 +540,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        "       Please use a newer kernel\n");
                return 1;
        }
-       sra = sysfs_read(fd, 0, GET_LEVEL);
+       sra = sysfs_read(fd, 0, GET_LEVEL | GET_VERSION);
        frozen = freeze_array(sra);
        if (frozen < 0) {
                fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
@@ -581,7 +581,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                if (size == 0)
                        size = array.size;
        }
-
+
        /* ======= set level =========== */
        if (level != UnSet && level != array.level) {
                /* Trying to change the level.
@@ -682,6 +682,19 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                fprintf(stderr, Name " level of %s changed to %s\n",
                                        devname, c);
                        changed = 1;
+
+                       /* if raid0 was takeovered by any other personality start mdmon */
+                       st = super_by_fd(fd);
+                       if (st->ss->external) {
+                               if ((level != 0) && (orig.level == 0))
+                               {
+                                       int dn = devname2devnum(sra->text_version + 1);
+                                       if (!mdmon_running(dn)) {
+                                               start_mdmon(dn);
+                                       }
+                                       ping_monitor(devnum2devname(dn));
+                               }
+                       }
                }
        }

diff --git a/managemon.c b/managemon.c
index e335077..86816c4 100644
--- a/managemon.c
+++ b/managemon.c
@@ -382,6 +382,16 @@ static void manage_member(struct mdstat_ent *mdstat,
        a->info.array.chunk_size = mdstat->chunk_size;
        // MORE

+       /* Takeover procedure */
+       if ((a->takeover == requested) && (a->container)) {
+               struct metadata_update *updates = NULL;
+               if (a->container->ss->takeover(a, &updates)) {
+                       a->container->ss->prepare_update(a->container, updates);
+                       a->takeover = pending;
+                       queue_metadata_update(updates);
+               }
+       }
+
        if (a->check_degraded) {
                struct metadata_update *updates = NULL;
                struct mdinfo *newdev = NULL;
@@ -479,7 +489,7 @@ static void manage_new(struct mdstat_ent *mdstat,
                return;

        mdi = sysfs_read(-1, mdstat->devnum,
-                        GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
+                        GET_LEVEL|GET_LAYOUT|GET_CHUNK|GET_DISKS|GET_COMPONENT|
                         GET_DEGRADED|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);

        new = malloc(sizeof(*new));
@@ -546,6 +556,10 @@ static void manage_new(struct mdstat_ent *mdstat,
        new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
        new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
        new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version");
+       new->level_fd = sysfs_open(new->devnum, NULL, "level");
+       new->takeover = none;
+       new->prev_level  = -1;
+
        dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
                new->action_fd, new->info.state_fd);

diff --git a/mdadm.h b/mdadm.h
index 836c64d..bd722a0 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -613,6 +613,9 @@ extern struct superswitch {
        void (*prepare_update)(struct supertype *st,
                               struct metadata_update *update);

+       int (*takeover)(struct active_array *a,
+                           struct metadata_update **updates);
+
        /* activate_spare will check if the array is degraded and, if it
         * is, try to find some spare space in the container.
         * On success, it add appropriate updates (For process_update) to
diff --git a/mdmon.h b/mdmon.h
index 4494085..5c2f795 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -23,6 +23,7 @@ enum array_state { clear, inactive, suspended, readonly, read_auto,

 enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };

+enum takeover_stage { none, requested, pending, finished }; /* progress of a raid level takeover on one active_array */

 struct active_array {
        struct mdinfo info;
@@ -32,6 +33,7 @@ struct active_array {
        int action_fd;
        int resync_start_fd;
        int metadata_fd; /* for monitoring rw/ro status */
+       int level_fd;

        enum array_state prev_state, curr_state, next_state;
        enum sync_action prev_action, curr_action, next_action;
@@ -39,6 +41,8 @@ struct active_array {
        int check_degraded; /* flag set by mon, read by manage */

        int devnum;
+       int prev_level, curr_level;
+       enum takeover_stage takeover;
 };

 /*
diff --git a/monitor.c b/monitor.c
index 81fef49..ea87f5f 100644
--- a/monitor.c
+++ b/monitor.c
@@ -100,6 +100,17 @@ static enum sync_action read_action( int fd)
        return (enum sync_action) sysfs_match_word(buf, sync_actions);
 }

+/* Read the text behind fd (the array's sysfs "level" file) and map it
+ * through the pers table; returns -1 when read_attr() yields nothing. */
+static int read_level(int fd)
+{
+       char buf[20];
+
+       if (read_attr(buf, sizeof(buf), fd) <= 0)
+               return -1;
+       return map_name(pers, buf);
+}
+
 int read_dev_state(int fd)
 {
        char buf[60];
@@ -204,7 +215,9 @@ static int read_and_act(struct active_array *a)
        a->next_action = bad_action;

        a->curr_state = read_state(a->info.state_fd);
-       a->curr_action = read_action(a->action_fd);
+       if (a->action_fd > 0)
+               a->curr_action = read_action(a->action_fd);
+       a->curr_level = read_level(a->level_fd);
        a->info.resync_start = read_resync_start(a->resync_start_fd);
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
                mdi->next_state = 0;
@@ -214,6 +227,31 @@ static int read_and_act(struct active_array *a)
                }
        }

+       /* takeover operation was finished */
+       if (a->takeover == finished) {
+               /* end up with raid0, stop the monitor */
+               if (a->curr_level == 0)
+                       deactivate = 1;
+               /* update current level */
+               a->info.array.level = a->curr_level;
+               a->takeover = none;
+       }
+
+       if (a->takeover == none) {
+               /* check for raid level change */
+               if (a->curr_level != a->prev_level)     {
+                       /* start takeover procedure */
+                       a->takeover = requested;
+               }
+               if ((a->curr_level == 0) && (a->prev_level != 0)) {
+                       /* Raid level was changed to raid0
+                          so close unsupported sysfs handles */
+                       if (a->action_fd > 0)
+                               close(a->action_fd);
+                       a->action_fd = -1;
+               }
+       }
+
        if (a->curr_state <= inactive &&
            a->prev_state > inactive) {
                /* array has been stopped */
@@ -347,9 +385,14 @@ static int read_and_act(struct active_array *a)
        }
        dprintf(" )\n");

+       /* Takeover started. Signal manager. */
+       if (a->takeover == requested) {
+               signal_manager();
+       }
+
        /* move curr_ to prev_ */
+       a->prev_level = a->curr_level;
        a->prev_state = a->curr_state;
-
        a->prev_action = a->curr_action;

        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
@@ -460,7 +503,9 @@ static int wait_and_act(struct supertype *container, int nowait)
                }

                add_fd(&rfds, &maxfd, a->info.state_fd);
-               add_fd(&rfds, &maxfd, a->action_fd);
+               add_fd(&rfds, &maxfd, a->level_fd);
+               if (a->action_fd > 0)
+                       add_fd(&rfds, &maxfd, a->action_fd);
                for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
                        add_fd(&rfds, &maxfd, mdi->state_fd);

diff --git a/super-intel.c b/super-intel.c
index fcf438c..1bb721e 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -284,6 +284,7 @@ enum imsm_update_type {
        update_activate_spare,
        update_create_array,
        update_add_disk,
+       update_takeover,
 };

 struct imsm_update_activate_spare {
@@ -308,6 +309,21 @@ struct imsm_update_add_disk {
        enum imsm_update_type type;
 };

+struct slot_change {
+       int prev_slot;  /* slot the disk occupies in the imsm map */
+       int new_slot;   /* raid_disk that sysfs now reports for that disk */
+};
+
+struct imsm_update_takeover {
+       enum imsm_update_type type;     /* set to update_takeover */
+       int array;      /* container member index of the volume */
+       int devnum;     /* devnum of the active array being taken over */
+       int new_level;  /* raid level the array has now (curr_level) */
+       int delta_disks;        /* > 0: missing disks to add, < 0: missing disks to drop */
+       struct slot_change sl_changes[IMSM_MAX_DEVICES]; /* first sl_changed entries are valid */
+       int sl_changed; /* number of entries filled in sl_changes */
+};
+
 static struct supertype *match_metadata_desc_imsm(char *arg)
 {
        struct supertype *st;
@@ -1698,6 +1714,42 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
        memcpy(dest, src, sizeof_imsm_dev(src, 0));
 }

+/* Swap the imsm_dev of volume array_index in super->devlist for a freshly
+ * allocated buffer: room for map0_num_members disk entries in the first map
+ * plus, when map1_num_members > 0, a second map with that many entries.
+ * The old dev is copied with imsm_copy_dev() and then freed.
+ * Returns the new dev, or NULL with the devlist untouched when super is
+ * NULL, the volume is not in the list, or malloc fails. */
+struct imsm_dev *reallocate_imsm_dev(struct intel_super *super, int array_index,
+                                    int map0_num_members, int map1_num_members)
+{
+       struct imsm_dev *newdev;
+       struct intel_dev *dv;
+       size_t mem_needed;
+
+       if (!super)
+               return NULL;
+       /* Find our device first so that nothing is leaked when it is absent */
+       for (dv = super->devlist; dv; dv = dv->next)
+               if (dv->index == array_index)
+                       break;
+       if (!dv)
+               return NULL;
+
+       /* Space for imsm_dev with a single map, plus a second map if wanted */
+       mem_needed = sizeof(struct imsm_dev) + sizeof(__u32) * (map0_num_members - 1);
+       if (map1_num_members > 0)
+               mem_needed += sizeof(struct imsm_map) + sizeof(__u32) * (map1_num_members - 1);
+       newdev = malloc(mem_needed);
+       if (!newdev) {
+               fprintf(stderr, "error: imsm meta update not possible due to no memory conditions\n");
+               return NULL;
+       }
+       imsm_copy_dev(newdev, dv->dev);
+       free(dv->dev);
+       dv->dev = newdev;
+       return newdev;
+}
+
 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
 {
        /*
@@ -2056,7 +2108,7 @@ static int parse_raid_devices(struct intel_super *super)
                len_migr = sizeof_imsm_dev(dev_iter, 1);
                if (len_migr > len)
                        space_needed += len_migr - len;
-
+
                dv = malloc(sizeof(*dv));
                if (!dv)
                        return 1;
@@ -2065,6 +2117,7 @@ static int parse_raid_devices(struct intel_super *super)
                        free(dv);
                        return 1;
                }
+
                imsm_copy_dev(dev_new, dev_iter);
                dv->dev = dev_new;
                dv->index = i;
@@ -3007,7 +3060,7 @@ static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
        map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
        map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
        map->failed_disk_num = ~0;
-       map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
+       map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
                                       IMSM_T_STATE_NORMAL;
        map->ddf = 1;

@@ -4076,7 +4129,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)

                this = malloc(sizeof(*this));
                if (!this) {
-                       fprintf(stderr, Name ": failed to allocate %lu bytes\n",
+                       fprintf(stderr, Name ": failed to allocate %u bytes\n",
                                sizeof(*this));
                        break;
                }
@@ -4637,6 +4690,121 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
        return dl;
 }

+
+/* Build the update_takeover metadata update after the raid level of array
+ * 'a' changed in sysfs.  Every disk whose sysfs raid_disk differs from its
+ * slot in the imsm map is recorded in the update, and a->info.devs is
+ * brought in line with sysfs.
+ *
+ * Returns 1 with the new update prepended to *updates, or 0 with *updates
+ * untouched if the metadata already has the current level, sysfs could not
+ * be read or memory ran out.  The sysfs device list is freed on every path.
+ */
+static int imsm_takeover(struct active_array *a,
+                        struct metadata_update **updates)
+{
+       struct intel_super *super = a->container->sb;
+       struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+       struct imsm_map *map = get_imsm_map(dev, 0);
+       struct metadata_update *mu;
+       struct imsm_update_takeover *u;
+       struct mdinfo *di, *mdi, *newdi;
+       struct mdinfo info;
+       struct dl *dl;
+       int slot, i;
+
+       /* First check if actual takeover occurred */
+       super->current_vol = a->info.container_member;
+       getinfo_super_imsm(a->container, &info);
+       if (info.array.level == a->curr_level) {
+               /* no raid level was actually changed */
+               fprintf(stderr, "Takeover started but no raid level was changed!\n");
+               return 0;
+       }
+
+       /* Read sysfs devs */
+       mdi = sysfs_read(-1, a->devnum, GET_DEVS|SKIP_GONE_DEVS);
+       if (!mdi) {
+               fprintf(stderr, "Could not read sysfs!\n");
+               return 0;
+       }
+
+       /* allocate memory for update struct */
+       mu = malloc(sizeof(*mu));
+       if (mu)
+               mu->buf = malloc(sizeof(*u));
+       if (!mu || !mu->buf) {
+               fprintf(stderr, "%s: failed to allocate update buffer\n",
+                               __func__);
+               free(mu);
+               /* release the list returned by sysfs_read() */
+               sysfs_free(mdi);
+               return 0;
+       }
+
+       /* initialize update struct */
+       mu->space = NULL;
+       mu->len = sizeof(*u);
+       mu->next = *updates;
+       u = (struct imsm_update_takeover *) mu->buf;
+       u->type = update_takeover;
+       u->array = a->info.container_member;
+       u->devnum = a->devnum;
+       u->new_level = a->curr_level;
+
+       /* now iterate through devices to detect any slot changes */
+       i = 0;
+       for (dl = super->disks; dl; dl = dl->next) {
+               for (newdi = mdi->devs; newdi; newdi = newdi->next) {
+                       if (dl->major != newdi->disk.major ||
+                           dl->minor != newdi->disk.minor)
+                               continue;
+
+                       slot = get_imsm_disk_slot(map, dl->index);
+                       if (newdi->disk.raid_disk != slot) {
+                               /* slot change detected, add the change to update struct */
+                               u->sl_changes[i].prev_slot = slot;
+                               u->sl_changes[i].new_slot = newdi->disk.raid_disk;
+                               i++;
+                       }
+                       /* Update current mdadm device list */
+                       for (di = a->info.devs; di; di = di->next) {
+                               if (di->disk.major == newdi->disk.major &&
+                                   di->disk.minor == newdi->disk.minor)
+                                       di->disk.raid_disk = newdi->disk.raid_disk;
+                       }
+                       break;
+               }
+       }
+       /* how many slot changes */
+       u->sl_changed = i;
+       /* everything needed from sysfs has been copied out by now */
+       sysfs_free(mdi);
+
+       /* depending on raid level change, set delta disks */
+       u->delta_disks = 0;
+       if ((info.array.level == 0) && (a->curr_level == 5)) {
+               /* level changed: raid0 -> raid5 */
+               u->delta_disks = 1;
+       }
+       if ((info.array.level == 5) && (a->curr_level == 0)) {
+               /* level changed: raid5 -> raid0 */
+               u->delta_disks = -1;
+       }
+       if ((info.array.level == 10) && (a->curr_level == 0)) {
+               /* level changed: raid10 -> raid0 */
+               u->delta_disks = -2;
+       }
+       if ((info.array.level == 0) && (a->curr_level == 10)) {
+               /* level changed: raid0 -> raid10 */
+               u->delta_disks = 2;
+       }
+
+       *updates = mu;
+       return 1;
+}
+
+
 static struct mdinfo *imsm_activate_spare(struct active_array *a,
                                          struct metadata_update **updates)
 {
@@ -4807,6 +4975,29 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_

 static void imsm_delete(struct intel_super *super, struct dl **dlp, int index);

+/* Find the first free slot after prev_slot.
+ *
+ * Starting at prev_slot + 1, return the lowest slot number that no entry of
+ * the mdinfo list beginning at 'info' has as its disk.raid_disk.  With an
+ * empty list this is simply prev_slot + 1.
+ */
+static int find_free_slot(struct mdinfo *info, int prev_slot)
+{
+       struct mdinfo *entry = info;
+       int slot = prev_slot + 1;
+
+       while (entry) {
+               if (entry->disk.raid_disk == slot) {
+                       /* occupied: try the next slot, rescanning from the head */
+                       slot++;
+                       entry = info;
+               } else {
+                       entry = entry->next;
+               }
+       }
+       return slot;
+}
+
 static void imsm_process_update(struct supertype *st,
                                struct metadata_update *update)
 {
@@ -4841,6 +5032,137 @@ static void imsm_process_update(struct supertype *st,
        mpb = super->anchor;

        switch (type) {
+
+       case update_takeover: {
+               struct imsm_update_takeover *u = (void *) update->buf;
+               struct imsm_dev *dev = get_imsm_dev(super, u->array);
+               struct imsm_map *map = get_imsm_map(dev, 0);
+               struct active_array *array;
+               struct mdinfo *d;
+               struct dl *dl = NULL;
+               int slot=-1, i;
+
+               /* Initialize pointer to the proper active array */
+               for (array = st->arrays; array; array = array -> next)
+                       if (array->devnum == u->devnum) {
+                               d = array->info.devs;
+                               break;
+                       }
+
+               if (!array) {
+                       fprintf(stderr, "error: could not find any active array!\n");
+                       return;
+               }
+
+               if (u->delta_disks > 0) {
+                       /* we'll be adding missing disks: Raid0->Raid5 or Raid0->Raid10 *
+                        * First, reallocate Dev due to new map disks entries */
+                       struct imsm_dev *newdev = NULL;
+                       newdev = reallocate_imsm_dev(super, u->array,
+                                                                                map->num_members + u->delta_disks,
+                                                                                0);
+                       if (newdev == NULL)
+                       {
+                               fprintf(stderr, "error: could not reallocate dev structure!\n");
+                               return;
+                       }
+
+                       /* Refresh pointers */
+                       dev = get_imsm_dev(super, u->array);
+                       map = get_imsm_map(dev, 0);
+
+                       /* Add missing disk */
+                       if (super->missing) {
+                               fprintf(stderr, "error: there are already missing disks. Takeover failed!\n");
+                               break;
+                       }
+
+                       /* Update slots in the raid map */
+                       for (i = 0; i < u->sl_changed; i++) {
+                               int idx;
+                               idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot);
+                               /* now after slot changes */
+                               set_imsm_ord_tbl_ent(map, u->sl_changes[i].new_slot, idx);
+                       }
+
+                       /* Now create new devices representing missing disks */
+                       slot=-1;
+                       for (i = 0; i < u->delta_disks; i++) {
+                           dl = malloc(sizeof(*dl));
+                               if (!dl) {
+                                       fprintf(stderr, Name": could not allocate raid device\n");
+                                       return;
+                               }
+                           dl->major = dl->minor = 0;
+                           dl->fd = -1;
+                           dl->devname = strdup("missing");
+                               dl->index = mpb->num_disks;
+                           sprintf((char*)dl->disk.serial, "MISSING_DISK%d", i);
+                           dl->disk.total_blocks = -1;
+                           dl->disk.scsi_id = -1;
+                           dl->disk.status = FAILED_DISK;
+                           dl->disk.owner_cfg_num = 0;
+                           dl->e = NULL;
+                           dl->next = super->missing;
+                           super->missing = dl;
+                           /* Set slot for missing disk */
+                           slot = find_free_slot(d, slot);
+                           set_imsm_ord_tbl_ent(map, slot, dl->index | IMSM_ORD_REBUILD);
+                           dl->raiddisk = slot;
+                           /* Increase number of disks */
+                           mpb->num_disks++;
+                       }
+
+                       /* Update map */
+                       map->num_members += u->delta_disks;
+                       map->failed_disk_num = dl->index;
+                       map->map_state = IMSM_T_STATE_DEGRADED;
+               }
+
+               if (u->delta_disks < 0)
+               {
+                       /* we'll be adding missing disks: Raid5->Raid0 or Raid10->Raid0 */
+                       int missing_disks = 0;
+                       dl = super->missing;
+
+                       /* Count missing drives */
+                       while(dl) {
+                               missing_disks--;
+                               dl = dl->next;
+                       }
+                   if (missing_disks != (u->delta_disks)) {
+                       fprintf(stderr, "error: incorrect missing drive number for takeover: %d\n",
+                                               missing_disks);
+                       break;
+                   }
+                   /* removing missing disk */
+                   while (super->missing) {
+                               dl = super->missing;
+                               super->missing = dl->next;
+                               __free_imsm_disk(dl);
+                   }
+                   /* Update slots in the raid map */
+                   for (i = 0; i < u->sl_changed; i++) {
+                               int idx;
+                               idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot);
+                               /* now after slot changes */
+                               set_imsm_ord_tbl_ent(map, u->sl_changes[i].new_slot, idx);
+                       }
+                   /* Update mpb */
+                   mpb->num_disks += u->delta_disks;
+                   map->num_members += u->delta_disks;
+                   map->failed_disk_num = 0;
+                   map->map_state = IMSM_T_STATE_NORMAL;
+               }
+
+               /* Update raid level */
+               map->raid_level = u->new_level;
+               super->updates_pending++;
+               array->takeover = finished;
+
+               imsm_update_version_info(super);
+               break;
+       }
        case update_activate_spare: {
                struct imsm_update_activate_spare *u = (void *) update->buf;
                struct imsm_dev *dev = get_imsm_dev(super, u->array);
@@ -5096,6 +5418,20 @@ static void imsm_prepare_update(struct supertype *st,
        size_t len = 0;

        switch (type) {
+
+       case update_takeover: {
+               struct imsm_update_takeover *u = (void *) update->buf;
+               struct imsm_dev *dev = get_imsm_dev(super, u->array);
+
+               /* calculate the new size for imsm dev */
+               len = sizeof_imsm_dev(dev, 1);
+               if (u->delta_disks > 0)
+                       len += u->delta_disks * sizeof(struct imsm_disk);
+
+               update->space = NULL;
+               break;
+       }
+
        case update_create_array: {
                struct imsm_update_create_array *u = (void *) update->buf;
                struct intel_dev *dv;
@@ -5258,5 +5594,7 @@ struct superswitch super_imsm = {
        .activate_spare = imsm_activate_spare,
        .process_update = imsm_process_update,
        .prepare_update = imsm_prepare_update,
+       .takeover = imsm_takeover,
+
 #endif /* MDASSEMBLE */
 };
--
1.6.3.3



--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux