[mdadm PATCH 1/3] Enable takeover for external metadata

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Mdmon now monitors the raid level exposed in sysfs.
When the level changes, mdmon schedules a metadata update and updates its internal active array struct.
The IMSM takeover procedure scans sysfs for the new device configuration (slot number changes)
and handles additions/removals of missing devices.
Also, when switching from Raid0 to any other personality, mdadm will start mdmon.
---
 Grow.c        |   13 ++
 managemon.c   |   16 +++
 mdadm.h       |    3 +
 mdmon.h       |    4 +
 monitor.c     |   50 ++++++++-
 super-intel.c |  331 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 411 insertions(+), 6 deletions(-)

diff --git a/Grow.c b/Grow.c
index d8d91cb..8b4f1d0 100644
--- a/Grow.c
+++ b/Grow.c
@@ -540,7 +540,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                        "       Please use a newer kernel\n");
                return 1;
        }
-       sra = sysfs_read(fd, 0, GET_LEVEL);
+       sra = sysfs_read(fd, 0, GET_LEVEL | GET_VERSION);
        frozen = freeze_array(sra);
        if (frozen < 0) {
                fprintf(stderr, Name ": %s is performing resync/recovery and cannot"
@@ -682,6 +682,17 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file,
                                fprintf(stderr, Name " level of %s changed to %s\n",
                                        devname, c);
                        changed = 1;
+
+                       /* if raid0 was takeovered by any other personality start mdmon */
+                       st = super_by_fd(fd);
+                       if (st->ss->external) {
+                               if ((level != 0) && (orig.level == 0)) {
+                                       int dn = devname2devnum(sra->text_version + 1);
+                                       if (!mdmon_running(dn))
+                                               start_mdmon(dn);
+                                       ping_monitor(devnum2devname(dn));
+                               }
+                       }
                }
        }

diff --git a/managemon.c b/managemon.c
index e335077..1fc0752 100644
--- a/managemon.c
+++ b/managemon.c
@@ -382,6 +382,16 @@ static void manage_member(struct mdstat_ent *mdstat,
        a->info.array.chunk_size = mdstat->chunk_size;
        // MORE

+       /* Takeover procedure */
+       if ((a->takeover == requested) && (a->container)) {
+               struct metadata_update *updates = NULL;
+               if (a->container->ss->takeover(a, &updates)) {
+                       a->container->ss->prepare_update(a->container, updates);
+                       a->takeover = pending;
+                       queue_metadata_update(updates);
+               }
+       }
+
        if (a->check_degraded) {
                struct metadata_update *updates = NULL;
                struct mdinfo *newdev = NULL;
@@ -479,7 +489,7 @@ static void manage_new(struct mdstat_ent *mdstat,
                return;

        mdi = sysfs_read(-1, mdstat->devnum,
-                        GET_LEVEL|GET_CHUNK|GET_DISKS|GET_COMPONENT|
+                        GET_LEVEL|GET_LAYOUT|GET_CHUNK|GET_DISKS|GET_COMPONENT|
                         GET_DEGRADED|GET_DEVS|GET_OFFSET|GET_SIZE|GET_STATE);

        new = malloc(sizeof(*new));
@@ -546,6 +556,10 @@ static void manage_new(struct mdstat_ent *mdstat,
        new->info.state_fd = sysfs_open(new->devnum, NULL, "array_state");
        new->resync_start_fd = sysfs_open(new->devnum, NULL, "resync_start");
        new->metadata_fd = sysfs_open(new->devnum, NULL, "metadata_version");
+       new->level_fd = sysfs_open(new->devnum, NULL, "level");
+       new->takeover = none;
+       new->prev_level  = -1;
+
        dprintf("%s: inst: %d action: %d state: %d\n", __func__, atoi(inst),
                new->action_fd, new->info.state_fd);

diff --git a/mdadm.h b/mdadm.h
index 836c64d..6a9703d 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -613,6 +613,9 @@ extern struct superswitch {
        void (*prepare_update)(struct supertype *st,
                               struct metadata_update *update);

+       int (*takeover)(struct active_array *a,
+                           struct metadata_update **updates);
+
        /* activate_spare will check if the array is degraded and, if it
         * is, try to find some spare space in the container.
         * On success, it add appropriate updates (For process_update) to
diff --git a/mdmon.h b/mdmon.h
index 4494085..84a1454 100644
--- a/mdmon.h
+++ b/mdmon.h
@@ -23,6 +23,7 @@ enum array_state { clear, inactive, suspended, readonly, read_auto,

 enum sync_action { idle, reshape, resync, recover, check, repair, bad_action };

+enum takeover_stage { none, requested, pending, finished }; /* none -> requested (monitor saw a level change) -> pending (manager queued the update) -> finished (update processed) -> none */

 struct active_array {
        struct mdinfo info;
@@ -32,6 +33,7 @@ struct active_array {
        int action_fd;
        int resync_start_fd;
        int metadata_fd; /* for monitoring rw/ro status */
+       int level_fd;

        enum array_state prev_state, curr_state, next_state;
        enum sync_action prev_action, curr_action, next_action;
@@ -39,6 +41,8 @@ struct active_array {
        int check_degraded; /* flag set by mon, read by manage */

        int devnum;
+       int prev_level, curr_level;
+       enum takeover_stage takeover;
 };

 /*
diff --git a/monitor.c b/monitor.c
index 81fef49..87cb29a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -100,6 +100,17 @@ static enum sync_action read_action( int fd)
        return (enum sync_action) sysfs_match_word(buf, sync_actions);
 }

+/* Read the sysfs "level" attribute behind fd and map it to a level number; -1 if nothing could be read. */
+static int read_level(int fd)
+{
+       char buf[20];
+
+       if (read_attr(buf, sizeof(buf), fd) <= 0)
+               return -1;
+
+       return map_name(pers, buf);
+}
+
 int read_dev_state(int fd)
 {
        char buf[60];
@@ -204,7 +215,9 @@ static int read_and_act(struct active_array *a)
        a->next_action = bad_action;

        a->curr_state = read_state(a->info.state_fd);
-       a->curr_action = read_action(a->action_fd);
+       if (a->action_fd > 0)
+               a->curr_action = read_action(a->action_fd);
+       a->curr_level = read_level(a->level_fd);
        a->info.resync_start = read_resync_start(a->resync_start_fd);
        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
                mdi->next_state = 0;
@@ -214,6 +227,31 @@ static int read_and_act(struct active_array *a)
                }
        }

+       /* takeover operation was finished */
+       if (a->takeover == finished) {
+               /* end up with raid0, stop the monitor */
+               if (a->curr_level == 0)
+                       deactivate = 1;
+               /* update current level */
+               a->info.array.level = a->curr_level;
+               a->takeover = none;
+       }
+
+       if (a->takeover == none) {
+               /* check for raid level change */
+               if (a->curr_level != a->prev_level)     {
+                       /* start takeover procedure */
+                       a->takeover = requested;
+               }
+               if ((a->curr_level == 0) && (a->prev_level != 0)) {
+                       /* Raid level was changed to raid0
+                          so close unsupported sysfs handles */
+                       if (a->action_fd > 0)
+                               close(a->action_fd);
+                       a->action_fd = -1;
+               }
+       }
+
        if (a->curr_state <= inactive &&
            a->prev_state > inactive) {
                /* array has been stopped */
@@ -347,9 +385,13 @@ static int read_and_act(struct active_array *a)
        }
        dprintf(" )\n");

+       /* Takeover started. Signal manager. */
+       if (a->takeover == requested)
+               signal_manager();
+
        /* move curr_ to prev_ */
+       a->prev_level = a->curr_level;
        a->prev_state = a->curr_state;
-
        a->prev_action = a->curr_action;

        for (mdi = a->info.devs; mdi ; mdi = mdi->next) {
@@ -460,7 +502,9 @@ static int wait_and_act(struct supertype *container, int nowait)
                }

                add_fd(&rfds, &maxfd, a->info.state_fd);
-               add_fd(&rfds, &maxfd, a->action_fd);
+               add_fd(&rfds, &maxfd, a->level_fd);
+               if (a->action_fd > 0)
+                       add_fd(&rfds, &maxfd, a->action_fd);
                for (mdi = a->info.devs ; mdi ; mdi = mdi->next)
                        add_fd(&rfds, &maxfd, mdi->state_fd);

diff --git a/super-intel.c b/super-intel.c
index fcf438c..32b29c8 100644
--- a/super-intel.c
+++ b/super-intel.c
@@ -284,6 +284,7 @@ enum imsm_update_type {
        update_activate_spare,
        update_create_array,
        update_add_disk,
+       update_takeover,
 };

 struct imsm_update_activate_spare {
@@ -308,6 +309,21 @@ struct imsm_update_add_disk {
        enum imsm_update_type type;
 };

+struct slot_change {   /* one array member whose raid slot moved during takeover */
+       int prev_slot;  /* slot the disk occupies in the imsm map before the update */
+       int new_slot;   /* raid_disk slot reported by sysfs after the level change */
+};
+
+struct imsm_update_takeover {  /* payload of an update_takeover metadata update */
+       enum imsm_update_type type;     /* always update_takeover */
+       int array;      /* container member index of the affected volume */
+       int devnum;     /* md device number, used to find the matching active array */
+       int new_level;  /* raid level the array has been switched to */
+       int delta_disks;        /* change in member count: >0 adds missing disks, <0 removes them */
+       struct slot_change sl_changes[IMSM_MAX_DEVICES];        /* only the first sl_changed entries are valid */
+       int sl_changed; /* number of valid entries in sl_changes[] */
+};
+
 static struct supertype *match_metadata_desc_imsm(char *arg)
 {
        struct supertype *st;
@@ -1698,6 +1714,42 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
        memcpy(dest, src, sizeof_imsm_dev(src, 0));
 }

+/* Swap the imsm_dev of volume array_index for a copy held in a buffer sized for map0_num_members
+ * slots (plus a second map of map1_num_members slots if > 0).  Returns the new dev, or NULL on failure. */
+struct imsm_dev *reallocate_imsm_dev(struct intel_super *super,
+                                    int array_index,
+                                    int map0_num_members,
+                                    int map1_num_members)
+{
+       struct imsm_dev *newdev;
+       struct intel_dev *dv;
+       size_t memNeeded;
+
+       if (!super)
+               return NULL;
+
+       /* Find our device first, so that a miss cannot leak the new buffer */
+       for (dv = super->devlist; dv; dv = dv->next)
+               if (dv->index == array_index)
+                       break;
+       if (!dv)
+               return NULL;
+
+       /* Space needed for imsm_dev with a single map, plus an optional second map */
+       memNeeded = sizeof(struct imsm_dev) + sizeof(__u32) * (map0_num_members - 1);
+       if (map1_num_members > 0)
+               memNeeded += sizeof(struct imsm_map) + sizeof(__u32) * (map1_num_members - 1);
+       newdev = malloc(memNeeded);
+       if (!newdev) {
+               fprintf(stderr, "error: imsm meta update not possible due to no memory conditions\n");
+               return NULL;
+       }
+       imsm_copy_dev(newdev, dv->dev);
+       free(dv->dev);
+       dv->dev = newdev;
+       return newdev;
+}
+
 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
 {
        /*
@@ -4076,7 +4128,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st)

                this = malloc(sizeof(*this));
                if (!this) {
-                       fprintf(stderr, Name ": failed to allocate %lu bytes\n",
+                       fprintf(stderr, Name ": failed to allocate %u bytes\n",
                                sizeof(*this));
                        break;
                }
@@ -4637,6 +4689,117 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot,
        return dl;
 }

+
+/* Build the metadata update describing a raid level takeover of array a.
+ * Compares the level from the IMSM metadata with a->curr_level, reads the
+ * member slots from sysfs and records every slot change together with the
+ * resulting change in the number of disks.  Entries of a->info.devs are
+ * updated in place with the slots reported by sysfs.
+ * Returns 1 with the new update pushed onto *updates, or 0 (leaving
+ * *updates untouched) if the level did not change, sysfs could not be
+ * read or memory allocation failed. */
+static int imsm_takeover(struct active_array *a,
+                        struct metadata_update **updates)
+{
+       struct intel_super *super = a->container->sb;
+       struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
+       struct imsm_map *map = get_imsm_map(dev, 0);
+       struct metadata_update *mu;
+       struct imsm_update_takeover *u;
+       struct mdinfo *di, *mdi, *newdi;
+       struct mdinfo info;
+       struct dl *dl;
+       int slot, i;
+
+       /* First check if actual takeover occurred */
+       super->current_vol = a->info.container_member;
+       getinfo_super_imsm(a->container, &info);
+
+       if (info.array.level == a->curr_level) {
+               /* no raid level was actually changed */
+               fprintf(stderr, "Takeover started but no raid level was changed!\n");
+               return 0;
+       }
+
+       /* Read sysfs devs */
+       mdi = sysfs_read(-1, a->devnum, GET_DEVS|SKIP_GONE_DEVS);
+       if (!mdi) {
+               fprintf(stderr, "Could not read sysfs!\n");
+               return 0;
+       }
+
+       /* allocate memory for update struct */
+       mu = malloc(sizeof(*mu));
+       if (mu)
+               mu->buf = malloc(sizeof(*u));
+       if (!mu || !mu->buf) {
+               fprintf(stderr, "%s: failed to allocate update buffer\n",
+                               __func__);
+               free(mu);
+               /* the sysfs snapshot must be released on every exit path */
+               sysfs_free(mdi);
+               return 0;
+       }
+
+       /* initialize update struct */
+       mu->space = NULL;
+       mu->len = sizeof(*u);
+       mu->next = *updates;
+       u = (struct imsm_update_takeover *) mu->buf;
+       u->type = update_takeover;
+       u->array = a->info.container_member;
+       u->devnum = a->devnum;
+       u->new_level = a->curr_level;
+
+       /* now iterate through devices to detect any slot changes */
+       i = 0;
+       for (dl = super->disks; dl; dl = dl->next) {
+               for (newdi = mdi->devs; newdi; newdi = newdi->next) {
+                       if ((dl->major != newdi->disk.major) ||
+                           (dl->minor != newdi->disk.minor))
+                               continue;
+
+                       slot = get_imsm_disk_slot(map, dl->index);
+                       /* never write past the end of sl_changes[] */
+                       if ((newdi->disk.raid_disk != slot) &&
+                           (i < IMSM_MAX_DEVICES)) {
+                               /* slot change detected, add the change to update struct */
+                               u->sl_changes[i].prev_slot = slot;
+                               u->sl_changes[i].new_slot = newdi->disk.raid_disk;
+                               i++;
+                       }
+                       /* Update current mdadm device list */
+                       for (di = a->info.devs; di; di = di->next) {
+                               if ((di->disk.major == newdi->disk.major) &&
+                                   (di->disk.minor == newdi->disk.minor))
+                                       di->disk.raid_disk = newdi->disk.raid_disk;
+                       }
+                       break;
+               }
+       }
+       /* how many slot changes */
+       u->sl_changed = i;
+
+       /* slots are recorded, the sysfs snapshot is no longer needed */
+       sysfs_free(mdi);
+
+       /* depending on raid level change, set delta disks */
+       u->delta_disks = 0;
+       if ((info.array.level == 0) && (a->curr_level == 5))
+               u->delta_disks = 1;     /* level changed: raid0 -> raid5 */
+       else if ((info.array.level == 5) && (a->curr_level == 0))
+               u->delta_disks = -1;    /* level changed: raid5 -> raid0 */
+       else if ((info.array.level == 10) && (a->curr_level == 0))
+               u->delta_disks = -2;    /* level changed: raid10 -> raid0 */
+       else if ((info.array.level == 0) && (a->curr_level == 10))
+               u->delta_disks = 2;     /* level changed: raid0 -> raid10 */
+
+       /* hand the finished update over to the caller's list */
+       *updates = mu;
+       return 1;
+}
+
+
 static struct mdinfo *imsm_activate_spare(struct active_array *a,
                                          struct metadata_update **updates)
 {
@@ -4807,6 +4970,27 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_

 static void imsm_delete(struct intel_super *super, struct dl **dlp, int index);

+/* Return the lowest raid_disk slot above prev_slot that is not used by
+ * any entry of the mdinfo list starting at info. */
+static int find_free_slot(struct mdinfo *info, int prev_slot)
+{
+       struct mdinfo *cur;
+       int candidate = prev_slot + 1;
+
+       for (;;) {
+               /* look for a device already sitting in the candidate slot */
+               for (cur = info; cur; cur = cur->next)
+                       if (cur->disk.raid_disk == candidate)
+                               break;
+
+               /* nobody owns it: this is the first free slot */
+               if (!cur)
+                       return candidate;
+               /* occupied: try the next slot and rescan from the list head */
+               candidate++;
+       }
+}
+
 static void imsm_process_update(struct supertype *st,
                                struct metadata_update *update)
 {
@@ -4841,6 +5025,135 @@ static void imsm_process_update(struct supertype *st,
        mpb = super->anchor;

        switch (type) {
+
+       case update_takeover: {
+               struct imsm_update_takeover *u = (void *) update->buf;
+               struct imsm_dev *dev = get_imsm_dev(super, u->array);
+               struct imsm_map *map = get_imsm_map(dev, 0);
+               struct active_array *array;
+               struct mdinfo *d;
+               struct dl *dl = NULL;
+               int i, slot = -1;
+
+               /* Initialize pointer to the proper active array */
+               for (array = st->arrays; array; array = array->next)
+                       if (array->devnum == u->devnum) {
+                               d = array->info.devs;
+                               break;
+                       }
+
+               if (!array) {
+                       fprintf(stderr, "error: could not find any active array!\n");
+                       return;
+               }
+
+               if (u->delta_disks > 0) {
+                       /* we'll be adding missing disks: Raid0->Raid5 or Raid0->Raid10 *
+                        * First, reallocate Dev due to new map disks entries */
+                       struct imsm_dev *newdev = NULL;
+                       newdev = reallocate_imsm_dev(super, u->array,
+                                                                                map->num_members + u->delta_disks,
+                                                                                0);
+                       if (newdev == NULL) {
+                               fprintf(stderr, "error: could not reallocate dev structure!\n");
+                               return;
+                       }
+
+                       /* Refresh pointers */
+                       dev = get_imsm_dev(super, u->array);
+                       map = get_imsm_map(dev, 0);
+
+                       /* Add missing disk */
+                       if (super->missing) {
+                               fprintf(stderr, "error: there are already missing disks. Takeover failed!\n");
+                               break;
+                       }
+
+                       /* Update slots in the raid map */
+                       for (i = 0; i < u->sl_changed; i++) {
+                               int idx;
+                               idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot);
+                               /* now after slot changes */
+                               set_imsm_ord_tbl_ent(map, u->sl_changes[i].new_slot, idx);
+                       }
+
+                       /* Now create new devices representing missing disks */
+                       slot = -1;
+                       for (i = 0; i < u->delta_disks; i++) {
+                               dl = malloc(sizeof(*dl));
+                               if (!dl) {
+                                       fprintf(stderr, Name": could not allocate raid device\n");
+                                       return;
+                               }
+                           dl->major = dl->minor = 0;
+                           dl->fd = -1;
+                           dl->devname = strdup("missing");
+                               dl->index = mpb->num_disks;
+                           sprintf((char *)dl->disk.serial, "MISSING_DISK%d", i);
+                           dl->disk.total_blocks = -1;
+                           dl->disk.scsi_id = -1;
+                           dl->disk.status = FAILED_DISK;
+                           dl->disk.owner_cfg_num = 0;
+                           dl->e = NULL;
+                           dl->next = super->missing;
+                           super->missing = dl;
+                           /* Set slot for missing disk */
+                           slot = find_free_slot(d, slot);
+                           set_imsm_ord_tbl_ent(map, slot, dl->index | IMSM_ORD_REBUILD);
+                           dl->raiddisk = slot;
+                           /* Increase number of disks */
+                           mpb->num_disks++;
+                       }
+
+                       /* Update map */
+                       map->num_members += u->delta_disks;
+                       map->failed_disk_num = dl->index;
+                       map->map_state = IMSM_T_STATE_DEGRADED;
+               }
+
+               if (u->delta_disks < 0) {
+                       /* we'll be removing missing disks: Raid5->Raid0 or Raid10->Raid0 */
+                       int missing_disks = 0;
+                       dl = super->missing;
+
+                       /* Count missing drives */
+                       while (dl) {
+                               missing_disks--;
+                               dl = dl->next;
+                       }
+                   if (missing_disks != (u->delta_disks)) {
+                       fprintf(stderr, "error: incorrect missing drive number for takeover: %d\n",
+                                               missing_disks);
+                       break;
+                   }
+                   /* removing missing disk */
+                   while (super->missing) {
+                               dl = super->missing;
+                               super->missing = dl->next;
+                               __free_imsm_disk(dl);
+                   }
+                   /* Update slots in the raid map */
+                   for (i = 0; i < u->sl_changed; i++) {
+                               int idx;
+                               idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot);
+                               /* now after slot changes */
+                               set_imsm_ord_tbl_ent(map, u->sl_changes[i].new_slot, idx);
+                       }
+                   /* Update mpb */
+                   mpb->num_disks += u->delta_disks;
+                   map->num_members += u->delta_disks;
+                   map->failed_disk_num = 0;
+                   map->map_state = IMSM_T_STATE_NORMAL;
+               }
+
+               /* Update raid level */
+               map->raid_level = u->new_level;
+               super->updates_pending++;
+               array->takeover = finished;
+
+               imsm_update_version_info(super);
+               break;
+       }
        case update_activate_spare: {
                struct imsm_update_activate_spare *u = (void *) update->buf;
                struct imsm_dev *dev = get_imsm_dev(super, u->array);
@@ -5096,6 +5409,20 @@ static void imsm_prepare_update(struct supertype *st,
        size_t len = 0;

        switch (type) {
+
+       case update_takeover: {
+               struct imsm_update_takeover *u = (void *) update->buf;
+               struct imsm_dev *dev = get_imsm_dev(super, u->array);
+
+               /* calculate the new size for imsm dev */
+               len = sizeof_imsm_dev(dev, 1);
+               if (u->delta_disks > 0)
+                       len += u->delta_disks * sizeof(struct imsm_disk);
+
+               update->space = NULL;
+               break;
+       }
+
        case update_create_array: {
                struct imsm_update_create_array *u = (void *) update->buf;
                struct intel_dev *dv;
@@ -5258,5 +5585,7 @@ struct superswitch super_imsm = {
        .activate_spare = imsm_activate_spare,
        .process_update = imsm_process_update,
        .prepare_update = imsm_prepare_update,
+       .takeover = imsm_takeover,
+
 #endif /* MDASSEMBLE */
 };

��.n��������+%������w��{.n�����{����w��ܨ}���Ơz�j:+v�����w����ޙ��&�)ߡ�a����z�ޗ���ݢj��w�f


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux