When performing takeover 0->10 or 10->0, mdmon should update the external metadata (due to disk slot changes). To achieve that, mdadm, after changing the level in md, calls update_super with the "update_level" type. update_super() allocates a new imsm_dev with updated disk slot numbers to be processed by mdmon in process_update(). process_update() discovers missing disks and adds them to the imsm metadata. Signed-off-by: Maciej Trela <maciej.trela@xxxxxxxxx> --- Grow.c | 42 ++++++++++- managemon.c | 13 +++ mdadm.h | 1 mdstat.c | 24 ++++++ monitor.c | 2 - super-intel.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 288 insertions(+), 6 deletions(-) diff --git a/Grow.c b/Grow.c index 6264996..ff04fc0 100644 --- a/Grow.c +++ b/Grow.c @@ -503,7 +503,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, int ndata, odata; int orig_level = UnSet; char alt_layout[40]; - int *fdlist; + int *fdlist, container_fd, dn; unsigned long long *offsets; int d, i; int nrdisks; @@ -515,7 +515,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, int changed = 0; int done; - struct mdinfo *sra; + struct mdinfo *sra, ext_sra; struct mdinfo *sd; if (ioctl(fd, GET_ARRAY_INFO, &array) < 0) { @@ -540,7 +540,7 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, " Please use a newer kernel\n"); return 1; } - sra = sysfs_read(fd, 0, GET_LEVEL); + sra = sysfs_read(fd, 0, GET_VERSION | GET_LEVEL); frozen = freeze_array(sra); if (frozen < 0) { fprintf(stderr, Name ": %s is performing resync/recovery and cannot" @@ -690,6 +690,42 @@ int Grow_reshape(char *devname, int fd, int quiet, char *backup_file, fprintf(stderr, Name " level of %s changed to %s\n", devname, c); changed = 1; + + st = super_by_fd(fd); + if (st && st->ss->external) { + sysfs_free(sra); + sra = sysfs_read(fd, 0, + GET_VERSION | GET_LEVEL | + GET_LAYOUT | GET_DISKS | GET_DEVS); + + dn = devname2devnum(sra->text_version + 1); + container_fd 
= open_dev_excl(dn); + if (container_fd < 0) { + fprintf(stderr, Name ": Cannot get exclusive access " + "to container.\n"); + rv = 1; + goto release; + } + st->ss->load_super(st, container_fd, NULL); + close(container_fd); + st->ss->getinfo_super(st, &ext_sra); + st->update_tail = &st->updates; + err = st->ss->update_super(st, sra, "update_level", + ext_sra.name, 0, 0, NULL); + if (err != 0) { + /* no need to perform any updates with mdmon */ + rv = 0; + goto release; + } + /* if raid0 was takeovered by any other + * personality start mdmon */ + if (level > 0 && orig.level == 0) { + if (!mdmon_running(dn)) + start_mdmon(dn); + ping_monitor(devnum2devname(dn)); + } + flush_metadata_updates(st); + } } } diff --git a/managemon.c b/managemon.c index 037406f..c279664 100644 --- a/managemon.c +++ b/managemon.c @@ -364,6 +364,9 @@ static void manage_container(struct mdstat_ent *mdstat, static void manage_member(struct mdstat_ent *mdstat, struct active_array *a) { + struct active_array *newa; + int level; + /* Compare mdstat info with known state of member array. * We do not need to look for device state changes here, that * is dealt with by the monitor. 
@@ -382,6 +385,16 @@ static void manage_member(struct mdstat_ent *mdstat, a->info.array.chunk_size = mdstat->chunk_size; // MORE + level = mdstat_to_level(mdstat); + if (a->info.array.level != level && level >= 0) { + newa = duplicate_aa(a); + if (!newa) + goto out; + newa->info.array.level = level; + replace_array(a->container, a, newa); + a = newa; + } + if (a->check_degraded) { struct metadata_update *updates = NULL; struct mdinfo *newdev = NULL; diff --git a/mdadm.h b/mdadm.h index 362b66b..68c15ab 100644 --- a/mdadm.h +++ b/mdadm.h @@ -358,6 +358,7 @@ extern void free_mdstat(struct mdstat_ent *ms); extern void mdstat_wait(int seconds); extern void mdstat_wait_fd(int fd, const sigset_t *sigmask); extern int mddev_busy(int devnum); +int mdstat_to_level(struct mdstat_ent *ms); struct map_ent { struct map_ent *next; diff --git a/mdstat.c b/mdstat.c index 4a9f370..af1cae4 100644 --- a/mdstat.c +++ b/mdstat.c @@ -251,6 +251,30 @@ struct mdstat_ent *mdstat_read(int hold, int start) return rv; } +int mdstat_to_level(struct mdstat_ent *ms) +{ + if (strncmp(ms->level, "raid0", 5) == 0) + return 0; + else if (strncmp(ms->level, "raid10", 6) == 0) + return 10; + else if (strncmp(ms->level, "raid1", 5) == 0) + return 1; + else if (strncmp(ms->level, "raid4", 5) == 0) + return 4; + else if (strncmp(ms->level, "raid5", 5) == 0) + return 5; + else if (strncmp(ms->level, "raid6", 5) == 0) + return 6; + else if (strncmp(ms->level, "linear", 6) == 0) + return LEVEL_LINEAR; + else if (strncmp(ms->level, "faulty", 6) == 0) + return LEVEL_FAULTY; + else if (strncmp(ms->level, "multipath", 9) == 0) + return LEVEL_MULTIPATH; + + return LEVEL_UNSUPPORTED; +} + void mdstat_wait(int seconds) { fd_set fds; diff --git a/monitor.c b/monitor.c index e43e545..4578718 100644 --- a/monitor.c +++ b/monitor.c @@ -447,7 +447,7 @@ static int wait_and_act(struct supertype *container, int nowait) /* once an array has been deactivated we want to * ask the manager to discard it. 
*/ - if (!a->container) { + if (!a->container || a->info.array.level == 0) { if (discard_this) { ap = &(*ap)->next; continue; diff --git a/super-intel.c b/super-intel.c index a196ca3..b0bd0e6 100644 --- a/super-intel.c +++ b/super-intel.c @@ -283,6 +283,7 @@ enum imsm_update_type { update_activate_spare, update_create_array, update_add_disk, + update_level, }; struct imsm_update_activate_spare { @@ -307,6 +308,13 @@ struct imsm_update_add_disk { enum imsm_update_type type; }; +struct imsm_update_level { + enum imsm_update_type type; + int delta_disks; + int container_member; + struct imsm_dev dev; +}; + static struct supertype *match_metadata_desc_imsm(char *arg) { struct supertype *st; @@ -1588,6 +1596,9 @@ static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info) } } +static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks); +static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src); + static int update_super_imsm(struct supertype *st, struct mdinfo *info, char *update, char *devname, int verbose, int uuid_set, char *homehost) @@ -1620,12 +1631,98 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info, struct intel_super *super = st->sb; struct imsm_super *mpb; - /* we can only update container info */ - if (!super || super->current_vol >= 0 || !super->anchor) + if (!super || !super->anchor) return 1; mpb = super->anchor; + if (strcmp(update, "update_level") == 0) { + struct imsm_update_level *u; + struct imsm_dev *dev_new, *dev = NULL; + struct imsm_map *map_new, *map; + struct mdinfo *newdi; + struct dl *dl; + int *tmp_ord_tbl; + int i, slot, idx; + int len, disks; + + if (!is_raid_level_supported(super->orom, + info->array.level, + info->array.raid_disks)) + return 1; + + for (i = 0; i < mpb->num_raid_devs; i++) { + dev = get_imsm_dev(super, i); + if (strcmp(devname, (char *)dev->volume) == 0) + break; + } + if (dev == NULL) + return 1; + + if (i == super->anchor->num_raid_devs) 
+ return 1; + + map = get_imsm_map(dev, 0); + + /* update level is needed only for 0->10 and 10->0 transitions */ + if ((info->array.level != 10 || map->raid_level != 0) && + (info->array.level != 0 || map->raid_level != 10)) + return 1; + + disks = (info->array.raid_disks > map->num_members) ? + info->array.raid_disks : map->num_members; + len = sizeof(struct imsm_update_level) + + ((disks - 1) * sizeof(__u32)); + + u = malloc(len); + if (u == NULL) + return 1; + + dev_new = &u->dev; + imsm_copy_dev(dev_new, dev); + map_new = get_imsm_map(dev_new, 0); + + tmp_ord_tbl = malloc(sizeof(int) * disks); + if (tmp_ord_tbl == NULL) { + free(u); + return 1; + } + + for (i = 0; i < disks; i++) + tmp_ord_tbl[i] = -1; + + /* iterate through devices to detect slot changes */ + for (dl = super->disks; dl; dl = dl->next) + for (newdi = info->devs; newdi; newdi = newdi->next) { + + if ((dl->major != newdi->disk.major) || + (dl->minor != newdi->disk.minor)) + continue; + slot = get_imsm_disk_slot(map, dl->index); + idx = get_imsm_ord_tbl_ent(dev_new, slot); + tmp_ord_tbl[newdi->disk.raid_disk] = idx; + break; + } + + for (i = 0; i < disks; i++) + set_imsm_ord_tbl_ent(map_new, i, tmp_ord_tbl[i]); + free(tmp_ord_tbl); + + map_new->raid_level = info->array.level; + map_new->num_members = info->array.raid_disks; + + u->type = update_level; + u->delta_disks = info->array.raid_disks - map->num_members; + u->container_member = info->container_member; + append_metadata_update(st, u, len); + rv = 0; + goto completed; + } + + /* we can only update container info */ + if (super->current_vol >= 0) + return 1; + if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private) fprintf(stderr, Name ": '--uuid' not supported for imsm metadata\n"); @@ -1653,6 +1750,7 @@ static int update_super_imsm(struct supertype *st, struct mdinfo *info, Name ": '--update=%s' not supported for imsm metadata\n", update); + completed: /* successful update? 
recompute checksum */ if (rv == 0) mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb)); @@ -1700,6 +1798,45 @@ static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src) memcpy(dest, src, sizeof_imsm_dev(src, 0)); } +struct imsm_dev *reallocate_imsm_dev(struct intel_super *super, + int array_index, + int map_num_members) +{ + struct imsm_dev *newdev = NULL; + struct imsm_dev *retval = NULL; + struct intel_dev *dv = NULL; + struct imsm_dev *dv_free = NULL; + int memNeeded; + + if (!super) + return NULL; + + /* Calculate space needed for imsm_dev with a double map */ + memNeeded = sizeof(struct imsm_dev) + sizeof(__u32) * (map_num_members - 1) + + sizeof(struct imsm_map) + sizeof(__u32) * (map_num_members - 1); + + newdev = malloc(memNeeded); + if (!newdev) { + fprintf(stderr, "error: imsm meta update not possible due to no memory conditions\n"); + return NULL; + } + /* Find our device */ + for (dv = super->devlist; dv; dv = dv->next) + if (dv->index == array_index) { + /* Copy imsm_dev into the new buffer */ + imsm_copy_dev(newdev, dv->dev); + dv_free = dv->dev; + dv->dev = newdev; + retval = newdev; + free(dv_free); + break; + } + if (retval == NULL) + free(newdev); + + return retval; +} + static int compare_super_imsm(struct supertype *st, struct supertype *tst) { /* @@ -4843,6 +4980,57 @@ static void imsm_process_update(struct supertype *st, mpb = super->anchor; switch (type) { + case update_level: { + struct imsm_update_level *u = (void *) update->buf; + struct imsm_dev *dev_new, *dev = NULL; + struct imsm_map *map; + struct dl *d; + int i; + + dev_new = &u->dev; + for (i = 0; i < mpb->num_raid_devs; i++) { + dev = get_imsm_dev(super, i); + if (strcmp((char *)dev_new->volume, (char *)dev->volume) == 0) + break; + } + if (i == super->anchor->num_raid_devs) + return; + + if (dev == NULL) + return; + + imsm_copy_dev(dev, dev_new); + map = get_imsm_map(dev, 0); + mpb->num_disks += u->delta_disks; + + /* clear missing disks list */ + while 
(super->missing) { + d = super->missing; + super->missing = d->next; + __free_imsm_disk(d); + } + find_missing(super); + + /* clear new disk entries if number of disks increased*/ + d = super->missing; + for (i = 0; i < map->num_members; i++) { + if (map->disk_ord_tbl[i] != -1) + continue; + assert(d != NULL); + if (!d) + break; + memset(&d->disk, 0, sizeof(d->disk)); + strcpy((char *)d->disk.serial, "MISSING"); + d->disk.total_blocks = map->blocks_per_member; + /* Set slot for missing disk */ + set_imsm_ord_tbl_ent(map, i, d->index | IMSM_ORD_REBUILD); + d->raiddisk = i; + d = d->next; + } + + super->updates_pending++; + break; + } case update_activate_spare: { struct imsm_update_activate_spare *u = (void *) update->buf; struct imsm_dev *dev = get_imsm_dev(super, u->array); @@ -5098,6 +5286,26 @@ static void imsm_prepare_update(struct supertype *st, size_t len = 0; switch (type) { + case update_level: { + struct imsm_update_level *u = (void *) update->buf; + struct active_array *a; + + dprintf("prepare_update(): update level\n"); + len += u->delta_disks * sizeof(struct imsm_disk) + + u->delta_disks * sizeof(__u32); + + for (a = st->arrays; a; a = a->next) + if (a->info.container_member == u->container_member) + break; + if (a == NULL) + break; /* what else we can do here? */ + + /* we'll add new disks to imsm_dev */ + if (u->delta_disks > 0) + reallocate_imsm_dev(super, u->container_member, + a->info.array.raid_disks); + break; + } case update_create_array: { struct imsm_update_create_array *u = (void *) update->buf; struct intel_dev *dv;