This is a continuation of the OLCE (online capacity expansion) patches sent by Adam Kwolek some time ago, so please apply this one after those patches. Support for the IMSM Migration Record was added. The migration record is initialized with the IMSM family number when the reshape starts. During assembly of a partially migrated array, the migration record is read to recalculate the reshape parameters. For compatibility with IMSM, when a migrating raid0 array is detected it is switched to a raid4 reshape, so that mdmon/md can restart reshaping with the raid5 personality. Chunk size changes are now monitored for external metadata. Regards, Maciek. --- Assemble.c | 37 +++++ managemon.c | 3 monitor.c | 57 +++++--- super-intel.c | 398 +++++++++++++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 424 insertions(+), 71 deletions(-) diff --git a/Assemble.c b/Assemble.c index d4398b6..3d2dc82 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1275,6 +1275,7 @@ int assemble_container_content(struct supertype *st, int mdfd, struct mdinfo *dev, *sra; int working = 0, preexist = 0; struct map_ent *map = NULL; + char buf[20]; sysfs_init(content, mdfd, 0); @@ -1284,6 +1285,14 @@ int assemble_container_content(struct supertype *st, int mdfd, close(mdfd); return 1; } + + if (content->reshape_active) { + sysfs_set_num(sra, NULL, "reshape_position", content->reshape_progress); + sysfs_set_num(sra, NULL, "chunk_size", content->new_chunk); + sysfs_set_num(sra, NULL, "layout", content->new_layout); + sysfs_set_num(sra, NULL, "raid_disks", content->array.raid_disks + content->delta_disks); + } + if (sra) { sysfs_free(sra); sra = NULL; @@ -1342,6 +1351,34 @@ int assemble_container_content(struct supertype *st, int mdfd, } if (!err) wait_for(chosen_name, mdfd); + + sra = sysfs_read(mdfd, 0, GET_VERSION); + if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) + if (sysfs_set_array(content, md_get_version(mdfd)) != 0) { + close(mdfd); + return 1; + } + + if 
(content->reshape_active) { + /* wait for reconfiguration is beeing signalled from monitor */ + int retval; + long sync_compl = -1; + char *ep; + { + sleep(1); + retval = sysfs_get_str(sra, NULL, "sync_completed", buf, 20); + if (retval > 0) + sync_compl = strtol(buf, &ep, 10); + } while ((sync_compl != 0) || (retval < 0)); + + sysfs_set_str(sra, NULL, "sync_max", "max"); + } + + if (sra) { + sysfs_free(sra); + sra = NULL; + } + close(mdfd); return 0; /* FIXME should have an O_EXCL and wait for read-auto */ diff --git a/managemon.c b/managemon.c index b42a00b..7744941 100644 --- a/managemon.c +++ b/managemon.c @@ -459,7 +459,7 @@ static void manage_member(struct mdstat_ent *mdstat, char sync_max_str[SYNC_MAX_STR_LEN]; if (sysfs_get_str(&(a->info), NULL,"sync_max", sync_max_str, SYNC_MAX_STR_LEN) >0) { - if ((strncmp(sync_max_str, "0", 1) == 0) && ( a->reshape_delta_disks > 0 )) { + if ((strncmp(sync_max_str, "0", 1) == 0) && (a->reshape_delta_disks > 0)) { /* this is reshape /hold on/ phase * we ahve add device before any action takes place * to allow manage new device also along entire managment procedure @@ -608,7 +608,6 @@ static void manage_member(struct mdstat_ent *mdstat, } free_updates(&updates); } - } static int aa_ready(struct active_array *aa) diff --git a/monitor.c b/monitor.c index 7742202..2fa2983 100644 --- a/monitor.c +++ b/monitor.c @@ -109,29 +109,33 @@ static int read_level(int fd) char buf[20]; int n = read_attr(buf, 20, fd); + if (n <= 0) return -1; int level = map_name(pers, buf); return level; } + static unsigned long long read_reshape_position( int fd) { char buf[40]; int n = read_attr(buf, 40, fd); - unsigned long long retVal = 0; + unsigned long long retVal; + char *ep; - if (n <= 0) { - /* error - do nothing */ - } else { - /* check if reshape position i s numeric */ - char *ep; - - retVal = strtoull(buf, &ep, 10); - if (ep == buf || - ((*ep != 0) && (*ep != '\n') && (*ep != ' '))) { - /* error so we have to wait more */ - retVal =0; - } + 
/* error - do nothing */ + if (n <= 0) + return 0; + + if (strncmp(buf, "none", 4) == 0) + return MaxSector; + + /* check if reshape position is numeric */ + retVal = strtoull(buf, &ep, 10); + if (ep == buf || + ((*ep != 0) && (*ep != '\n') && (*ep != ' '))) { + /* error so we have to wait more */ + retVal = 0; } return retVal; @@ -279,7 +283,7 @@ static int read_and_act(struct active_array *a) if (a->takeover == none) { /* check for raid level change */ - if (a->curr_level != a->prev_level) { + if ((a->curr_level != a->prev_level) && (a->prev_level != -1)) { /* start takeover procedure */ a->takeover = requested; } @@ -363,8 +367,10 @@ static int read_and_act(struct active_array *a) } } -#define OLCE_STR_LEN 50 -#define RAID_DISKS_STR_LEN 50 +#define OLCE_STR_LEN 50 +#define RAID_DISKS_STR_LEN 50 +#define CHUNK_SIZE_STR_LEN 50 + if (!deactivate) { /* monitor reshape position (update meta) * Start reshape @@ -375,21 +381,18 @@ static int read_and_act(struct active_array *a) case reshape : /* continue reshape */ { if (a->reshape_position != reshape_position) { - /* uptdate meta - * where update reshape position ? propably in manager to identify request - */ - - /* a->reshape_position = reshape_position */ + /* uptdate meta */ + a->container->ss->set_array_state(a, 1); + a->reshape_position = reshape_position; sysfs_set_str(&(a->info), NULL,"sync_completed","0"); - /* reshape checkpointing */ - /* send_to_manager = 1; */ } } break; default: { - char raid_disks_str[ RAID_DISKS_STR_LEN ]; + char raid_disks_str[RAID_DISKS_STR_LEN]; + char chunk_size_str[CHUNK_SIZE_STR_LEN]; /* we ahve add device before any action takes place * to allow manage new device also along entire managment procedure * @@ -399,6 +402,12 @@ static int read_and_act(struct active_array *a) * 4. 
trigger mdadm by setting sync_compl=0 in sysfs */ if (sysfs_get_str(&(a->info), NULL, "raid_disks", raid_disks_str, RAID_DISKS_STR_LEN) >0) { + + if (sysfs_get_str(&(a->info), NULL, "chunk_size", chunk_size_str, RAID_DISKS_STR_LEN) > 0) { + char *endPtr; + a->info.new_chunk = strtol(chunk_size_str, &endPtr, 10); + } + char *spaceLocation = NULL; /* get it in format: new_raid_disks (old_raid_disks) */ spaceLocation = strchr (raid_disks_str, ' '); diff --git a/super-intel.c b/super-intel.c index f6e2538..5eeb2e3 100644 --- a/super-intel.c +++ b/super-intel.c @@ -199,6 +199,26 @@ static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" +struct MigrRecord { + __u32 recStatus; /* Status used to determine how to restart + * migration in case it aborts in some fashion */ + __u32 currMigrUnit; /* 0..numMigrUnits-1 */ + __u32 familyNum; /* Family number of MPB containing the RaidDev + * that is migrating */ + __u32 ascendingMigr; /* True if migrating in increasing order of lbas */ + __u32 blocksPerUnit; /* Num disk blocks per unit of operation */ + __u32 destDepthPerUnit; /* Num member blocks each destMap member disk + * advances per unit-of-operation */ + __u32 ckptAreaPba; /* Pba of first block of ckpt copy area */ + __u32 dest1stMemberLba; /* First member lba on first stripe of destination */ + __u32 numMigrUnits; /* Total num migration units-of-op */ + __u32 postMigrVolCapacity; /* Size of volume after migration completes */ + __u32 postMigrVolCapacityHi;/* Expansion space for LBA64 */ + __u32 ckptReadDiskNum; /* Which member disk in destSubMap[0] the + * migration ckpt record was read from + * (for recovered migrations) */ +}; + static __u8 migr_type(struct imsm_dev *dev) { if (dev->vol.migr_type == MIGR_VERIFY && @@ -484,14 +504,26 @@ static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index) return NULL; } -static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot) +/* + * for second_map: + * == 0 get first map + * == 1 
get second map + * == -1 than get map according to the current migr_state + */ +static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, + int slot, + int second_map) { struct imsm_map *map; - if (dev->vol.migr_state) - map = get_imsm_map(dev, 1); - else - map = get_imsm_map(dev, 0); + if (second_map == -1) { + if (dev->vol.migr_state) + map = get_imsm_map(dev, 1); + else + map = get_imsm_map(dev, 0); + } else { + map = get_imsm_map(dev, second_map); + } /* top byte identifies disk under rebuild */ return __le32_to_cpu(map->disk_ord_tbl[slot]); @@ -499,9 +531,9 @@ static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev, int slot) #define ord_to_idx(ord) (((ord) << 8) >> 8) #define get_ord_flags(ord) (((ord) >> 8) << 8) -static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot) +static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map) { - __u32 ord = get_imsm_ord_tbl_ent(dev, slot); + __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map); return ord_to_idx(ord); } @@ -691,7 +723,7 @@ static void print_imsm_dev(struct imsm_dev *dev, char *uuid, int disk_idx) printf(" Members : %d\n", map->num_members); slot = get_imsm_disk_slot(map, disk_idx); if (slot >= 0) { - ord = get_imsm_ord_tbl_ent(dev, slot); + ord = get_imsm_ord_tbl_ent(dev, slot, -1); printf(" This Slot : %d%s\n", slot, ord & IMSM_ORD_REBUILD ? 
" (out-of-sync)" : ""); } else @@ -1341,12 +1373,12 @@ static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev) return num_stripes_per_unit_resync(dev); } -static __u8 imsm_num_data_members(struct imsm_dev *dev) +static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map) { /* named 'imsm_' because raid0, raid1 and raid10 * counter-intuitively have the same number of data disks */ - struct imsm_map *map = get_imsm_map(dev, 0); + struct imsm_map *map = get_imsm_map(dev, second_map); switch (get_imsm_raid_level(map)) { case 0: @@ -1429,7 +1461,7 @@ static __u64 blocks_per_migr_unit(struct imsm_dev *dev) */ stripes_per_unit = num_stripes_per_unit_resync(dev); migr_chunk = migr_strip_blocks_resync(dev); - disks = imsm_num_data_members(dev); + disks = imsm_num_data_members(dev, 0); blocks_per_unit = stripes_per_unit * migr_chunk * disks; stripe = __le32_to_cpu(map->blocks_per_strip) * disks; segment = blocks_per_unit / stripe; @@ -1469,6 +1501,86 @@ static int imsm_level_to_layout(int level) return UnSet; } +/* + * load_imsm_migr_rec - read imsm migration record + */ +static int load_imsm_migr_rec(int fd, struct MigrRecord *rec, char *devname) +{ + struct MigrRecord *read_buf; + unsigned long long dsize; + + get_dev_size(fd, NULL, &dsize); + + if (posix_memalign((void **)&read_buf, 512, 512) != 0) { + if (devname) + fprintf(stderr, + Name ": Failed to allocate imsm anchor buffer" + " on %s\n", devname); + return 1; + } + + + if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + if (devname) + fprintf(stderr, + Name ": Cannot seek to anchor block on %s: %s\n", + devname, strerror(errno)); + return 1; + } + + if (read(fd, read_buf, 512) != 512) { + if (devname) + fprintf(stderr, + Name ": Cannot read migr record block on %s: %s\n", + devname, strerror(errno)); + return 1; + } + + memcpy(rec, read_buf, sizeof(*rec)); + free(read_buf); + return 0; +} + +/* + * write_imsm_migr_rec - write imsm migration record + */ +static int write_imsm_migr_rec(int fd, struct 
MigrRecord *rec, char *devname) +{ + struct MigrRecord *write_buf; + unsigned long long dsize; + + get_dev_size(fd, NULL, &dsize); + + if (posix_memalign((void **)&write_buf, 512, 512) != 0) { + if (devname) + fprintf(stderr, + Name ": Failed to allocate imsm anchor buffer" + " on %s\n", devname); + return 1; + } + + if (lseek64(fd, dsize - 512, SEEK_SET) < 0) { + if (devname) + fprintf(stderr, + Name ": Cannot seek to anchor block on %s: %s\n", + devname, strerror(errno)); + return 1; + } + + memcpy(write_buf, rec, sizeof(*rec)); + + fprintf(stderr, "writing migr_rec at: %llu\n", dsize-512); + if (write(fd, write_buf, 512) != 512) { + if (devname) + fprintf(stderr, + Name ": Cannot write migr record block on %s: %s\n", + devname, strerror(errno)); + return 1; + } + free(write_buf); + return 0; +} + static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) { struct intel_super *super = st->sb; @@ -1476,6 +1588,8 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) struct imsm_map *map = get_imsm_map(dev, 0); struct dl *dl; char *devname; + __u32 blocks_per_member; + __u32 blocks_per_strip; for (dl = super->disks; dl; dl = dl->next) if (dl->raiddisk == info->disk.raid_disk) @@ -1483,7 +1597,13 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) info->container_member = super->current_vol; info->array.raid_disks = map->num_members; info->array.level = get_imsm_raid_level(map); - info->array.layout = imsm_level_to_layout(info->array.level); + if (info->array.level == 4) { + map->raid_level = 5; + info->array.level = 5; + info->array.layout = ALGORITHM_PARITY_N; + } else { + info->array.layout = imsm_level_to_layout(info->array.level); + } info->array.md_minor = -1; info->array.ctime = 0; info->array.utime = 0; @@ -1501,7 +1621,15 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) } info->data_offset = __le32_to_cpu(map->pba_of_lba0); - 
info->component_size = __le32_to_cpu(map->blocks_per_member); + /* FIXME: For some unknown reason sometimes in a volume created by + * IMSM blocks_per_member is not a multiple of blocks_per strip. + * Fix blocks_per_member here: + */ + blocks_per_member = __le32_to_cpu(map->blocks_per_member); + blocks_per_strip = __le16_to_cpu(map->blocks_per_strip); + blocks_per_member &= ~(blocks_per_strip - 1); + info->component_size = blocks_per_member; + memset(info->uuid, 0, sizeof(info->uuid)); if (map->map_state == IMSM_T_STATE_UNINITIALIZED || dev->vol.dirty) { @@ -1523,9 +1651,64 @@ static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info) */ case MIGR_REBUILD: /* this is handled by container_content_imsm() */ - case MIGR_GEN_MIGR: + case MIGR_GEN_MIGR: { + struct imsm_map *prev_map; + struct MigrRecord migrRec; + struct dl *dl; + char nm[20]; + int dfd; + int data_members; + + /* Read migration record from one of the first two disks */ + for (dl = super->disks; dl; dl = dl->next) { + if (dl->index > 1) + continue; + + sprintf(nm, "%d:%d", dl->major, dl->minor); + dfd = dev_open(nm, O_RDONLY); + if (dfd < 0) + continue; + if (load_imsm_migr_rec(dfd, &migrRec, dl->devname)) + break; + close(dfd); + } + + if (migrRec.currMigrUnit == 0) + info->reshape_progress = 0; + else + info->reshape_progress = + (unsigned long long)(migrRec.currMigrUnit - 1) + * migrRec.blocksPerUnit; + + /* set previous and new map configurations */ + prev_map = get_imsm_map(dev, 1); + info->reshape_active = 1; + info->array.raid_disks = prev_map->num_members; + info->delta_disks = map->num_members - prev_map->num_members; + info->new_level = info->array.level; + info->array.level = get_imsm_raid_level(prev_map); + info->new_layout = info->array.layout; + info->array.layout = imsm_level_to_layout(info->array.level); + info->array.chunk_size = __le16_to_cpu(prev_map->blocks_per_strip) << 9; + info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9; + + if (info->array.level 
== 4) { + prev_map->raid_level = 5; + info->array.level = 5; + info->array.layout = ALGORITHM_PARITY_N; + } + + /* IMSM FIX for blocks_per_member */ + blocks_per_strip = __le16_to_cpu(prev_map->blocks_per_strip); + blocks_per_member &= ~(blocks_per_strip - 1); + info->component_size = blocks_per_member; + + /* Calculate previous array size */ + data_members = imsm_num_data_members(dev, 1); + info->custom_array_size = blocks_per_member * data_members; + } case MIGR_STATE_CHANGE: - /* FIXME handle other migrations */ + /* FIXME handle other migrations */ default: /* we are not dirty, so... */ info->resync_start = MaxSector; @@ -2093,11 +2276,16 @@ load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd) * map1state=normal) * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal * map1state=degraded) + * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal + * map1state=normal) */ -static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type) +static void migrate(struct imsm_dev *dev, struct intel_super *super, + __u8 to_state, int migr_type) { struct imsm_map *dest; struct imsm_map *src = get_imsm_map(dev, 0); + __u32 ord; + int i; dev->vol.migr_state = 1; set_migr_type(dev, migr_type); @@ -2106,15 +2294,34 @@ static void migrate(struct imsm_dev *dev, __u8 to_state, int migr_type) /* duplicate and then set the target end state in map[0] */ memcpy(dest, src, sizeof_imsm_map(src)); - if ((migr_type == MIGR_REBUILD) || - (migr_type == MIGR_GEN_MIGR)) { - __u32 ord; - int i; + if (migr_type == MIGR_REBUILD) + for (i = 0; i < src->num_members; i++) { + ord = __le32_to_cpu(src->disk_ord_tbl[i]); + set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord)); + } + if (migr_type == MIGR_GEN_MIGR) { for (i = 0; i < src->num_members; i++) { ord = __le32_to_cpu(src->disk_ord_tbl[i]); set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord)); } + + /* Initialize Migration record area */ + struct MigrRecord migrRec; + memset(&migrRec, 0, 
sizeof(migrRec)); + migrRec.familyNum = super->anchor->family_num; + + struct dl *dl; + char nm[20]; + int dfd; + for (dl = super->disks; dl; dl = dl->next) { + sprintf(nm, "%d:%d", dl->major, dl->minor); + dfd = dev_open(nm, O_RDWR); + if (dfd < 0) + continue; + write_imsm_migr_rec(dfd, &migrRec, dl->devname); + close(dfd); + } } src->map_state = to_state; @@ -2167,6 +2374,7 @@ static int parse_raid_devices(struct intel_super *super) dv = NULL; return 1; } + imsm_copy_dev(dev_new, dev_iter); dv->dev = dev_new; dv->index = i; @@ -2207,6 +2415,90 @@ struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb) return ptr; } +/* Switches N-disk Raid0 map configuration (N+1)disk Raid4 + */ +void switch_raid0_configuration(struct imsm_super *mpb, struct imsm_map *map) +{ + __u8 *src, *dst; + int bytes_to_copy; + + /* get the pointer to the rest of the metadata */ + src = (__u8 *)map + sizeof_imsm_map(map); + + /* change the level and disk number to be compatible with IMSM */ + map->raid_level = 4; + map->num_members++; + + /* get the updated pointer to the rest of the metadata */ + dst = (__u8 *)map + sizeof_imsm_map(map); + /* Now move the rest of the metadata to be properly aligned */ + bytes_to_copy = mpb->mpb_size - (src - (__u8 *)mpb); + if (bytes_to_copy > 0) + memmove(dst, src, bytes_to_copy); + /* Now insert new entry to the map */ + set_imsm_ord_tbl_ent(map, map->num_members - 1/*slot*/, + mpb->num_disks | IMSM_ORD_REBUILD); + /* update size */ + mpb->mpb_size += sizeof(__u32); +} + +/* Make sure that in case of migration in progress we'll convert raid + * personalities so we could continue migrating + */ +void check_mpb_migr_compatibility(struct intel_super *super) +{ + struct imsm_super *mpb = super->anchor; + struct imsm_map *map; + struct imsm_disk *newMissing; + int i, map_modified = 0; + int bytes_to_copy; + __u8 *src, *dst; + + for (i = 0; i < super->anchor->num_raid_devs; i++) { + struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i); + + 
map_modified = 0; + if (dev_iter->vol.migr_state == MIGR_GEN_MIGR) { + /* This device is migrating, check for raid0 levels */ + map = get_imsm_map(dev_iter, 0); + if (map->raid_level == 0) { + /* Map0: Migrating raid0 detected - lets switch it to level4 */ + switch_raid0_configuration(mpb, map); + map_modified++; + } + map = get_imsm_map(dev_iter, 1); + if (map->raid_level == 0) { + /* Map1: Migrating raid0 detected - lets switch it to level4 */ + switch_raid0_configuration(mpb, map); + map_modified++; + } + } + } + + if (map_modified > 0) { + /* Add missing device to the MPB disk table */ + src = (__u8 *)mpb->disk + sizeof(struct imsm_disk) * mpb->num_disks; + mpb->num_disks++; + dst = (__u8 *)mpb->disk + sizeof(struct imsm_disk) * mpb->num_disks; + + /* Now move the rest of the metadata to be properly aligned */ + bytes_to_copy = mpb->mpb_size - (src - (__u8 *)mpb); + if (bytes_to_copy > 0) + memmove(dst, src, bytes_to_copy); + + /* Update mpb size */ + mpb->mpb_size += sizeof(struct imsm_disk); + + /* Now fill in the new missing disk fields */ + newMissing = (struct imsm_disk *)src; + sprintf((char *)newMissing->serial, "%s", "MISSING DISK"); + /* copy the device size from the first disk */ + newMissing->total_blocks = mpb->disk[0].total_blocks; + newMissing->scsi_id = 0x0; + newMissing->status = FAILED_DISK; + } +} + static void __free_imsm(struct intel_super *super, int free_disks); /* load_imsm_mpb - read matrix metadata @@ -2314,6 +2606,12 @@ static int load_imsm_mpb(int fd, struct intel_super *super, char *devname) return 3; } + + /* Now make sure that in case of migration + * we'll convert raid personalities + */ + check_mpb_migr_compatibility(super); + /* FIXME the BBM log is disk specific so we cannot use this global * buffer for all disks. 
Ok for now since we only look at the global * bbm_log_size parameter to gate assembly @@ -3729,7 +4027,7 @@ static int create_array(struct supertype *st, int dev_idx) imsm_copy_dev(&u->dev, dev); inf = get_disk_info(u); for (i = 0; i < map->num_members; i++) { - int idx = get_imsm_disk_idx(dev, i); + int idx = get_imsm_disk_idx(dev, i, -1); disk = get_imsm_disk(super, idx); serialcpy(inf[i].serial, disk->serial); @@ -4415,7 +4713,7 @@ static struct mdinfo *container_content_imsm(struct supertype *st) mpb = super->anchor; else return NULL; - + fprintf(stderr, "calling container_content_imsm()...\n"); /* do not assemble arrays that might have bad blocks */ if (imsm_bbm_log_size(super->anchor)) { fprintf(stderr, Name ": BBM log found in metadata. " @@ -4460,8 +4758,8 @@ static struct mdinfo *container_content_imsm(struct supertype *st) __u32 ord; skip = 0; - idx = get_imsm_disk_idx(dev, slot); - ord = get_imsm_ord_tbl_ent(dev, slot); + idx = get_imsm_disk_idx(dev, slot, 0); + ord = get_imsm_ord_tbl_ent(dev, slot, 0); for (d = super->disks; d ; d = d->next) if (d->index == idx) break; @@ -4574,7 +4872,7 @@ static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int insync = insync; for (i = 0; i < map->num_members; i++) { - __u32 ord = get_imsm_ord_tbl_ent(dev, i); + __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1); int idx = ord_to_idx(ord); struct imsm_disk *disk; @@ -4767,13 +5065,14 @@ static int imsm_set_array_state(struct active_array *a, int consistent) end_migration(dev, map_state); super->updates_pending++; } - } else if (!is_resyncing(dev) && !failed) { + } else if (!is_resyncing(dev) && !failed + && (migr_type(dev) != MIGR_GEN_MIGR)) { /* mark the start of the init process if nothing is failed */ dprintf("imsm: mark resync start\n"); if (map->map_state == IMSM_T_STATE_UNINITIALIZED) - migrate(dev, IMSM_T_STATE_NORMAL, MIGR_INIT); + migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT); else - migrate(dev, IMSM_T_STATE_NORMAL, MIGR_REPAIR); 
+ migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR); super->updates_pending++; } @@ -4783,12 +5082,13 @@ static int imsm_set_array_state(struct active_array *a, int consistent) __u32 units32; __u64 units; - if ((migr_type(dev) == MIGR_REBUILD) || - (migr_type(dev) == MIGR_GEN_MIGR)) /* for OLCE we have to do it, - similar to rebuild */ + if (migr_type(dev) == MIGR_REBUILD) { units = min_recovery_start(&a->info) / blocks_per_unit; - else + } else if (migr_type(dev) == MIGR_GEN_MIGR) { + units = a->reshape_position / blocks_per_unit; + } else { units = a->info.resync_start / blocks_per_unit; + } units32 = units; /* check that we did not overflow 32-bits, and that @@ -4848,7 +5148,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state) dprintf("imsm: set_disk %d:%x\n", n, state); - ord = get_imsm_ord_tbl_ent(dev, n); + ord = get_imsm_ord_tbl_ent(dev, n, -1); disk = get_imsm_disk(super, ord_to_idx(ord)); /* check for new failures */ @@ -4946,7 +5246,7 @@ static void imsm_sync_metadata(struct supertype *container) static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int i = get_imsm_disk_idx(dev, idx); + int i = get_imsm_disk_idx(dev, idx, -1); struct dl *dl; for (dl = super->disks; dl; dl = dl->next) @@ -4966,7 +5266,7 @@ static struct dl *imsm_add_spare(struct intel_super *super, int slot, struct active_array *a, int activate_new) { struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member); - int idx = get_imsm_disk_idx(dev, slot); + int idx = get_imsm_disk_idx(dev, slot, -1); struct imsm_super *mpb = super->anchor; struct imsm_map *map; unsigned long long pos; @@ -5375,7 +5675,7 @@ static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_ int j; for (i = 0; i < map->num_members; i++) { - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1)); 
for (j = 0; j < new_map->num_members; j++) if (serialcmp(disk->serial, inf[j].serial) == 0) return 1; @@ -5526,7 +5826,7 @@ static void imsm_process_update(struct supertype *st, /* Update slots in the raid map */ for (i = 0; i < u->sl_changed; i++) { int idx; - idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot); + idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot, 0); /* now after slot changes */ set_imsm_ord_tbl_ent(map, u->sl_changes[i].new_slot, idx); } @@ -5598,7 +5898,7 @@ static void imsm_process_update(struct supertype *st, /* Update slots in the raid map */ for (i = 0; i < u->sl_changed; i++) { int idx; - idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot); + idx = get_imsm_ord_tbl_ent(dev, u->sl_changes[i].prev_slot, 0); /* now after slot changes */ set_imsm_ord_tbl_ent(map, u->sl_changes[i].new_slot, idx); } @@ -5657,10 +5957,14 @@ static void imsm_process_update(struct supertype *st, fprintf(stderr, "error: IMSM: Cannot find requested device\n"); goto update_add_spare_exit; } + /* use first map as a base */ migr_state = dev->vol.migr_state; dev->vol.migr_state = 0; + if (migr_state != 0) + return; + map = get_imsm_map(dev, 0); if (map == NULL) { fprintf(stderr, "error: IMSM: Invalid map for requested device\n"); @@ -5701,7 +6005,7 @@ static void imsm_process_update(struct supertype *st, /* set migration/ no migration in progress */ dev->vol.migr_state = MIGR_GEN_MIGR; to_state = imsm_check_degraded(super, dev, 0); - migrate(dev, to_state, dev->vol.migr_state); + migrate(dev, super, to_state, dev->vol.migr_state); migr_map = get_imsm_map(dev, 1); /* now switch to first map only @@ -5735,7 +6039,7 @@ static void imsm_process_update(struct supertype *st, int j; for (j=0; j < map->num_members ;j++) { - hdd_index = get_imsm_disk_idx(dev, j); + hdd_index = get_imsm_disk_idx(dev, j, 0); /* check if disk serial is equal to TAKEOVER_MISSING_DISK */ if ((hdd_index == dl->index) && (strncmp((char*)dl->disk.serial, @@ -5744,7 +6048,7 @@ 
static void imsm_process_update(struct supertype *st, dl_missing = dl; slot_missing = j; missing_index = hdd_index; - missing_ord = get_imsm_ord_tbl_ent(dev, j); + missing_ord = get_imsm_ord_tbl_ent(dev, j, 0); break; } } @@ -5835,6 +6139,10 @@ static void imsm_process_update(struct supertype *st, unsigned long long array_blocks; unsigned long long array_blocks_current; int used_disks = 0; + + map->blocks_per_strip = a->info.new_chunk >> 9; + map->num_data_stripes = map->blocks_per_member / map->blocks_per_strip; + /* count used disks for data */ switch (map->raid_level) { case 0: @@ -5882,7 +6190,7 @@ update_add_spare_exit: struct dl *dl; unsigned int found; int failed; - int victim = get_imsm_disk_idx(dev, u->slot); + int victim = get_imsm_disk_idx(dev, u->slot, -1); int i; for (dl = super->disks; dl; dl = dl->next) @@ -5905,7 +6213,7 @@ update_add_spare_exit: for (i = 0; i < map->num_members; i++) { if (i == u->slot) continue; - disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i)); + disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1)); if (!disk || is_failed(disk)) failed++; } @@ -5922,7 +6230,7 @@ update_add_spare_exit: /* mark rebuild */ to_state = imsm_check_degraded(super, dev, failed); map->map_state = IMSM_T_STATE_DEGRADED; - migrate(dev, to_state, MIGR_REBUILD); + migrate(dev, super, to_state, MIGR_REBUILD); migr_map = get_imsm_map(dev, 1); set_imsm_ord_tbl_ent(map, u->slot, dl->index); set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD); @@ -6264,7 +6572,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, int index) /* update ord entries being careful not to propagate * ord-flags to the first map */ - ord = get_imsm_ord_tbl_ent(dev, j); + ord = get_imsm_ord_tbl_ent(dev, j, -1); if (ord_to_idx(ord) <= index) continue;