On Tue, 22 Jan 2013 21:42:18 -0600 Jonathan Brassow <jbrassow@xxxxxxxxxx> wrote:

> Neil,
>
> Here is the RAID10 redundancy check fix backported for linux-3.7

Thanks.

So this goes into 3.7.y and bumps the dm-raid version to 1.3.2.
It should also go into 3.8-rc and bump the dm-raid version there to 1.4.1.
Then "Add support for MD's RAID10 "far" and "offset" algorithms", which is
due to go into 3.9-rc, should bump the dm-raid version to 1.4.2.

Is that right?

I've re-arranged the patches in my for-next to match this - please check
I didn't mess anything up.  Then I'll send them off.

Thanks,
NeilBrown

>
> Thanks,
> brassow
>
>
> DM RAID: Fix RAID10's check for sufficient redundancy
>
> Before attempting to activate a RAID array, it is checked for sufficient
> redundancy.  That is, we make sure that there are not too many failed
> devices - or devices specified for rebuild - to undermine our ability to
> activate the array.  The current code performs this check twice - once to
> ensure there were not too many devices specified for rebuild by the user
> ('validate_rebuild_devices') and again after possibly experiencing a failure
> to read the superblock ('analyse_superblocks').  Neither of these checks is
> sufficient.  The first check is done properly, but with insufficient
> information about the possible failure state of the devices to make a good
> determination of whether the array can be activated.  The second check is
> simply done wrong in the case of RAID10 because it doesn't account for the
> independence of the stripes (i.e. mirror sets).  The solution is to use the
> properly written check ('validate_rebuild_devices'), but perform the check
> after the superblocks have been read and we know which devices have failed.
> This gives us one check instead of two, and performs it in a location where
> it can be done right.
>
> Only RAID10 was affected, and it was affected in the following ways:
> - the code did not properly catch the condition where a user specified
>   a device for rebuild that already had a failed device in the same mirror
>   set.  (This condition would, however, be caught at a deeper level in MD.)
> - the code triggered a false positive and denied activation when devices in
>   independent mirror sets had failed - counting the failures as though they
>   were all in the same set.
>
> This error was most likely introduced (or this patch should have been
> included) in commit 4ec1e369, which first appeared in v3.7-rc1.
>
> Signed-off-by: Jonathan Brassow <jbrassow@xxxxxxxxxx>
> Index: linux-upstream/Documentation/device-mapper/dm-raid.txt
> ===================================================================
> --- linux-upstream.orig/Documentation/device-mapper/dm-raid.txt
> +++ linux-upstream/Documentation/device-mapper/dm-raid.txt
> @@ -141,3 +141,4 @@ Version History
>  1.2.0	Handle creation of arrays that contain failed devices.
>  1.3.0	Added support for RAID 10
>  1.3.1	Allow device replacement/rebuild for RAID 10
> +1.3.2	Fix/improve redundancy checking for RAID10
> Index: linux-upstream/drivers/md/dm-raid.c
> ===================================================================
> --- linux-upstream.orig/drivers/md/dm-raid.c
> +++ linux-upstream/drivers/md/dm-raid.c
> @@ -338,24 +338,22 @@ static int validate_region_size(struct r
>  }
>
>  /*
> - * validate_rebuild_devices
> + * validate_raid_redundancy
>   * @rs
>   *
> - * Determine if the devices specified for rebuild can result in a valid
> - *	usable array that is capable of rebuilding the given devices.
> + * Determine if there are enough devices in the array that haven't
> + * failed (or are being rebuilt) to form a usable array.
>   *
>   * Returns: 0 on success, -EINVAL on failure.
>   */
> -static int validate_rebuild_devices(struct raid_set *rs)
> +static int validate_raid_redundancy(struct raid_set *rs)
>  {
>  	unsigned i, rebuild_cnt = 0;
>  	unsigned rebuilds_per_group, copies, d;
>
> -	if (!(rs->print_flags & DMPF_REBUILD))
> -		return 0;
> -
>  	for (i = 0; i < rs->md.raid_disks; i++)
> -		if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
> +		if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
> +		    !rs->dev[i].rdev.sb_page)
>  			rebuild_cnt++;
>
>  	switch (rs->raid_type->level) {
> @@ -391,27 +389,24 @@ static int validate_rebuild_devices(stru
>  		 *	    A    A    B    B    C
>  		 *	    C    D    D    E    E
>  		 */
> -		rebuilds_per_group = 0;
>  		for (i = 0; i < rs->md.raid_disks * copies; i++) {
> +			if (!(i % copies))
> +				rebuilds_per_group = 0;
>  			d = i % rs->md.raid_disks;
> -			if (!test_bit(In_sync, &rs->dev[d].rdev.flags) &&
> +			if ((!rs->dev[d].rdev.sb_page ||
> +			     !test_bit(In_sync, &rs->dev[d].rdev.flags)) &&
>  			    (++rebuilds_per_group >= copies))
>  				goto too_many;
> -			if (!((i + 1) % copies))
> -				rebuilds_per_group = 0;
>  		}
>  		break;
>  	default:
> -		DMERR("The rebuild parameter is not supported for %s",
> -		      rs->raid_type->name);
> -		rs->ti->error = "Rebuild not supported for this RAID type";
> -		return -EINVAL;
> +		if (rebuild_cnt)
> +			return -EINVAL;
>  	}
>
>  	return 0;
>
>  too_many:
> -	rs->ti->error = "Too many rebuild devices specified";
>  	return -EINVAL;
>  }
>
> @@ -662,9 +657,6 @@ static int parse_raid_params(struct raid
>  	}
>  	rs->md.dev_sectors = sectors_per_dev;
>
> -	if (validate_rebuild_devices(rs))
> -		return -EINVAL;
> -
>  	/* Assume there are no metadata devices until the drives are parsed */
>  	rs->md.persistent = 0;
>  	rs->md.external = 1;
>
> @@ -993,28 +985,10 @@ static int super_validate(struct mddev *
>  static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
>  {
>  	int ret;
> -	unsigned redundancy = 0;
>  	struct raid_dev *dev;
>  	struct md_rdev *rdev, *tmp, *freshest;
>  	struct mddev *mddev = &rs->md;
>
> -	switch (rs->raid_type->level) {
> -	case 1:
> -		redundancy = rs->md.raid_disks - 1;
> -		break;
> -	case 4:
> -	case 5:
> -	case 6:
> -		redundancy = rs->raid_type->parity_devs;
> -		break;
> -	case 10:
> -		redundancy = raid10_md_layout_to_copies(mddev->layout) - 1;
> -		break;
> -	default:
> -		ti->error = "Unknown RAID type";
> -		return -EINVAL;
> -	}
> -
>  	freshest = NULL;
>  	rdev_for_each_safe(rdev, tmp, mddev) {
>  		/*
> @@ -1043,44 +1017,43 @@ static int analyse_superblocks(struct dm
>  			break;
>  		default:
>  			dev = container_of(rdev, struct raid_dev, rdev);
> -			if (redundancy--) {
> -				if (dev->meta_dev)
> -					dm_put_device(ti, dev->meta_dev);
> +			if (dev->meta_dev)
> +				dm_put_device(ti, dev->meta_dev);
>
> -				dev->meta_dev = NULL;
> -				rdev->meta_bdev = NULL;
> +			dev->meta_dev = NULL;
> +			rdev->meta_bdev = NULL;
>
> -				if (rdev->sb_page)
> -					put_page(rdev->sb_page);
> +			if (rdev->sb_page)
> +				put_page(rdev->sb_page);
>
> -				rdev->sb_page = NULL;
> +			rdev->sb_page = NULL;
>
> -				rdev->sb_loaded = 0;
> +			rdev->sb_loaded = 0;
>
> -				/*
> -				 * We might be able to salvage the data device
> -				 * even though the meta device has failed.  For
> -				 * now, we behave as though '- -' had been
> -				 * set for this device in the table.
> -				 */
> -				if (dev->data_dev)
> -					dm_put_device(ti, dev->data_dev);
> -
> -				dev->data_dev = NULL;
> -				rdev->bdev = NULL;
> +			/*
> +			 * We might be able to salvage the data device
> +			 * even though the meta device has failed.  For
> +			 * now, we behave as though '- -' had been
> +			 * set for this device in the table.
> +			 */
> +			if (dev->data_dev)
> +				dm_put_device(ti, dev->data_dev);
>
> -				list_del(&rdev->same_set);
> +			dev->data_dev = NULL;
> +			rdev->bdev = NULL;
>
> -				continue;
> -			}
> -			ti->error = "Failed to load superblock";
> -			return ret;
> +			list_del(&rdev->same_set);
>  		}
>  	}
>
>  	if (!freshest)
>  		return 0;
>
> +	if (validate_raid_redundancy(rs)) {
> +		rs->ti->error = "Insufficient redundancy to activate array";
> +		return -EINVAL;
> +	}
> +
>  	/*
>  	 * Validation of the freshest device provides the source of
>  	 * validation for the remaining devices.
> @@ -1430,7 +1403,7 @@ static void raid_resume(struct dm_target
>
>  static struct target_type raid_target = {
>  	.name = "raid",
> -	.version = {1, 3, 1},
> +	.version = {1, 3, 2},
>  	.module = THIS_MODULE,
>  	.ctr = raid_ctr,
>  	.dtr = raid_dtr,
>
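For anyone who wants to poke at the per-mirror-set logic outside the kernel,
below is a minimal userspace sketch of the RAID10 "near"-layout check that
validate_raid_redundancy() performs.  The function name, the failed[]
representation, and the sample failure pattern are invented for illustration;
only the counting loop follows the patch:

/*
 * Illustrative userspace sketch - not kernel code.  'failed' marks
 * devices that are dead or queued for rebuild.
 */
#include <stdbool.h>
#include <stdio.h>

/* Return 0 if every mirror set keeps at least one healthy member. */
static int check_raid10_redundancy(const bool *failed,
				   unsigned raid_disks, unsigned copies)
{
	unsigned i, d, rebuilds_per_group = 0;

	for (i = 0; i < raid_disks * copies; i++) {
		if (!(i % copies))
			rebuilds_per_group = 0;	/* next mirror set starts */
		d = i % raid_disks;
		if (failed[d] && ++rebuilds_per_group >= copies)
			return -1;	/* an entire mirror set is gone */
	}
	return 0;
}

int main(void)
{
	/*
	 * 5 disks, 2 copies, laid out as in the patch's comment:
	 *	    A    A    B    B    C
	 *	    C    D    D    E    E
	 * Disks 0 and 2 fail in two *different* mirror sets, so the
	 * array is still activatable - exactly the case the old
	 * "count all failures together" check wrongly rejected.
	 */
	bool failed[5] = { true, false, true, false, false };

	printf("%s\n", check_raid10_redundancy(failed, 5, 2) ?
	       "too many failures" : "activatable");
	return 0;
}

Flip the pattern to disks 0 and 1 - both copies of chunk A - and the sketch
reports "too many failures", matching the goto too_many path above.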