On Tue, Oct 17, 2017 at 04:18:36PM +1100, NeilBrown wrote: > The ->recovery_offset shows how much of a non-InSync device is actually > in sync - how much has been recoveryed. > > When performing a recovery, ->curr_resync and ->curr_resync_completed > follow the device address being recovered and so can be used to update > ->recovery_offset. > > When performing a reshape, ->curr_resync* might follow the device > addresses (raid5) or might follow array addresses (raid10), so cannot > in general be used to set ->recovery_offset. When reshaping backwards, > ->curre_resync* measures from the *end* of the array-or-device, so is > particularly unhelpful. > > So change the common code in md.c to only use ->curr_resync_complete > for the simple recovery case, and add code to raid5.c to update > ->recovery_offset during a forwards reshape. > > Signed-off-by: NeilBrown <neilb@xxxxxxxx> > --- > drivers/md/md.c | 32 +++++++++++++++++++++----------- > drivers/md/raid5.c | 18 ++++++++++++++++++ > 2 files changed, 39 insertions(+), 11 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index a9f1352b3849..d1dfc9879368 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -2453,10 +2453,17 @@ void md_update_sb(struct mddev *mddev, int force_change) > } > } > > - /* First make sure individual recovery_offsets are correct */ > + /* First make sure individual recovery_offsets are correct > + * curr_resync_completed can only be used during recovery. > + * During reshape/resync it might use array-addresses rather > + * that device addresses. > + */ > rdev_for_each(rdev, mddev) { > if (rdev->raid_disk >= 0 && > mddev->delta_disks >= 0 && > + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && > + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery) && > + !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && > !test_bit(Journal, &rdev->flags) && > !test_bit(In_sync, &rdev->flags) && > mddev->curr_resync_completed > rdev->recovery_offset) > @@ -8507,16 +8514,19 @@ void md_do_sync(struct md_thread *thread) > } else { > if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) > mddev->curr_resync = MaxSector; > - rcu_read_lock(); > - rdev_for_each_rcu(rdev, mddev) > - if (rdev->raid_disk >= 0 && > - mddev->delta_disks >= 0 && > - !test_bit(Journal, &rdev->flags) && > - !test_bit(Faulty, &rdev->flags) && > - !test_bit(In_sync, &rdev->flags) && > - rdev->recovery_offset < mddev->curr_resync) > - rdev->recovery_offset = mddev->curr_resync; > - rcu_read_unlock(); > + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && > + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { > + rcu_read_lock(); > + rdev_for_each_rcu(rdev, mddev) > + if (rdev->raid_disk >= 0 && > + mddev->delta_disks >= 0 && > + !test_bit(Journal, &rdev->flags) && > + !test_bit(Faulty, &rdev->flags) && > + !test_bit(In_sync, &rdev->flags) && > + rdev->recovery_offset < mddev->curr_resync) > + rdev->recovery_offset = mddev->curr_resync; > + rcu_read_unlock(); > + } > } > } > skip: > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index a8df52130f8a..89ad79614309 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -5739,6 +5739,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk > */ > struct r5conf *conf = mddev->private; > struct stripe_head *sh; > + struct md_rdev *rdev; > sector_t first_sector, last_sector; > int raid_disks = conf->previous_raid_disks; > int data_disks = raid_disks - conf->max_degraded; > @@ -5861,6 +5862,15 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk > return 0; > mddev->reshape_position = conf->reshape_progress; > mddev->curr_resync_completed = sector_nr; > + if (!mddev->reshape_backwards) > + /* Can update recovery_offset */ > + rdev_for_each(rdev, mddev) > + if (rdev->raid_disk >= 0 && > + !test_bit(Journal, &rdev->flags) && > + !test_bit(In_sync, &rdev->flags) && > + rdev->recovery_offset < sector_nr) > + rdev->recovery_offset = sector_nr; > + > conf->reshape_checkpoint = jiffies; > set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); > md_wakeup_thread(mddev->thread); > @@ -5959,6 +5969,14 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk > goto ret; > mddev->reshape_position = conf->reshape_progress; > mddev->curr_resync_completed = sector_nr; > + if (!mddev->reshape_backwards) > + /* Can update recovery_offset */ > + rdev_for_each(rdev, mddev) > + if (rdev->raid_disk >= 0 && > + !test_bit(Journal, &rdev->flags) && > + !test_bit(In_sync, &rdev->flags) && > + rdev->recovery_offset < sector_nr) > + rdev->recovery_offset = sector_nr; > conf->reshape_checkpoint = jiffies; > set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); > md_wakeup_thread(mddev->thread); > > > -- Hi, This patch has caused a regression in following IMSM RAID scenario: mdadm --create /dev/md/imsm0 --metadata=imsm --raid-devices=4 /dev/nvme[0-3]n1 --run mdadm --create /dev/md/r10 --level=10 --size=100M --raid-devices=4 /dev/nvme[0-3]n1 --run --assume-clean MDADM_EXPERIMENTAL=1 mdadm --grow /dev/md/r10 --level=0 MDADM_EXPERIMENTAL=1 mdadm --grow /dev/md/imsm0 --raid-devices=4 cat /proc/mdstat Personalities : [raid10] [raid0] [raid6] [raid5] [raid4] md126 : active raid4 nvme0n1[3] nvme2n1[2] nvme1n1[1] 409600 blocks super external:/md127/0 level 4, 128k chunk, algorithm 0 [5/3] [UU_U_] md127 : inactive nvme3n1[3](S) nvme2n1[2](S) nvme1n1[1](S) nvme0n1[0](S) 4420 blocks super external:imsm The array should have been reshaped to 4-disk RAID0 but it has failed in the middle of reshape as RAID4. Following condition is not true for above scenario: > + if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && > + test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) { Tomek -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html