On Fri, Oct 20, 2023 at 7:25 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > RCU is not used correctly here, because synchronize_rcu() is called > before replacing the old value, for example: > > remove_and_add_spares // other path > synchronize_rcu > // called before replacing old value > set_bit(RemoveSynchronized) > rcu_read_lock() > rdev = conf->mirrors[].rdev > pers->hot_remove_disk > conf->mirrors[].rdev = NULL; > if (!test_bit(RemoveSynchronized)) > synchronize_rcu > /* > * won't be called, and won't wait > * for concurrent readers to be done. > */ > // access rdev after remove_and_add_spares() > rcu_read_unlock() > > Fortunately, there is a separate RCU protection that prevents such an rdev > from being freed: > > md_kick_rdev_from_array //other path > rcu_read_lock() > rdev = conf->mirrors[].rdev > list_del_rcu(&rdev->same_set) > > rcu_read_unlock() > /* > * rdev can be removed from conf, but > * rdev won't be freed. > */ > synchronize_rcu() > free rdev > > Hence remove this useless flag and prepare to remove the RCU protection for > accessing rdev from 'conf'. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> Note: the RemoveSynchronized flag definition itself is not removed from md.h by this patch. 
> --- > drivers/md/md-multipath.c | 9 --------- > drivers/md/md.c | 37 ++++++------------------------------- > drivers/md/raid1.c | 9 --------- > drivers/md/raid10.c | 9 --------- > drivers/md/raid5.c | 9 --------- > 5 files changed, 6 insertions(+), 67 deletions(-) > > diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c > index d22276870283..aa77133f3188 100644 > --- a/drivers/md/md-multipath.c > +++ b/drivers/md/md-multipath.c > @@ -258,15 +258,6 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) > goto abort; > } > p->rdev = NULL; > - if (!test_bit(RemoveSynchronized, &rdev->flags)) { > - synchronize_rcu(); > - if (atomic_read(&rdev->nr_pending)) { > - /* lost the race, try later */ > - err = -EBUSY; > - p->rdev = rdev; > - goto abort; > - } > - } > err = md_integrity_register(mddev); > } > abort: > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 09686d8db983..68f3bb6e89cb 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -9250,44 +9250,19 @@ static int remove_and_add_spares(struct mddev *mddev, > struct md_rdev *rdev; > int spares = 0; > int removed = 0; > - bool remove_some = false; > > if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) > /* Mustn't remove devices when resync thread is running */ > return 0; > > rdev_for_each(rdev, mddev) { > - if ((this == NULL || rdev == this) && > - rdev->raid_disk >= 0 && > - !test_bit(Blocked, &rdev->flags) && > - test_bit(Faulty, &rdev->flags) && > - atomic_read(&rdev->nr_pending)==0) { > - /* Faulty non-Blocked devices with nr_pending == 0 > - * never get nr_pending incremented, > - * never get Faulty cleared, and never get Blocked set. 
> - * So we can synchronize_rcu now rather than once per device > - */ > - remove_some = true; > - set_bit(RemoveSynchronized, &rdev->flags); > - } > - } > - > - if (remove_some) > - synchronize_rcu(); > - rdev_for_each(rdev, mddev) { > - if ((this == NULL || rdev == this) && > - (test_bit(RemoveSynchronized, &rdev->flags) || > - rdev_removeable(rdev))) { > - if (mddev->pers->hot_remove_disk( > - mddev, rdev) == 0) { > - sysfs_unlink_rdev(mddev, rdev); > - rdev->saved_raid_disk = rdev->raid_disk; > - rdev->raid_disk = -1; > - removed++; > - } > + if ((this == NULL || rdev == this) && rdev_removeable(rdev) && > + !mddev->pers->hot_remove_disk(mddev, rdev)) { > + sysfs_unlink_rdev(mddev, rdev); > + rdev->saved_raid_disk = rdev->raid_disk; > + rdev->raid_disk = -1; > + removed++; > } > - if (remove_some && test_bit(RemoveSynchronized, &rdev->flags)) > - clear_bit(RemoveSynchronized, &rdev->flags); > } > > if (removed && mddev->kobj.sd) > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index c13088eae401..4348d670439d 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -1836,15 +1836,6 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) > goto abort; > } > p->rdev = NULL; > - if (!test_bit(RemoveSynchronized, &rdev->flags)) { > - synchronize_rcu(); > - if (atomic_read(&rdev->nr_pending)) { > - /* lost the race, try later */ > - err = -EBUSY; > - p->rdev = rdev; > - goto abort; > - } > - } > if (conf->mirrors[conf->raid_disks + number].rdev) { > /* We just removed a device that is being replaced. > * Move down the replacement. 
We drain all IO before > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > index 4b5f34f320c8..33ab00323cae 100644 > --- a/drivers/md/raid10.c > +++ b/drivers/md/raid10.c > @@ -2219,15 +2219,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) > goto abort; > } > *rdevp = NULL; > - if (!test_bit(RemoveSynchronized, &rdev->flags)) { > - synchronize_rcu(); > - if (atomic_read(&rdev->nr_pending)) { > - /* lost the race, try later */ > - err = -EBUSY; > - *rdevp = rdev; > - goto abort; > - } > - } > if (p->replacement) { > /* We must have just cleared 'rdev' */ > p->rdev = p->replacement; > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 27a4dce51c92..a80be51b4825 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -8202,15 +8202,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) > goto abort; > } > *rdevp = NULL; > - if (!test_bit(RemoveSynchronized, &rdev->flags)) { > - lockdep_assert_held(&mddev->reconfig_mutex); > - synchronize_rcu(); > - if (atomic_read(&rdev->nr_pending)) { > - /* lost the race, try later */ > - err = -EBUSY; > - rcu_assign_pointer(*rdevp, rdev); > - } > - } > if (!err) { > err = log_modify(conf, rdev, false); > if (err) > -- > 2.39.2 >