On Sun, Oct 15, 2023 at 6:28 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > It's safe to accees rdev from conf: > - If any spinlock is held, because synchronize_rcu() from > md_kick_rdev_from_array() will prevent 'rdev' to be freed until > spinlock is released; > - If 'reconfig_lock' is held, because rdev can't be added or removed from > array; Maybe add lockdep asserts for the above cases? Thanks, Song > - If there is normal IO inflight, because mddev_suspend() will prevent > rdev to be added or removed from array; > - If there is sync IO inflight, because 'MD_RECOVERY_RUNNING' is > checked in remove_and_add_spares(). > > And these will cover all the scenarios in raid1. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > --- > drivers/md/raid1.c | 57 +++++++++++++++++----------------------------- > 1 file changed, 21 insertions(+), 36 deletions(-) > > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index 4348d670439d..5c647036663d 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect > int choose_first; > int choose_next_idle; > > - rcu_read_lock(); > /* > * Check if we can balance. We can balance on the whole > * device if no resync is going on, or below the resync window. > @@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect > unsigned int pending; > bool nonrot; > > - rdev = rcu_dereference(conf->mirrors[disk].rdev); > + rdev = conf->mirrors[disk].rdev; > if (r1_bio->bios[disk] == IO_BLOCKED > || rdev == NULL > || test_bit(Faulty, &rdev->flags)) > @@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect > } > > if (best_disk >= 0) { > - rdev = rcu_dereference(conf->mirrors[best_disk].rdev); > + rdev = conf->mirrors[best_disk].rdev; > if (!rdev) > goto retry; > atomic_inc(&rdev->nr_pending); > @@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect > > conf->mirrors[best_disk].next_seq_sect = this_sector + sectors; > } > - rcu_read_unlock(); > *max_sectors = sectors; > > return best_disk; > @@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio, > > if (r1bio_existed) { > /* Need to get the block device name carefully */ > - struct md_rdev *rdev; > - rcu_read_lock(); > - rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev); > + struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev; > + > if (rdev) > snprintf(b, sizeof(b), "%pg", rdev->bdev); > else > strcpy(b, "???"); > - rcu_read_unlock(); > } > > /* > @@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, > > disks = conf->raid_disks * 2; > blocked_rdev = NULL; > - rcu_read_lock(); > max_sectors = r1_bio->sectors; > for (i = 0; i < disks; i++) { > - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); > + struct md_rdev *rdev = conf->mirrors[i].rdev; > > /* > * The write-behind io is only attempted on drives marked as > @@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, > } > r1_bio->bios[i] = bio; > } > - rcu_read_unlock(); > > if (unlikely(blocked_rdev)) { > /* Wait for this device to become unblocked */ > @@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev) > struct r1conf *conf = mddev->private; > int i; > > + lockdep_assert_held(&mddev->lock); > + > seq_printf(seq, " [%d/%d] [", conf->raid_disks, > conf->raid_disks - mddev->degraded); > - rcu_read_lock(); > for (i = 0; i < conf->raid_disks; i++) { > - struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); > + struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev); > + > seq_printf(seq, "%s", > rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); > } > - rcu_read_unlock(); > seq_printf(seq, "]"); > } > > @@ -1785,7 +1780,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) > */ > if (rdev->saved_raid_disk < 0) > conf->fullsync = 1; > - rcu_assign_pointer(p->rdev, rdev); > + WRITE_ONCE(p->rdev, rdev); > break; > } > if (test_bit(WantReplacement, &p->rdev->flags) && > @@ -1801,7 +1796,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) > rdev->raid_disk = repl_slot; > err = 0; > conf->fullsync = 1; > - rcu_assign_pointer(p[conf->raid_disks].rdev, rdev); > + WRITE_ONCE(p[conf->raid_disks].rdev, rdev); > } > > return err; > @@ -1835,7 +1830,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) > err = -EBUSY; > goto abort; > } > - p->rdev = NULL; > + WRITE_ONCE(p->rdev, NULL); > if (conf->mirrors[conf->raid_disks + number].rdev) { > /* We just removed a device that is being replaced. > * Move down the replacement. We drain all IO before > @@ -1856,7 +1851,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) > goto abort; > } > clear_bit(Replacement, &repl->flags); > - p->rdev = repl; > + WRITE_ONCE(p->rdev, repl); > conf->mirrors[conf->raid_disks + number].rdev = NULL; > unfreeze_array(conf); > } > @@ -2253,8 +2248,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, > sector_t first_bad; > int bad_sectors; > > - rcu_read_lock(); > - rdev = rcu_dereference(conf->mirrors[d].rdev); > + rdev = conf->mirrors[d].rdev; > if (rdev && > (test_bit(In_sync, &rdev->flags) || > (!test_bit(Faulty, &rdev->flags) && > @@ -2262,15 +2256,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk, > is_badblock(rdev, sect, s, > &first_bad, &bad_sectors) == 0) { > atomic_inc(&rdev->nr_pending); > - rcu_read_unlock(); > if (sync_page_io(rdev, sect, s<<9, > conf->tmppage, REQ_OP_READ, false)) > success = 1; > rdev_dec_pending(rdev, mddev); > if (success) > break; > - } else > - rcu_read_unlock(); > + } > + > d++; > if (d == conf->raid_disks * 2) > d = 0; > @@ -2289,29 +2282,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk, > if (d==0) > d = conf->raid_disks * 2; > d--; > - rcu_read_lock(); > - rdev = rcu_dereference(conf->mirrors[d].rdev); > + rdev = conf->mirrors[d].rdev; > if (rdev && > !test_bit(Faulty, &rdev->flags)) { > atomic_inc(&rdev->nr_pending); > - rcu_read_unlock(); > r1_sync_page_io(rdev, sect, s, > conf->tmppage, WRITE); > rdev_dec_pending(rdev, mddev); > - } else > - rcu_read_unlock(); > + } > } > d = start; > while (d != read_disk) { > if (d==0) > d = conf->raid_disks * 2; > d--; > - rcu_read_lock(); > - rdev = rcu_dereference(conf->mirrors[d].rdev); > + rdev = conf->mirrors[d].rdev; > if (rdev && > !test_bit(Faulty, &rdev->flags)) { > atomic_inc(&rdev->nr_pending); > - rcu_read_unlock(); > if (r1_sync_page_io(rdev, sect, s, > conf->tmppage, READ)) { > atomic_add(s, &rdev->corrected_errors); > @@ -2322,8 +2310,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk, > rdev->bdev); > } > rdev_dec_pending(rdev, mddev); > - } else > - rcu_read_unlock(); > + } > } > sectors -= s; > sect += s; > @@ -2704,7 +2691,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, > > r1_bio = raid1_alloc_init_r1buf(conf); > > - rcu_read_lock(); > /* > * If we get a correctably read error during resync or recovery, > * we might want to read from a different device. So we > @@ -2725,7 +2711,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, > struct md_rdev *rdev; > bio = r1_bio->bios[i]; > > - rdev = rcu_dereference(conf->mirrors[i].rdev); > + rdev = conf->mirrors[i].rdev; > if (rdev == NULL || > test_bit(Faulty, &rdev->flags)) { > if (i < conf->raid_disks) > @@ -2783,7 +2769,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, > bio->bi_opf |= MD_FAILFAST; > } > } > - rcu_read_unlock(); > if (disk < 0) > disk = wonly; > r1_bio->read_disk = disk; > -- > 2.39.2 >