This patch depends on patch 07. Switch nr_pending to atomic mode for all rdevs running in RAID 1/10. These personalities read the value of nr_pending during normal operation (choose_best_rdev() in raid1, read_balance() in raid10), and the value read is only meaningful while the percpu_ref is in atomic mode. Therefore, nr_pending must always be consistent. Signed-off-by: Keisuke TADA <keisuke1.tada@xxxxxxxxxx> Signed-off-by: Toshifumi OHTAKE <toshifumi.ootake@xxxxxxxxxx> --- drivers/md/md.h | 14 ++++++++++++++ drivers/md/raid1.c | 7 +++++++ drivers/md/raid10.c | 4 ++++ 3 files changed, 25 insertions(+) diff --git a/drivers/md/md.h b/drivers/md/md.h index ab09e312c9bb..57b09b567ffa 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -236,6 +236,20 @@ static inline unsigned long nr_pending_read(struct md_rdev *rdev) return atomic_long_read(&rdev->nr_pending.data->count); } +static inline bool nr_pending_is_percpu_mode(struct md_rdev *rdev) +{ + unsigned long __percpu *percpu_count; + + return __ref_is_percpu(&rdev->nr_pending, &percpu_count); +} + +static inline bool nr_pending_is_atomic_mode(struct md_rdev *rdev) +{ + unsigned long __percpu *percpu_count; + + return !__ref_is_percpu(&rdev->nr_pending, &percpu_count); +} + static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, sector_t *first_bad, int *bad_sectors) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 12318fb15a88..c38ae13aadab 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -784,6 +784,7 @@ static int choose_best_rdev(struct r1conf *conf, struct r1bio *r1_bio) if (ctl.readable_disks++ == 1) set_bit(R1BIO_FailFast, &r1_bio->state); + WARN_ON_ONCE(nr_pending_is_percpu_mode(rdev)); pending = nr_pending_read(rdev); dist = abs(r1_bio->sector - conf->mirrors[disk].head_position); @@ -1930,6 +1931,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (err) return err; + percpu_ref_switch_to_atomic_sync(&rdev->nr_pending); raid1_add_conf(conf, rdev, mirror, false); /* As all devices are equivalent, we don't need a full recovery * if this was recently any drive of the array @@ 
-1949,6 +1951,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) set_bit(Replacement, &rdev->flags); raid1_add_conf(conf, rdev, repl_slot, true); err = 0; + percpu_ref_switch_to_atomic_sync(&rdev->nr_pending); conf->fullsync = 1; } @@ -3208,6 +3211,7 @@ static void raid1_free(struct mddev *mddev, void *priv); static int raid1_run(struct mddev *mddev) { struct r1conf *conf; + struct md_rdev *rdev; int i; int ret; @@ -3269,6 +3273,9 @@ static int raid1_run(struct mddev *mddev) /* * Ok, everything is just fine now */ + rdev_for_each(rdev, mddev) { + percpu_ref_switch_to_atomic_sync(&rdev->nr_pending); + } rcu_assign_pointer(mddev->thread, conf->thread); rcu_assign_pointer(conf->thread, NULL); mddev->private = conf; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index b91dd6c0be5a..66896a1076e1 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -808,6 +808,7 @@ static struct md_rdev *read_balance(struct r10conf *conf, nonrot = bdev_nonrot(rdev->bdev); has_nonrot_disk |= nonrot; + WARN_ON_ONCE(nr_pending_is_percpu_mode(rdev)); pending = nr_pending_read(rdev); if (min_pending > pending && nonrot) { min_pending = pending; @@ -2113,6 +2114,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) p->recovery_disabled = mddev->recovery_disabled - 1; rdev->raid_disk = mirror; err = 0; + percpu_ref_switch_to_atomic_sync(&rdev->nr_pending); if (rdev->saved_raid_disk != mirror) conf->fullsync = 1; WRITE_ONCE(p->rdev, rdev); @@ -2127,6 +2129,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) err = mddev_stack_new_rdev(mddev, rdev); if (err) return err; + percpu_ref_switch_to_atomic_sync(&rdev->nr_pending); conf->fullsync = 1; WRITE_ONCE(p->replacement, rdev); } @@ -4028,6 +4031,7 @@ static int raid10_run(struct mddev *mddev) rdev_for_each(rdev, mddev) { long long diff; + percpu_ref_switch_to_atomic_sync(&rdev->nr_pending); disk_idx = rdev->raid_disk; if (disk_idx < 0) continue; -- 2.34.1