When an IO error happens, reschedule_retry() will increase r1conf::nr_queued, which makes freeze_array() unblocked. However, before all r1bio in the memory pool are released, the memory pool should not be modified. Introduce freeze_array_totally() to solve the problem. Compared to freeze_array(), it's more strict because any in-flight io needs to complete including queued io. Signed-off-by: Xueshi Hu <xueshi.hu@xxxxxxxxxx> --- drivers/md/raid1.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index dd25832eb045..5605c9680818 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1072,7 +1072,7 @@ static void freeze_array(struct r1conf *conf, int extra) /* Stop sync I/O and normal I/O and wait for everything to * go quiet. * This is called in two situations: - * 1) management command handlers (reshape, remove disk, quiesce). + * 1) management command handlers (remove disk, quiesce). * 2) one normal I/O request failed. * After array_frozen is set to 1, new sync IO will be blocked at @@ -1111,6 +1111,37 @@ static void unfreeze_array(struct r1conf *conf) wake_up(&conf->wait_barrier); } +/* conf->resync_lock should be held */ +static int get_pending(struct r1conf *conf) +{ + int idx, ret; + + ret = atomic_read(&conf->nr_sync_pending); + for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++) + ret += atomic_read(&conf->nr_pending[idx]); + + return ret; +} + +static void freeze_array_totally(struct r1conf *conf) +{ + /* + * freeze_array_totally() is almost the same with freeze_array() except + * it requires there's no queued io. Raid1's reshape will destroy the + * old mempool and change r1conf::raid_disks, which are necessary when + * freeing the queued io. + */ + spin_lock_irq(&conf->resync_lock); + conf->array_frozen = 1; + raid1_log(conf->mddev, "freeze totally"); + wait_event_lock_irq_cmd( + conf->wait_barrier, + get_pending(conf) == 0, + conf->resync_lock, + md_wakeup_thread(conf->mddev->thread)); + spin_unlock_irq(&conf->resync_lock); +} + static void alloc_behind_master_bio(struct r1bio *r1_bio, struct bio *bio) { @@ -3296,7 +3327,7 @@ static int raid1_reshape(struct mddev *mddev) return -ENOMEM; } - freeze_array(conf, 0); + freeze_array_totally(conf); /* ok, everything is stopped */ oldpool = conf->r1bio_pool; -- 2.40.1