[PATCH v3 1/3] md/raid1: freeze array more strictly when reshaping

Xueshi Hu <xueshi.hu@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx> · Wed, 19 Jul 2023 15:09:52 +0800

When an IO error happens, reschedule_retry() increases
r1conf::nr_queued, which unblocks freeze_array(). However, the memory
pool must not be modified until every r1bio allocated from it has been
released. Introduce freeze_array_totally() to solve the problem. It is
stricter than freeze_array(): all in-flight IO, including queued IO,
must complete before it returns.

Signed-off-by: Xueshi Hu <xueshi.hu@xxxxxxxxxx>
---
 drivers/md/raid1.c | 35 +++++++++++++++++++++++++++++++++--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index dd25832eb045..5605c9680818 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1072,7 +1072,7 @@ static void freeze_array(struct r1conf *conf, int extra)
 	/* Stop sync I/O and normal I/O and wait for everything to
 	 * go quiet.
 	 * This is called in two situations:
-	 * 1) management command handlers (reshape, remove disk, quiesce).
+	 * 1) management command handlers (remove disk, quiesce).
 	 * 2) one normal I/O request failed.
 
 	 * After array_frozen is set to 1, new sync IO will be blocked at
@@ -1111,6 +1111,37 @@ static void unfreeze_array(struct r1conf *conf)
 	wake_up(&conf->wait_barrier);
 }
 
+/* Sums nr_sync_pending and every nr_pending[]; hold conf->resync_lock */
+static int get_pending(struct r1conf *conf)
+{
+	int idx, ret;
+
+	ret = atomic_read(&conf->nr_sync_pending);
+	for (idx = 0; idx < BARRIER_BUCKETS_NR; idx++)
+		ret += atomic_read(&conf->nr_pending[idx]);
+
+	return ret;
+}
+
+static void freeze_array_totally(struct r1conf *conf)
+{
+	/*
+	 * Like freeze_array(), but additionally waits until no IO is pending,
+	 * queued IO included. Raid1's reshape destroys the old mempool and
+	 * changes r1conf::raid_disks, both of which are still needed to free
+	 * queued IO, so nothing may remain outstanding before it proceeds.
+	 */
+	spin_lock_irq(&conf->resync_lock);
+	conf->array_frozen = 1;
+	raid1_log(conf->mddev, "freeze totally");
+	wait_event_lock_irq_cmd(
+			conf->wait_barrier,
+			get_pending(conf) == 0,
+			conf->resync_lock,
+			md_wakeup_thread(conf->mddev->thread));
+	spin_unlock_irq(&conf->resync_lock);
+}
+
 static void alloc_behind_master_bio(struct r1bio *r1_bio,
 					   struct bio *bio)
 {
@@ -3296,7 +3327,7 @@ static int raid1_reshape(struct mddev *mddev)
 		return -ENOMEM;
 	}
 
-	freeze_array(conf, 0);
+	freeze_array_totally(conf);
 
 	/* ok, everything is stopped */
 	oldpool = conf->r1bio_pool;
-- 
2.40.1