From: Yu Kuai <yukuai3@xxxxxxxxxx> submit_flushes atomic_set(&mddev->flush_pending, 1); rdev_for_each_rcu(rdev, mddev) atomic_inc(&mddev->flush_pending); bi->bi_end_io = md_end_flush submit_bio(bi); /* flush io is done first */ md_end_flush if (atomic_dec_and_test(&mddev->flush_pending)) percpu_ref_put(&mddev->active_io) -> active_io is not released if (atomic_dec_and_test(&mddev->flush_pending)) -> missing release of active_io For consequence, mddev_suspend() will wait for 'active_io' to be zero forever. Fix this problem by releasing 'active_io' in submit_flushes() if 'flush_pending' is decreased to zero. Fixes: fa2bbff7b0b4 ("md: synchronize flush io with array reconfiguration") Reported-by: Blazej Kucman <blazej.kucman@xxxxxxxxxxxxxxx> Closes: https://lore.kernel.org/lkml/20240130172524.0000417b@xxxxxxxxxxxxxxx/ Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 85fde05c37dd..9e41a9aaba8b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -579,8 +579,12 @@ static void submit_flushes(struct work_struct *ws) rcu_read_lock(); } rcu_read_unlock(); - if (atomic_dec_and_test(&mddev->flush_pending)) + if (atomic_dec_and_test(&mddev->flush_pending)) { + /* The pair is percpu_ref_get() from md_flush_request() */ + percpu_ref_put(&mddev->active_io); + queue_work(md_wq, &mddev->flush_work); + } } static void md_submit_flush_data(struct work_struct *ws) -- 2.39.2