On Tue, Nov 28, 2023 at 8:32 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > Currently sync thread is stopped from multiple contex: > - idle_sync_thread > - frozen_sync_thread > - __md_stop_writes > - md_set_readonly > - do_md_stop > > And there are some problems: > 1) sync_work is flushed while reconfig_mutex is grabbed, this can > deadlock because the work function will grab reconfig_mutex as well. > 2) md_reap_sync_thread() can't be called directly while md_do_sync() is > not finished yet, for example, commit 130443d60b1b ("md: refactor > idle/frozen_sync_thread() to fix deadlock"). > 3) If MD_RECOVERY_RUNNING is not set, there is no need to stop > sync_thread at all because sync_thread must not be registered. > > Factor out a helper prepare_to_stop_sync_thread(), so that above contex > will behave the same. Fix 1) by flushing sync_work after reconfig_mutex > is released, before waiting for sync_thread to be done; Fix 2) bt > letting daemon thread to unregister sync_thread; Fix 3) by always > checking MD_RECOVERY_RUNNING first. > > Fixes: db5e653d7c9f ("md: delay choosing sync action to md_start_sync()") > Acked-by: Xiao Ni <xni@xxxxxxxxxx> > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > --- > drivers/md/md.c | 96 +++++++++++++++++++++++-------------------------- > 1 file changed, 45 insertions(+), 51 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 2d8e45a1af23..05902e36db66 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4840,26 +4840,9 @@ action_show(struct mddev *mddev, char *page) > return sprintf(page, "%s\n", type); > } > > -static void stop_sync_thread(struct mddev *mddev) > +static void prepare_to_stop_sync_thread(struct mddev *mddev) > + __releases(&mddev->reconfig_mutex) > { > - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) > - return; > - > - if (mddev_lock(mddev)) > - return; > - > - /* > - * Check again in case MD_RECOVERY_RUNNING is cleared before lock is > - * held. > - */ > - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { > - mddev_unlock(mddev); > - return; > - } > - > - if (work_pending(&mddev->sync_work)) > - flush_workqueue(md_misc_wq); > - > set_bit(MD_RECOVERY_INTR, &mddev->recovery); > /* > * Thread might be blocked waiting for metadata update which will now > @@ -4868,6 +4851,8 @@ static void stop_sync_thread(struct mddev *mddev) > md_wakeup_thread_directly(mddev->sync_thread); > > mddev_unlock(mddev); > + if (work_pending(&mddev->sync_work)) > + flush_work(&mddev->sync_work); > } > > static void idle_sync_thread(struct mddev *mddev) > @@ -4876,10 +4861,20 @@ static void idle_sync_thread(struct mddev *mddev) > > mutex_lock(&mddev->sync_mutex); > clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - stop_sync_thread(mddev); > > - wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) || > + if (mddev_lock(mddev)) { > + mutex_unlock(&mddev->sync_mutex); > + return; > + } > + > + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { > + prepare_to_stop_sync_thread(mddev); > + wait_event(resync_wait, > + sync_seq != atomic_read(&mddev->sync_seq) || > !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); > + } else { > + mddev_unlock(mddev); > + } We have a few patterns like this: if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { prepare_to_stop_sync_thread(mddev); wait_event(resync_wait, sync_seq != atomic_read(&mddev->sync_seq) || !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); } else { mddev_unlock(mddev); } This is pretty confusing. Maybe try something like (untested) static void stop_sync_thread(struct mddev *mddev, bool do_unlock, bool check_sync_seq) { if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { if (do_unlock) mddev_unlock(mddev); return; } set_bit(MD_RECOVERY_INTR, &mddev->recovery); /* * Thread might be blocked waiting for metadata update which will now * never happen */ md_wakeup_thread_directly(mddev->sync_thread); mddev_unlock(mddev); if (work_pending(&mddev->sync_work)) flush_work(&mddev->sync_work); wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || (check_sync_seq && sync_seq != atomic_read(&mddev->sync_seq))); if (!do_unlock) mddev_lock_nointr(mddev); } Thanks, Song