Hi all It's good to put the common codes into one function. Before this, I want to check a problem. Does idle_sync_thread need to stop sync thread? The sync thread can be run again immediately after stopping the sync thread when echo idle > sync_action. It looks like there is no meaning to stop the sync thread for idle_sync_thread. If we don't need to stop sync thread in idle_sync_thread, there is no need to introduce mddev->sync_seq and only needs to clear MD_RECOVERY_FROZEN in idle_sync_thread. Then it can make this patch simpler. Something like this diff --git a/drivers/md/md.c b/drivers/md/md.c index 3484a0fc4d2a..34245c4c71b8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -716,7 +716,6 @@ int mddev_init(struct mddev *mddev) timer_setup(&mddev->safemode_timer, md_safemode_timeout, 0); atomic_set(&mddev->active, 1); atomic_set(&mddev->openers, 0); - atomic_set(&mddev->sync_seq, 0); spin_lock_init(&mddev->lock); atomic_set(&mddev->flush_pending, 0); init_waitqueue_head(&mddev->sb_wait); @@ -4848,38 +4847,33 @@ action_show(struct mddev *mddev, char *page) return sprintf(page, "%s\n", type); } -static int stop_sync_thread(struct mddev *mddev) +static void stop_sync_thread(struct mddev *mddev) { int ret = 0; - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - return 0; - ret = mddev_lock(mddev); - if (ret) - return ret; + if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + return; - /* - * Check again in case MD_RECOVERY_RUNNING is cleared before lock is - * held. - */ - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { - mddev_unlock(mddev); - return 0; + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { + did_freeze = 1; + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + md_wakeup_thread(mddev->thread); } - if (work_pending(&mddev->sync_work)) - flush_workqueue(md_misc_wq); - set_bit(MD_RECOVERY_INTR, &mddev->recovery); + /* * Thread might be blocked waiting for metadata update which will now * never happen */ md_wakeup_thread_directly(mddev->sync_thread); - mddev_unlock(mddev); - return 0; + mddev_unlock(mddev); + wait_event(resync_wait, (mddev->sync_thread == NULL && + !test_bit(MD_RECOVERY_RUNNING, + &mddev->recovery))); + mddev_lock_nointr(mddev); } static int idle_sync_thread(struct mddev *mddev) @@ -4891,8 +4885,14 @@ static int idle_sync_thread(struct mddev *mddev) if (ret) return ret; - mddev_lock(mddev); - test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + ret = mddev_lock(mddev); + if (ret) { + mutex_unlock(&mddev->sync_mutex); + return ret; + } + + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + mddev_unlock(mddev); mutex_unlock(&mddev->sync_mutex); return ret; @@ -4906,17 +4906,15 @@ static int frozen_sync_thread(struct mddev *mddev) if (ret) return ret; - flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - ret = stop_sync_thread(mddev); - if (ret) - goto out; + ret = mddev_lock(mddev); + if (ret) { + mutex_unlock(&mddev->sync_mutex); + return ret; + } - ret = wait_event_interruptible(resync_wait, - mddev->sync_thread == NULL && - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); -out: - if (ret && !flag) - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + stop_sync_thread(mddev); + + mddev_unlock(mddev); mutex_unlock(&mddev->sync_mutex); return ret; } @@ -6388,22 +6386,9 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) return -EBUSY; - if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { - did_freeze = 1; - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - } - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - - /* - * Thread might be blocked waiting for metadata update which will now - * never happen - */ - md_wakeup_thread_directly(mddev->sync_thread); + stop_sync_thread(mddev); mddev_unlock(mddev); - wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, - &mddev->recovery)); wait_event(mddev->sb_wait, !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); mddev_lock_nointr(mddev); @@ -6448,25 +6433,8 @@ static int do_md_stop(struct mddev *mddev, int mode, struct md_rdev *rdev; int did_freeze = 0; - if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { - did_freeze = 1; - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - } - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) - set_bit(MD_RECOVERY_INTR, &mddev->recovery); - - /* - * Thread might be blocked waiting for metadata update which will now - * never happen - */ - md_wakeup_thread_directly(mddev->sync_thread); - - mddev_unlock(mddev); - wait_event(resync_wait, (mddev->sync_thread == NULL && - !test_bit(MD_RECOVERY_RUNNING, - &mddev->recovery))); - mddev_lock_nointr(mddev); - + stop_sync_thread(mddev); + mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sysfs_active || @@ -9622,7 +9590,6 @@ void md_reap_sync_thread(struct mddev *mddev) /* resync has finished, collect result */ md_unregister_thread(mddev, &mddev->sync_thread); - atomic_inc(&mddev->sync_seq); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && Best Regards Xiao On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote: > > From: Yu Kuai <yukuai3@xxxxxxxxxx> > > stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to > stop sync_thread() the same way, hence factor out a helper to make code > cleaner, and also prepare to use the new helper to fix problems later. > > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> > Signed-off-by: Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> > --- > drivers/md/md.c | 129 ++++++++++++++++++++++++++---------------------- > 1 file changed, 69 insertions(+), 60 deletions(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index c0f2bdafe46a..7252fae0c989 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page) > return sprintf(page, "%s\n", type); > } > > -static int stop_sync_thread(struct mddev *mddev) > +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr) > { > - int ret = 0; > + if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq)) > + return true; > > - if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) > - return 0; > + return (!mddev->sync_thread && > + !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); > +} > > - ret = mddev_lock(mddev); > - if (ret) > - return ret; > +/* > + * stop_sync_thread() - stop running sync_thread. > + * @mddev: the array that sync_thread belongs to. > + * @freeze: set true to prevent new sync_thread to start. > + * @interruptible: if set true, then user can interrupt while waiting for > + * sync_thread to be done. > + * > + * Noted that this function must be called with 'reconfig_mutex' grabbed, and > + * fter this function return, 'reconfig_mtuex' will be released. > + */ > +static int stop_sync_thread(struct mddev *mddev, bool freeze, > + bool interruptible) > + __releases(&mddev->reconfig_mutex) > +{ > + int *seq_ptr = NULL; > + int sync_seq; > + int ret = 0; > + > + if (freeze) { > + set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + } else { > + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + sync_seq = atomic_read(&mddev->sync_seq); > + seq_ptr = &sync_seq; > + } > > - /* > - * Check again in case MD_RECOVERY_RUNNING is cleared before lock is > - * held. > - */ > if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { > mddev_unlock(mddev); > return 0; > } > > - if (work_pending(&mddev->sync_work)) > - flush_workqueue(md_misc_wq); > - > set_bit(MD_RECOVERY_INTR, &mddev->recovery); > /* > * Thread might be blocked waiting for metadata update which will now > @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev) > md_wakeup_thread_directly(mddev->sync_thread); > > mddev_unlock(mddev); > - return 0; > + if (work_pending(&mddev->sync_work)) > + flush_work(&mddev->sync_work); > + > + if (interruptible) > + ret = wait_event_interruptible(resync_wait, > + sync_thread_stopped(mddev, seq_ptr)); > + else > + wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr)); > + > + return ret; > } > > static int idle_sync_thread(struct mddev *mddev) > { > int ret; > - int sync_seq = atomic_read(&mddev->sync_seq); > bool flag; > > ret = mutex_lock_interruptible(&mddev->sync_mutex); > if (ret) > return ret; > > - flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - ret = stop_sync_thread(mddev); > + flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + ret = mddev_lock(mddev); > if (ret) > - goto out; > + goto unlock; > > - ret = wait_event_interruptible(resync_wait, > - sync_seq != atomic_read(&mddev->sync_seq) || > - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); > -out: > + ret = stop_sync_thread(mddev, false, true); > if (ret && flag) > set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > +unlock: > mutex_unlock(&mddev->sync_mutex); > return ret; > } > > static int frozen_sync_thread(struct mddev *mddev) > { > - int ret = mutex_lock_interruptible(&mddev->sync_mutex); > + int ret; > bool flag; > > + ret = mutex_lock_interruptible(&mddev->sync_mutex); > if (ret) > return ret; > > - flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - ret = stop_sync_thread(mddev); > + flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > + ret = mddev_lock(mddev); > if (ret) > - goto out; > + goto unlock; > > - ret = wait_event_interruptible(resync_wait, > - mddev->sync_thread == NULL && > - !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)); > -out: > + ret = stop_sync_thread(mddev, true, true); > if (ret && !flag) > clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > +unlock: > mutex_unlock(&mddev->sync_mutex); > return ret; > } > @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) > if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) > return -EBUSY; > > - if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { > + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) > did_freeze = 1; > - set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > - } > - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) > - set_bit(MD_RECOVERY_INTR, &mddev->recovery); > > - /* > - * Thread might be blocked waiting for metadata update which will now > - * never happen > - */ > - md_wakeup_thread_directly(mddev->sync_thread); > - > - mddev_unlock(mddev); > - wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING, > - &mddev->recovery)); > + stop_sync_thread(mddev, true, false); > wait_event(mddev->sb_wait, > !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); > mddev_lock_nointr(mddev); > @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) > if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || > mddev->sync_thread || > test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { > + /* > + * This could happen if user change array state through > + * ioctl/sysfs while reconfig_mutex is released. > + */ > pr_warn("md: %s still in use.\n",mdname(mddev)); > err = -EBUSY; > goto out; > @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode, > struct md_rdev *rdev; > int did_freeze = 0; > > - if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) { > + if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) > did_freeze = 1; > + > + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { > + stop_sync_thread(mddev, true, false); > + mddev_lock_nointr(mddev); > + } else { > set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > } > - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) > - set_bit(MD_RECOVERY_INTR, &mddev->recovery); > - > - /* > - * Thread might be blocked waiting for metadata update which will now > - * never happen > - */ > - md_wakeup_thread_directly(mddev->sync_thread); > - > - mddev_unlock(mddev); > - wait_event(resync_wait, (mddev->sync_thread == NULL && > - !test_bit(MD_RECOVERY_RUNNING, > - &mddev->recovery))); > - mddev_lock_nointr(mddev); > > mutex_lock(&mddev->open_mutex); > if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || > mddev->sysfs_active || > mddev->sync_thread || > test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { > + /* > + * This could happen if user change array state through > + * ioctl/sysfs while reconfig_mutex is released. > + */ > pr_warn("md: %s still in use.\n",mdname(mddev)); > mutex_unlock(&mddev->open_mutex); > if (did_freeze) { > -- > 2.39.2 >