Re: [PATCH -next 6/8] md: factor out a helper to stop sync_thread

Xiao Ni <xni@xxxxxxxxxx> · Tue, 21 Nov 2023 14:34:02 +0800

On Tue, Nov 21, 2023 at 2:02 PM Xiao Ni <xni@xxxxxxxxxx> wrote:
>
> On Fri, Nov 10, 2023 at 5:34 PM Yu Kuai <yukuai1@xxxxxxxxxxxxxxx> wrote:
> >
> > From: Yu Kuai <yukuai3@xxxxxxxxxx>
> >
> > stop_sync_thread(), md_set_readonly() and do_md_stop() are trying to
> > stop sync_thread() the same way, hence factor out a helper to make code
> > cleaner, and also prepare to use the new helper to fix problems later.
> >
> > Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx>
> > Signed-off-by: Yu Kuai <yukuai1@xxxxxxxxxxxxxxx>
> > ---
> >  drivers/md/md.c | 129 ++++++++++++++++++++++++++----------------------
> >  1 file changed, 69 insertions(+), 60 deletions(-)
> >
> > diff --git a/drivers/md/md.c b/drivers/md/md.c
> > index c0f2bdafe46a..7252fae0c989 100644
> > --- a/drivers/md/md.c
> > +++ b/drivers/md/md.c
> > @@ -4848,29 +4848,46 @@ action_show(struct mddev *mddev, char *page)
> >         return sprintf(page, "%s\n", type);
> >  }
> >
> > -static int stop_sync_thread(struct mddev *mddev)
> > +static bool sync_thread_stopped(struct mddev *mddev, int *seq_ptr)
> >  {
> > -       int ret = 0;
> > +       if (seq_ptr && *seq_ptr != atomic_read(&mddev->sync_seq))
> > +               return true;
> >
> > -       if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> > -               return 0;
> > +       return (!mddev->sync_thread &&
> > +               !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> > +}
> >
> > -       ret = mddev_lock(mddev);
> > -       if (ret)
> > -               return ret;
> > +/*
> > + * stop_sync_thread() - stop running sync_thread.
> > + * @mddev: the array that sync_thread belongs to.
> > + * @freeze: set true to prevent new sync_thread to start.
> > + * @interruptible: if set true, then user can interrupt while waiting for
> > + * sync_thread to be done.
> > + *
> > + * Noted that this function must be called with 'reconfig_mutex' grabbed, and
> > + * fter this function return, 'reconfig_mtuex' will be released.
> > + */
> > +static int stop_sync_thread(struct mddev *mddev, bool freeze,
> > +                           bool interruptible)
> > +       __releases(&mddev->reconfig_mutex)
> > +{
> > +       int *seq_ptr = NULL;
> > +       int sync_seq;
> > +       int ret = 0;
> > +
> > +       if (freeze) {
> > +               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +       } else {
> > +               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +               sync_seq = atomic_read(&mddev->sync_seq);
> > +               seq_ptr = &sync_seq;
> > +       }
> >
> > -       /*
> > -        * Check again in case MD_RECOVERY_RUNNING is cleared before lock is
> > -        * held.
> > -        */
> >         if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> >                 mddev_unlock(mddev);
> >                 return 0;
> >         }
> Hi Kuai
>
> It does the unlock inside this function. For me, it's not good,
> because the caller does the lock. So the caller should do the unlock
> too.
> >
> > -       if (work_pending(&mddev->sync_work))
> > -               flush_workqueue(md_misc_wq);
> > -
> >         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> >         /*
> >          * Thread might be blocked waiting for metadata update which will now
> > @@ -4879,53 +4896,58 @@ static int stop_sync_thread(struct mddev *mddev)
> >         md_wakeup_thread_directly(mddev->sync_thread);
> >
> >         mddev_unlock(mddev);
>
> Same with above point.
>
> > -       return 0;
> > +       if (work_pending(&mddev->sync_work))
> > +               flush_work(&mddev->sync_work);
> > +
> > +       if (interruptible)
> > +               ret = wait_event_interruptible(resync_wait,
> > +                                       sync_thread_stopped(mddev, seq_ptr));
> > +       else
> > +               wait_event(resync_wait, sync_thread_stopped(mddev, seq_ptr));
> > +
>
> It looks like the three roles (md_set_readonly, do_md_stop and
> stop_sync_thread) need to wait for different events. We can move these
> codes out this helper function and make this helper function to be
> more common.

Or get lock again before returning this function and leave the wait here?

Regards
Xiao

>
> Best Regards
> Xiao
>
>
> > +       return ret;
> >  }
> >
> >  static int idle_sync_thread(struct mddev *mddev)
> >  {
> >         int ret;
> > -       int sync_seq = atomic_read(&mddev->sync_seq);
> >         bool flag;
> >
> >         ret = mutex_lock_interruptible(&mddev->sync_mutex);
> >         if (ret)
> >                 return ret;
> >
> > -       flag = test_and_clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > -       ret = stop_sync_thread(mddev);
> > +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +       ret = mddev_lock(mddev);
> >         if (ret)
> > -               goto out;
> > +               goto unlock;
> >
> > -       ret = wait_event_interruptible(resync_wait,
> > -                       sync_seq != atomic_read(&mddev->sync_seq) ||
> > -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> > -out:
> > +       ret = stop_sync_thread(mddev, false, true);
> >         if (ret && flag)
> >                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +unlock:
> >         mutex_unlock(&mddev->sync_mutex);
> >         return ret;
> >  }
> >
> >  static int frozen_sync_thread(struct mddev *mddev)
> >  {
> > -       int ret = mutex_lock_interruptible(&mddev->sync_mutex);
> > +       int ret;
> >         bool flag;
> >
> > +       ret = mutex_lock_interruptible(&mddev->sync_mutex);
> >         if (ret)
> >                 return ret;
> >
> > -       flag = test_and_set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > -       ret = stop_sync_thread(mddev);
> > +       flag = test_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +       ret = mddev_lock(mddev);
> >         if (ret)
> > -               goto out;
> > +               goto unlock;
> >
> > -       ret = wait_event_interruptible(resync_wait,
> > -                       mddev->sync_thread == NULL &&
> > -                       !test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
> > -out:
> > +       ret = stop_sync_thread(mddev, true, true);
> >         if (ret && !flag)
> >                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > +unlock:
> >         mutex_unlock(&mddev->sync_mutex);
> >         return ret;
> >  }
> > @@ -6397,22 +6419,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
> >         if (mddev->external && test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
> >                 return -EBUSY;
> >
> > -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> > +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
> >                 did_freeze = 1;
> > -               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> > -       }
> > -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> > -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> >
> > -       /*
> > -        * Thread might be blocked waiting for metadata update which will now
> > -        * never happen
> > -        */
> > -       md_wakeup_thread_directly(mddev->sync_thread);
> > -
> > -       mddev_unlock(mddev);
> > -       wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
> > -                                         &mddev->recovery));
> > +       stop_sync_thread(mddev, true, false);
> >         wait_event(mddev->sb_wait,
> >                    !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
> >         mddev_lock_nointr(mddev);
> > @@ -6421,6 +6431,10 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
> >         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
> >             mddev->sync_thread ||
> >             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> > +               /*
> > +                * This could happen if user change array state through
> > +                * ioctl/sysfs while reconfig_mutex is released.
> > +                */
> >                 pr_warn("md: %s still in use.\n",mdname(mddev));
> >                 err = -EBUSY;
> >                 goto out;
> > @@ -6457,30 +6471,25 @@ static int do_md_stop(struct mddev *mddev, int mode,
> >         struct md_rdev *rdev;
> >         int did_freeze = 0;
> >
> > -       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
> > +       if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
> >                 did_freeze = 1;
> > +
> > +       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> > +               stop_sync_thread(mddev, true, false);
> > +               mddev_lock_nointr(mddev);
> > +       } else {
> >                 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> >         }
> > -       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> > -               set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> > -
> > -       /*
> > -        * Thread might be blocked waiting for metadata update which will now
> > -        * never happen
> > -        */
> > -       md_wakeup_thread_directly(mddev->sync_thread);
> > -
> > -       mddev_unlock(mddev);
> > -       wait_event(resync_wait, (mddev->sync_thread == NULL &&
> > -                                !test_bit(MD_RECOVERY_RUNNING,
> > -                                          &mddev->recovery)));
> > -       mddev_lock_nointr(mddev);
> >
> >         mutex_lock(&mddev->open_mutex);
> >         if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) ||
> >             mddev->sysfs_active ||
> >             mddev->sync_thread ||
> >             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
> > +               /*
> > +                * This could happen if user change array state through
> > +                * ioctl/sysfs while reconfig_mutex is released.
> > +                */
> >                 pr_warn("md: %s still in use.\n",mdname(mddev));
> >                 mutex_unlock(&mddev->open_mutex);
> >                 if (did_freeze) {
> > --
> > 2.39.2
> >