Re: [md PATCH 3/5] md: move suspend_hi/lo handling into core md code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Oct 17, 2017 at 01:46:43PM +1100, Neil Brown wrote:
> responding to ->suspend_lo and ->suspend_hi is similar
> to responding to ->suspended.  It is best to wait in
> the common core code without incrementing ->active_io.
> This allows mddev_suspend()/mddev_resume() to work while
> requests are waiting for suspend_lo/hi to change.
> This is will be important after a subsequent patch
> which uses mddev_suspend() to synchronize updating for
> suspend_lo/hi.
> 
> So move the code for testing suspend_lo/hi out of raid1.c
> and raid5.c, and place it in md.c
> 
> Signed-off-by: NeilBrown <neilb@xxxxxxxx>
> ---
>  drivers/md/md.c    |   29 +++++++++++++++++++++++------
>  drivers/md/raid1.c |   12 ++++--------
>  drivers/md/raid5.c |   22 ----------------------
>  3 files changed, 27 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 308456842d3e..f8d0e41120cb 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
>   * call has finished, the bio has been linked into some internal structure
>   * and so is visible to ->quiesce(), so we don't need the refcount any more.
>   */
> +static bool is_suspended(struct mddev *mddev, struct bio *bio)
> +{
> +	if (mddev->suspended)
> +		return true;
> +	if (bio_data_dir(bio) != WRITE)
> +		return false;
> +	if (mddev->suspend_lo >= mddev->suspend_hi)
> +		return false;
> +	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
> +		return false;
> +	if (bio_end_sector(bio) < mddev->suspend_lo)
> +		return false;
> +	return true;
> +}
> +
>  void md_handle_request(struct mddev *mddev, struct bio *bio)
>  {
>  check_suspended:
>  	rcu_read_lock();
> -	if (mddev->suspended) {
> +	if (is_suspended(mddev, bio)) {
>  		DEFINE_WAIT(__wait);
>  		for (;;) {
>  			prepare_to_wait(&mddev->sb_wait, &__wait,
>  					TASK_UNINTERRUPTIBLE);
> -			if (!mddev->suspended)
> +			if (!is_suspended(mddev, bio))
>  				break;
>  			rcu_read_unlock();
>  			schedule();
> @@ -4843,10 +4858,11 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
>  		goto unlock;
>  	old = mddev->suspend_lo;
>  	mddev->suspend_lo = new;
> -	if (new >= old)
> +	if (new >= old) {
>  		/* Shrinking suspended region */
> +		wake_up(&mddev->sb_wait);
>  		mddev->pers->quiesce(mddev, 2);

Do we still need the quiesce(mddev, 2)? sounds not to me.

> -	else {
> +	} else {
>  		/* Expanding suspended region - need to wait */
>  		mddev->pers->quiesce(mddev, 1);
>  		mddev->pers->quiesce(mddev, 0);
> @@ -4886,10 +4902,11 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
>  		goto unlock;
>  	old = mddev->suspend_hi;
>  	mddev->suspend_hi = new;
> -	if (new <= old)
> +	if (new <= old) {
>  		/* Shrinking suspended region */
> +		wake_up(&mddev->sb_wait);
>  		mddev->pers->quiesce(mddev, 2);
> -	else {
> +	} else {
>  		/* Expanding suspended region - need to wait */
>  		mddev->pers->quiesce(mddev, 1);
>  		mddev->pers->quiesce(mddev, 0);
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index f3f3e40dc9d8..277a145b3ff5 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -1310,11 +1310,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>  	 */
>  
>  
> -	if ((bio_end_sector(bio) > mddev->suspend_lo &&
> -	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
> -	    (mddev_is_clustered(mddev) &&
> +	if (mddev_is_clustered(mddev) &&
>  	     md_cluster_ops->area_resyncing(mddev, WRITE,
> -		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
> +		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {
>  
>  		/*
>  		 * As the suspend_* range is controlled by userspace, we want
> @@ -1325,12 +1323,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
>  			sigset_t full, old;
>  			prepare_to_wait(&conf->wait_barrier,
>  					&w, TASK_INTERRUPTIBLE);
> -			if (bio_end_sector(bio) <= mddev->suspend_lo ||
> -			    bio->bi_iter.bi_sector >= mddev->suspend_hi ||
> -			    (mddev_is_clustered(mddev) &&
> +			if (mddev_is_clustered(mddev) &&
>  			     !md_cluster_ops->area_resyncing(mddev, WRITE,
>  				     bio->bi_iter.bi_sector,
> -				     bio_end_sector(bio))))
> +				     bio_end_sector(bio)))
>  				break;
>  			sigfillset(&full);
>  			sigprocmask(SIG_BLOCK, &full, &old);
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 928e24a07133..4f40ccd21cbb 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5682,28 +5682,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
>  				goto retry;
>  			}
>  
> -			if (rw == WRITE &&
> -			    logical_sector >= mddev->suspend_lo &&
> -			    logical_sector < mddev->suspend_hi) {
> -				raid5_release_stripe(sh);
> -				/* As the suspend_* range is controlled by
> -				 * userspace, we want an interruptible
> -				 * wait.
> -				 */
> -				prepare_to_wait(&conf->wait_for_overlap,
> -						&w, TASK_INTERRUPTIBLE);
> -				if (logical_sector >= mddev->suspend_lo &&
> -				    logical_sector < mddev->suspend_hi) {
> -					sigset_t full, old;
> -					sigfillset(&full);
> -					sigprocmask(SIG_BLOCK, &full, &old);
> -					schedule();
> -					sigprocmask(SIG_SETMASK, &old, NULL);
> -					do_prepare = true;
> -				}
> -				goto retry;
> -			}
> -
>  			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
>  			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
>  				/* Stripe is busy expanding or

I think we can remove the state == 2 branch in raid5_quiesce().

This leaves only raid1 and md-cluster needs the quiesce(2) calls. In the
future, we probably should abstract that md-cluster logic to separate API and
let raid1 use it for waitting. The quiesce(2) is a strange name for the wait
purpose.

Thanks,
Shaohua

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux