Re: [PATCH v3 2/2] md: introduce new personality funciton start()

Shaohua Li <shli@xxxxxxxxxx> · Sun, 19 Nov 2017 20:09:21 -0800

On Fri, Nov 17, 2017 at 05:23:12PM -0800, Song Liu wrote:
> In do_md_run(), md threads should not wake up until the array is fully
> initialized in md_run(). However, in raid5_run(), raid5-cache may wake
> up mddev->thread to flush stripes that need to be written back. This
> design doesn't break badly right now. But it could lead to bad bug in
> the future.
> 
> This patch tries to resolve this problem by splitting start up work
> into two personality functions, run() and start(). Tasks that do not
> require the md threads should go into run(), while task that require
> the md threads go into start().
> 
> r5l_load_log() is moved to raid5_start(), so it is not called until
> the md threads are started in do_md_run().
> 
> Signed-off-by: Song Liu <songliubraving@xxxxxx>
> ---
>  drivers/md/dm-raid.c     |  8 ++++++++
>  drivers/md/md.c          | 32 ++++++++++++++++++++++++++------
>  drivers/md/md.h          |  7 +++++++
>  drivers/md/raid5-cache.c | 22 +++++++++++++++++-----
>  drivers/md/raid5-log.h   |  1 +
>  drivers/md/raid5.c       | 10 ++++++++++
>  6 files changed, 69 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
> index 8b1d931..83dc6ba 100644
> --- a/drivers/md/dm-raid.c
> +++ b/drivers/md/dm-raid.c
> @@ -3156,6 +3156,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  		goto bad;
>  	}
>  
> +	r = md_start(&rs->md);
> +
> +	if (r) {
> +		ti->error = "Failed to start raid array";
> +		mddev_unlock(&rs->md);
> +		goto bad;
> +	}

shouldn't we call md_stop for the failure?

> +
>  	rs->callbacks.congested_fn = raid_is_congested;
>  	dm_table_add_target_callbacks(ti->table, &rs->callbacks);
>  
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index e014d39..f930449 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -5561,11 +5561,6 @@ int md_run(struct mddev *mddev)
>  	if (start_readonly && mddev->ro == 0)
>  		mddev->ro = 2; /* read-only, but switch on first write */
>  
> -	/*
> -	 * NOTE: some pers->run(), for example r5l_recovery_log(), wakes
> -	 * up mddev->thread. It is important to initialize critical
> -	 * resources for mddev->thread BEFORE calling pers->run().
> -	 */
>  	err = pers->run(mddev);
>  	if (err)
>  		pr_warn("md: pers->run() failed ...\n");
> @@ -5679,7 +5674,17 @@ static int do_md_run(struct mddev *mddev)
>  	if (mddev_is_clustered(mddev))
>  		md_allow_write(mddev);
>  
> +	set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
>  	md_wakeup_thread(mddev->thread);
> +	/* run start up tasks that require md_thread */
> +	if (mddev->pers->start) {
> +		err = mddev->pers->start(mddev);
> +		if (err) {
> +			bitmap_destroy(mddev);
> +			goto out;
> +		}
> +	}

we'd better not open code the logic here, just call md_start. I think seting
the MD_RECOVERY_WAIT bit twice isn't a problem.

> +	clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
>  	md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
>  
>  	set_capacity(mddev->gendisk, mddev->array_sectors);
> @@ -5690,6 +5695,20 @@ static int do_md_run(struct mddev *mddev)
>  	return err;
>  }
>  
> +int md_start(struct mddev *mddev)
> +{
> +	int ret;
> +
> +	if (mddev->pers->start) {
> +		set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
> +		ret = mddev->pers->start(mddev);
> +		clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);

should we call md_wakeup_thread(mddev->sync_thread); here? I think we should to
avoid potential problems.

> +		return ret;
> +	}
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(md_start);
> +
>  static int restart_array(struct mddev *mddev)
>  {
>  	struct gendisk *disk = mddev->gendisk;
> @@ -8168,7 +8187,8 @@ void md_do_sync(struct md_thread *thread)
>  	int ret;
>  
>  	/* just incase thread restarts... */
> -	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
> +	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
> +	    test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
>  		return;
>  	if (mddev->ro) {/* never try to sync a read-only array */
>  		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 7d6bcf0..7dcc7e6 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -485,6 +485,7 @@ enum recovery_flags {
>  	MD_RECOVERY_RESHAPE,	/* A reshape is happening */
>  	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
>  	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
> +	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
>  };
>  
>  static inline int __must_check mddev_lock(struct mddev *mddev)
> @@ -523,7 +524,12 @@ struct md_personality
>  	struct list_head list;
>  	struct module *owner;
>  	bool (*make_request)(struct mddev *mddev, struct bio *bio);
> +	/* start up works that do NOT require md_thread. tasks that
> +	 * requires md_thread should go into start()
> +	 */

the comments should be:
	/*
	 * xxx
	 */
>  	int (*run)(struct mddev *mddev);
> +	/* start up works that require md threads */
> +	int (*start)(struct mddev *mddev);
>  	void (*free)(struct mddev *mddev, void *priv);
>  	void (*status)(struct seq_file *seq, struct mddev *mddev);
>  	/* error_handler must set ->faulty and clear ->in_sync
> @@ -687,6 +693,7 @@ extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
>  
>  extern void mddev_init(struct mddev *mddev);
>  extern int md_run(struct mddev *mddev);
> +extern int md_start(struct mddev *mddev);
>  extern void md_stop(struct mddev *mddev);
>  extern void md_stop_writes(struct mddev *mddev);
>  extern int md_rdev_init(struct md_rdev *rdev);
> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
> index e01f229..2e1978c 100644
> --- a/drivers/md/raid5-cache.c
> +++ b/drivers/md/raid5-cache.c
> @@ -2448,7 +2448,6 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
>  		raid5_release_stripe(sh);
>  	}
>  
> -	md_wakeup_thread(conf->mddev->thread);
>  	/* reuse conf->wait_for_quiescent in recovery */
>  	wait_event(conf->wait_for_quiescent,
>  		   atomic_read(&conf->active_stripes) == 0);
> @@ -3037,6 +3036,23 @@ static int r5l_load_log(struct r5l_log *log)
>  	return ret;
>  }
>  
> +int r5l_start(struct r5l_log *log)
> +{
> +	int ret;
> +
> +	if (!log)
> +		return 0;
> +
> +	ret = r5l_load_log(log);
> +	if (ret) {
> +		struct mddev *mddev = log->rdev->mddev;
> +		struct r5conf *conf = mddev->private;
> +
> +		r5l_exit_log(conf);
> +	}
> +	return ret;
> +}
> +
>  void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
>  {
>  	struct r5conf *conf = mddev->private;
> @@ -3139,13 +3155,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
>  
>  	rcu_assign_pointer(conf->log, log);
>  
> -	if (r5l_load_log(log))
> -		goto error;
> -
>  	set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
>  	return 0;
>  
> -error:
>  	rcu_assign_pointer(conf->log, NULL);
>  	md_unregister_thread(&log->reclaim_thread);
>  reclaim_thread:
> diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
> index c3596a2..d8577b0 100644
> --- a/drivers/md/raid5-log.h
> +++ b/drivers/md/raid5-log.h
> @@ -31,6 +31,7 @@ extern struct md_sysfs_entry r5c_journal_mode;
>  extern void r5c_update_on_rdev_error(struct mddev *mddev,
>  				     struct md_rdev *rdev);
>  extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
> +extern int r5l_start(struct r5l_log *log);
>  
>  extern struct dma_async_tx_descriptor *
>  ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 1649e82..39cdd78 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -8364,6 +8364,13 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
>  	return err;
>  }
>  
> +static int raid5_start(struct mddev *mddev)
> +{
> +	struct r5conf *conf = mddev->private;
> +
> +	return r5l_start(conf->log);
> +}
> +
>  static struct md_personality raid6_personality =
>  {
>  	.name		= "raid6",
> @@ -8371,6 +8378,7 @@ static struct md_personality raid6_personality =
>  	.owner		= THIS_MODULE,
>  	.make_request	= raid5_make_request,
>  	.run		= raid5_run,
> +	.start		= raid5_start,
>  	.free		= raid5_free,
>  	.status		= raid5_status,
>  	.error_handler	= raid5_error,
> @@ -8395,6 +8403,7 @@ static struct md_personality raid5_personality =
>  	.owner		= THIS_MODULE,
>  	.make_request	= raid5_make_request,
>  	.run		= raid5_run,
> +	.start		= raid5_start,
>  	.free		= raid5_free,
>  	.status		= raid5_status,
>  	.error_handler	= raid5_error,
> @@ -8420,6 +8429,7 @@ static struct md_personality raid4_personality =
>  	.owner		= THIS_MODULE,
>  	.make_request	= raid5_make_request,
>  	.run		= raid5_run,
> +	.start		= raid5_start,
>  	.free		= raid5_free,
>  	.status		= raid5_status,
>  	.error_handler	= raid5_error,
> -- 
> 2.9.5
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html