Re: [PATCH 3/5] libmultipath: drop mpp->nr_active field

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 15, 2019 at 02:41:50PM +0000, Martin Wilck wrote:
> From: Martin Wilck <mwilck@xxxxxxxx>
> 
> The tracking of nr_active has turned out to be error prone and hard
> to verify. Calculating it on the fly is a quick operation, so
> do this rather than trying to track nr_active. Use a boolean
> field instead to track whether a map is currently in recovery mode.
> 
> Moreover, clean up the recovery logic by making set_no_path_retry()
> the place that checks the current configuration and state, sets
> "queue_if_no_path" if necessary, and enters or leaves recovery
> mode if necessary. This ensures that consistent logic is applied.

Thanks. This looks better. The only thing I'm am sorta worried about is
that when we call the cli_handler functions, we haven't called
update_multipath_strings() to sync the state with the kernel first. This
could mean that multipathd is wrong about what the queue_if_no_path state
is, which is possible since it doesn't update mpp->features whenever it
calls dm_queue_if_no_path(). However, in the worst case scenario, this
would only cause multipathd to need to wait for the next call to
check_path to fix this up. Alternatively, we might to call
update_multipath_strings() here, or even replace the calls to
set_no_path_retry() with something like setup_multipath() or
update_multipath().

Any thoughts? I might just be being overly paranoid here, since I can't
really come up with a good explanation for how this could even get to be
in a problem state, and like I said, even if it does occur, it will just
get resolved on the next call to check_path.

-Ben


> Signed-off-by: Martin Wilck <mwilck@xxxxxxxx>
> ---
>  libmultipath/configure.c   |  5 ++-
>  libmultipath/devmapper.c   |  2 +-
>  libmultipath/io_err_stat.c |  4 +--
>  libmultipath/print.c       |  5 +--
>  libmultipath/structs.c     | 19 +++++++++++
>  libmultipath/structs.h     |  4 ++-
>  libmultipath/structs_vec.c | 69 ++++++++++++++++++++++++++------------
>  libmultipath/structs_vec.h |  1 -
>  multipathd/cli_handlers.c  | 39 ++++++++++-----------
>  multipathd/main.c          | 19 ++++-------
>  10 files changed, 101 insertions(+), 66 deletions(-)
> 
> diff --git a/libmultipath/configure.c b/libmultipath/configure.c
> index 5ac7d903..c95848a0 100644
> --- a/libmultipath/configure.c
> +++ b/libmultipath/configure.c
> @@ -401,7 +401,6 @@ int setup_map(struct multipath *mpp, char *params, int params_size,
>  			condlog(2, "%s: setting up map with %d/%d path checkers pending",
>  				mpp->alias, n_pending, n_paths);
>  	}
> -	mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
>  
>  	/*
>  	 * ponders each path group and determine highest prio pg
> @@ -934,8 +933,8 @@ int domap(struct multipath *mpp, char *params, int is_daemon)
>  		}
>  
>  		sysfs_set_max_sectors_kb(mpp, 0);
> -		if (is_daemon && mpp->ghost_delay > 0 && mpp->nr_active &&
> -		    pathcount(mpp, PATH_GHOST) == mpp->nr_active)
> +		if (is_daemon && mpp->ghost_delay > 0 && count_active_paths(mpp) &&
> +		    pathcount(mpp, PATH_UP) == 0)
>  			mpp->ghost_delay_tick = mpp->ghost_delay;
>  		r = dm_addmap_create(mpp, params);
>  
> diff --git a/libmultipath/devmapper.c b/libmultipath/devmapper.c
> index acf576aa..bed8ddc6 100644
> --- a/libmultipath/devmapper.c
> +++ b/libmultipath/devmapper.c
> @@ -403,7 +403,7 @@ static uint16_t build_udev_flags(const struct multipath *mpp, int reload)
>  	/* DM_UDEV_DISABLE_LIBRARY_FALLBACK is added in dm_addmap */
>  	return	(mpp->skip_kpartx == SKIP_KPARTX_ON ?
>  		 MPATH_UDEV_NO_KPARTX_FLAG : 0) |
> -		((mpp->nr_active == 0 || mpp->ghost_delay_tick > 0)?
> +		((count_active_paths(mpp) == 0 || mpp->ghost_delay_tick > 0) ?
>  		 MPATH_UDEV_NO_PATHS_FLAG : 0) |
>  		(reload && !mpp->force_udev_reload ?
>  		 MPATH_UDEV_RELOAD_FLAG : 0);
> diff --git a/libmultipath/io_err_stat.c b/libmultipath/io_err_stat.c
> index dcc8690d..1b9cd6c0 100644
> --- a/libmultipath/io_err_stat.c
> +++ b/libmultipath/io_err_stat.c
> @@ -383,7 +383,7 @@ int need_io_err_check(struct path *pp)
>  
>  	if (uatomic_read(&io_err_thread_running) == 0)
>  		return 0;
> -	if (pp->mpp->nr_active <= 0) {
> +	if (count_active_paths(pp->mpp) <= 0) {
>  		io_err_stat_log(2, "%s: recover path early", pp->dev);
>  		goto recover;
>  	}
> @@ -481,7 +481,7 @@ static int poll_io_err_stat(struct vectors *vecs, struct io_err_stat_path *pp)
>  		 */
>  		path->tick = 1;
>  
> -	} else if (path->mpp && path->mpp->nr_active > 0) {
> +	} else if (path->mpp && count_active_paths(path->mpp) > 0) {
>  		io_err_stat_log(3, "%s: keep failing the dm path %s",
>  				path->mpp->alias, path->dev);
>  		path->io_err_pathfail_cnt = PATH_IO_ERR_WAITING_TO_CHECK;
> diff --git a/libmultipath/print.c b/libmultipath/print.c
> index b98e9bda..e1ef4d3f 100644
> --- a/libmultipath/print.c
> +++ b/libmultipath/print.c
> @@ -181,9 +181,10 @@ snprint_queueing (char * buff, size_t len, const struct multipath * mpp)
>  		return snprintf(buff, len, "-");
>  	else if (mpp->no_path_retry > 0) {
>  		if (mpp->retry_tick > 0)
> +
>  			return snprintf(buff, len, "%i sec",
>  					mpp->retry_tick);
> -		else if (mpp->retry_tick == 0 && mpp->nr_active > 0)
> +		else if (mpp->retry_tick == 0 && count_active_paths(mpp) > 0)
>  			return snprintf(buff, len, "%i chk",
>  					mpp->no_path_retry);
>  		else
> @@ -195,7 +196,7 @@ snprint_queueing (char * buff, size_t len, const struct multipath * mpp)
>  static int
>  snprint_nb_paths (char * buff, size_t len, const struct multipath * mpp)
>  {
> -	return snprint_int(buff, len, mpp->nr_active);
> +	return snprint_int(buff, len, count_active_paths(mpp));
>  }
>  
>  static int
> diff --git a/libmultipath/structs.c b/libmultipath/structs.c
> index f420ee5c..cc931e4e 100644
> --- a/libmultipath/structs.c
> +++ b/libmultipath/structs.c
> @@ -478,6 +478,25 @@ int pathcount(const struct multipath *mpp, int state)
>  	return count;
>  }
>  
> +int count_active_paths(const struct multipath *mpp)
> +{
> +	struct pathgroup *pgp;
> +	struct path *pp;
> +	int count = 0;
> +	int i, j;
> +
> +	if (!mpp->pg)
> +		return 0;
> +
> +	vector_foreach_slot (mpp->pg, pgp, i) {
> +		vector_foreach_slot (pgp->paths, pp, j) {
> +			if (pp->state == PATH_UP || pp->state == PATH_GHOST)
> +				count++;
> +		}
> +	}
> +	return count;
> +}
> +
>  int pathcmp(const struct pathgroup *pgp, const struct pathgroup *cpgp)
>  {
>  	int i, j;
> diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> index 3665b89a..da4b6ca0 100644
> --- a/libmultipath/structs.h
> +++ b/libmultipath/structs.h
> @@ -3,6 +3,7 @@
>  
>  #include <sys/types.h>
>  #include <inttypes.h>
> +#include <stdbool.h>
>  
>  #include "prio.h"
>  #include "byteorder.h"
> @@ -309,7 +310,6 @@ struct multipath {
>  	int pgfailback;
>  	int failback_tick;
>  	int rr_weight;
> -	int nr_active;     /* current available(= not known as failed) paths */
>  	int no_path_retry; /* number of retries after all paths are down */
>  	int retry_tick;    /* remaining times for retries */
>  	int disable_queueing;
> @@ -319,6 +319,7 @@ struct multipath {
>  	int fast_io_fail;
>  	int retain_hwhandler;
>  	int deferred_remove;
> +	bool in_recovery;
>  	int san_path_err_threshold;
>  	int san_path_err_forget_rate;
>  	int san_path_err_recovery_time;
> @@ -449,6 +450,7 @@ struct path * first_path (const struct multipath *mpp);
>  
>  int pathcountgr (const struct pathgroup *, int);
>  int pathcount (const struct multipath *, int);
> +int count_active_paths(const struct multipath *);
>  int pathcmp (const struct pathgroup *, const struct pathgroup *);
>  int add_feature (char **, const char *);
>  int remove_feature (char **, const char *);
> diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c
> index fbe97662..0c5a3a81 100644
> --- a/libmultipath/structs_vec.c
> +++ b/libmultipath/structs_vec.c
> @@ -290,10 +290,15 @@ update_multipath_strings(struct multipath *mpp, vector pathvec, int is_daemon)
>  	return 0;
>  }
>  
> -void enter_recovery_mode(struct multipath *mpp)
> +static void enter_recovery_mode(struct multipath *mpp)
>  {
>  	unsigned int checkint;
> -	struct config *conf = get_multipath_config();
> +	struct config *conf;
> +
> +	if (mpp->in_recovery || mpp->no_path_retry <= 0)
> +		return;
> +
> +	conf = get_multipath_config();
>  	checkint = conf->checkint;
>  	put_multipath_config(conf);
>  
> @@ -302,17 +307,37 @@ void enter_recovery_mode(struct multipath *mpp)
>  	 * meaning of +1: retry_tick may be decremented in checkerloop before
>  	 * starting retry.
>  	 */
> +	mpp->in_recovery = true;
>  	mpp->stat_queueing_timeouts++;
>  	mpp->retry_tick = mpp->no_path_retry * checkint + 1;
>  	condlog(1, "%s: Entering recovery mode: max_retries=%d",
>  		mpp->alias, mpp->no_path_retry);
>  }
>  
> +static void leave_recovery_mode(struct multipath *mpp)
> +{
> +	bool recovery = mpp->in_recovery;
> +
> +	mpp->in_recovery = false;
> +	mpp->retry_tick = 0;
> +
> +	/*
> +	 * in_recovery is only ever set if mpp->no_path_retry > 0
> +	 * (see enter_recovery_mode()). But no_path_retry may have been
> +	 * changed while the map was recovering, so test it here again.
> +	 */
> +	if (recovery && (mpp->no_path_retry == NO_PATH_RETRY_QUEUE ||
> +			 mpp->no_path_retry > 0)) {
> +		dm_queue_if_no_path(mpp->alias, 1);
> +		condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
> +		condlog(1, "%s: Recovered to normal mode", mpp->alias);
> +	}
> +}
> +
>  void set_no_path_retry(struct multipath *mpp)
>  {
> -	char is_queueing = 0;
> +	bool is_queueing = 0;
>  
> -	mpp->nr_active = pathcount(mpp, PATH_UP) + pathcount(mpp, PATH_GHOST);
>  	if (mpp->features && strstr(mpp->features, "queue_if_no_path"))
>  		is_queueing = 1;
>  
> @@ -328,11 +353,15 @@ void set_no_path_retry(struct multipath *mpp)
>  			dm_queue_if_no_path(mpp->alias, 1);
>  		break;
>  	default:
> -		if (mpp->nr_active > 0) {
> -			mpp->retry_tick = 0;
> -			if (!is_queueing)
> +		if (count_active_paths(mpp) > 0) {
> +			/*
> +			 * If in_recovery is set, leave_recovery_mode() takes
> +			 * care of dm_queue_if_no_path. Otherwise, do it here.
> +			 */
> +			if (!is_queueing && !mpp->in_recovery)
>  				dm_queue_if_no_path(mpp->alias, 1);
> -		} else if (is_queueing && mpp->retry_tick == 0)
> +			leave_recovery_mode(mpp);
> +		} else
>  			enter_recovery_mode(mpp);
>  		break;
>  	}
> @@ -480,25 +509,23 @@ int verify_paths(struct multipath *mpp, struct vectors *vecs)
>   */
>  void update_queue_mode_del_path(struct multipath *mpp)
>  {
> -	if (--mpp->nr_active == 0) {
> -		if (mpp->no_path_retry > 0)
> -			enter_recovery_mode(mpp);
> -		else if (mpp->no_path_retry != NO_PATH_RETRY_QUEUE)
> +	int active = count_active_paths(mpp);
> +
> +	if (active == 0) {
> +		enter_recovery_mode(mpp);
> +		if (mpp->no_path_retry != NO_PATH_RETRY_QUEUE)
>  			mpp->stat_map_failures++;
>  	}
> -	condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
> +	condlog(2, "%s: remaining active paths: %d", mpp->alias, active);
>  }
>  
>  void update_queue_mode_add_path(struct multipath *mpp)
>  {
> -	if (mpp->nr_active++ == 0 && mpp->no_path_retry > 0) {
> -		/* come back to normal mode from retry mode */
> -		mpp->retry_tick = 0;
> -		dm_queue_if_no_path(mpp->alias, 1);
> -		condlog(2, "%s: queue_if_no_path enabled", mpp->alias);
> -		condlog(1, "%s: Recovered to normal mode", mpp->alias);
> -	}
> -	condlog(2, "%s: remaining active paths: %d", mpp->alias, mpp->nr_active);
> +	int active = count_active_paths(mpp);
> +
> +	if (active > 0)
> +		leave_recovery_mode(mpp);
> +	condlog(2, "%s: remaining active paths: %d", mpp->alias, active);
>  }
>  
>  vector get_used_hwes(const struct _vector *pathvec)
> diff --git a/libmultipath/structs_vec.h b/libmultipath/structs_vec.h
> index d3219278..678efe4d 100644
> --- a/libmultipath/structs_vec.h
> +++ b/libmultipath/structs_vec.h
> @@ -12,7 +12,6 @@ struct vectors {
>  };
>  
>  void set_no_path_retry(struct multipath *mpp);
> -void enter_recovery_mode(struct multipath *mpp);
>  
>  int adopt_paths (vector pathvec, struct multipath * mpp);
>  void orphan_paths(vector pathvec, struct multipath *mpp,
> diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
> index 371b0a79..7d878c88 100644
> --- a/multipathd/cli_handlers.c
> +++ b/multipathd/cli_handlers.c
> @@ -1024,16 +1024,17 @@ cli_restore_queueing(void *v, char **reply, int *len, void *data)
>  	select_no_path_retry(conf, mpp);
>  	pthread_cleanup_pop(1);
>  
> +	/*
> +	 * Don't call set_no_path_retry() for the NO_PATH_RETRY_FAIL case.
> +	 * That would disable queueing when "restorequeueing" is called,
> +	 * and the code never behaved that way. Users might not expect it.
> +	 * In almost all cases, queueing will be disabled anyway when we
> +	 * are here.
> +	 */
>  	if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF &&
> -			mpp->no_path_retry != NO_PATH_RETRY_FAIL) {
> -		dm_queue_if_no_path(mpp->alias, 1);
> -		if (mpp->no_path_retry > 0) {
> -			if (mpp->nr_active > 0)
> -				mpp->retry_tick = 0;
> -			else
> -				enter_recovery_mode(mpp);
> -		}
> -	}
> +	    mpp->no_path_retry != NO_PATH_RETRY_FAIL)
> +		set_no_path_retry(mpp);
> +
>  	return 0;
>  }
>  
> @@ -1051,16 +1052,10 @@ cli_restore_all_queueing(void *v, char **reply, int *len, void *data)
>  		pthread_cleanup_push(put_multipath_config, conf);
>  		select_no_path_retry(conf, mpp);
>  		pthread_cleanup_pop(1);
> +		/* See comment in cli_restore_queueing() */
>  		if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF &&
> -		    mpp->no_path_retry != NO_PATH_RETRY_FAIL) {
> -			dm_queue_if_no_path(mpp->alias, 1);
> -			if (mpp->no_path_retry > 0) {
> -				if (mpp->nr_active > 0)
> -					mpp->retry_tick = 0;
> -				else
> -					enter_recovery_mode(mpp);
> -			}
> -		}
> +		    mpp->no_path_retry != NO_PATH_RETRY_FAIL)
> +			set_no_path_retry(mpp);
>  	}
>  	return 0;
>  }
> @@ -1085,12 +1080,12 @@ cli_disable_queueing(void *v, char **reply, int *len, void *data)
>  		return 1;
>  	}
>  
> -	if (mpp->nr_active == 0)
> +	if (count_active_paths(mpp) == 0)
>  		mpp->stat_map_failures++;
>  	mpp->retry_tick = 0;
>  	mpp->no_path_retry = NO_PATH_RETRY_FAIL;
>  	mpp->disable_queueing = 1;
> -	dm_queue_if_no_path(mpp->alias, 0);
> +	set_no_path_retry(mpp);
>  	return 0;
>  }
>  
> @@ -1103,12 +1098,12 @@ cli_disable_all_queueing(void *v, char **reply, int *len, void *data)
>  
>  	condlog(2, "disable queueing (operator)");
>  	vector_foreach_slot(vecs->mpvec, mpp, i) {
> -		if (mpp->nr_active == 0)
> +		if (count_active_paths(mpp) == 0)
>  			mpp->stat_map_failures++;
>  		mpp->retry_tick = 0;
>  		mpp->no_path_retry = NO_PATH_RETRY_FAIL;
>  		mpp->disable_queueing = 1;
> -		dm_queue_if_no_path(mpp->alias, 0);
> +		set_no_path_retry(mpp);
>  	}
>  	return 0;
>  }
> diff --git a/multipathd/main.c b/multipathd/main.c
> index a21d96e4..c0423602 100644
> --- a/multipathd/main.c
> +++ b/multipathd/main.c
> @@ -1616,7 +1616,7 @@ fail_path (struct path * pp, int del_active)
>   * caller must have locked the path list before calling that function
>   */
>  static int
> -reinstate_path (struct path * pp, int add_active)
> +reinstate_path (struct path * pp)
>  {
>  	int ret = 0;
>  
> @@ -1628,8 +1628,7 @@ reinstate_path (struct path * pp, int add_active)
>  		ret = 1;
>  	} else {
>  		condlog(2, "%s: reinstated", pp->dev_t);
> -		if (add_active)
> -			update_queue_mode_add_path(pp->mpp);
> +		update_queue_mode_add_path(pp->mpp);
>  	}
>  	return ret;
>  }
> @@ -1861,7 +1860,7 @@ static int check_path_reinstate_state(struct path * pp) {
>  
>  	if (pp->disable_reinstate) {
>  		/* If there are no other usable paths, reinstate the path */
> -		if (pp->mpp->nr_active == 0) {
> +		if (count_active_paths(pp->mpp) == 0) {
>  			condlog(2, "%s : reinstating path early", pp->dev);
>  			goto reinstate_path;
>  		}
> @@ -1960,7 +1959,6 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  	int newstate;
>  	int new_path_up = 0;
>  	int chkr_new_path_up = 0;
> -	int add_active;
>  	int disable_reinstate = 0;
>  	int oldchkrstate = pp->chkrstate;
>  	int retrigger_tries, verbosity;
> @@ -2134,7 +2132,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  	 * paths if there are no other active paths in map.
>  	 */
>  	disable_reinstate = (newstate == PATH_GHOST &&
> -			     pp->mpp->nr_active == 0 &&
> +			     count_active_paths(pp->mpp) == 0 &&
>  			     path_get_tpgs(pp) == TPGS_IMPLICIT) ? 1 : 0;
>  
>  	pp->chkrstate = newstate;
> @@ -2185,12 +2183,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  		/*
>  		 * reinstate this path
>  		 */
> -		if (oldstate != PATH_UP &&
> -		    oldstate != PATH_GHOST)
> -			add_active = 1;
> -		else
> -			add_active = 0;
> -		if (!disable_reinstate && reinstate_path(pp, add_active)) {
> +		if (!disable_reinstate && reinstate_path(pp)) {
>  			condlog(3, "%s: reload map", pp->dev);
>  			ev_add_path(pp, vecs, 1);
>  			pp->tick = 1;
> @@ -2213,7 +2206,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks)
>  		    pp->dmstate == PSTATE_UNDEF) &&
>  		    !disable_reinstate) {
>  			/* Clear IO errors */
> -			if (reinstate_path(pp, 0)) {
> +			if (reinstate_path(pp)) {
>  				condlog(3, "%s: reload map", pp->dev);
>  				ev_add_path(pp, vecs, 1);
>  				pp->tick = 1;
> -- 
> 2.24.0

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel





[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux