Re: [PATCH 13/15] multipathd: update priority once after updating all paths

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Sep 04, 2024 at 08:57:46PM +0200, Martin Wilck wrote:
> On Wed, 2024-08-28 at 18:17 -0400, Benjamin Marzinski wrote:
> > Instead of updating the path priorities and possibly reloading the
> > multipath device when each path is updated, wait till all paths
> > have been updated, and then go through the multipath devices updating
> > the priorities once, reloading if necessary.
> > 
> > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx>
> > ---
> >  libmultipath/structs.h |   9 +++
> >  multipathd/main.c      | 169 ++++++++++++++++++++++++++-------------
> > --
> >  2 files changed, 118 insertions(+), 60 deletions(-)
> > 
> > diff --git a/libmultipath/structs.h b/libmultipath/structs.h
> > index af8e31e9..1f531d30 100644
> > --- a/libmultipath/structs.h
> > +++ b/libmultipath/structs.h
> > @@ -318,6 +318,7 @@ enum check_path_states {
> >  	CHECK_PATH_UNCHECKED,
> >  	CHECK_PATH_STARTED,
> >  	CHECK_PATH_CHECKED,
> > +	CHECK_PATH_NEW_UP,
> >  	CHECK_PATH_SKIPPED,
> >  	CHECK_PATH_REMOVED,
> >  };
> > @@ -421,6 +422,13 @@ enum prflag_value {
> >  	PRFLAG_SET,
> >  };
> >  
> > +enum prio_update_type {
> > +	PRIO_UPDATE_NONE,
> > +	PRIO_UPDATE_NORMAL,
> > +	PRIO_UPDATE_NEW_PATH,
> > +	PRIO_UPDATE_MARGINAL,
> > +};
> > +
> >  struct multipath {
> >  	char wwid[WWID_SIZE];
> >  	char alias_old[WWID_SIZE];
> > @@ -464,6 +472,7 @@ struct multipath {
> >  	int queue_mode;
> >  	unsigned int sync_tick;
> >  	int synced_count;
> > +	enum prio_update_type prio_update;
> >  	uid_t uid;
> >  	gid_t gid;
> >  	mode_t mode;
> > diff --git a/multipathd/main.c b/multipathd/main.c
> > index 2bcc6066..1511f701 100644
> > --- a/multipathd/main.c
> > +++ b/multipathd/main.c
> > @@ -1996,15 +1996,13 @@ mpvec_garbage_collector (struct vectors *
> > vecs)
> >   * best pathgroup, and this is the first path in the pathgroup to
> > come back
> >   * up, then switch to this pathgroup */
> >  static int
> > -followover_should_failback(struct path * pp)
> > +do_followover_should_failback(struct path * pp)
> >  {
> >  	struct pathgroup * pgp;
> >  	struct path *pp1;
> >  	int i;
> >  
> > -	if (pp->mpp->pgfailback != -FAILBACK_FOLLOWOVER ||
> > -	    !pp->mpp->pg || !pp->pgindex ||
> > -	    pp->pgindex != pp->mpp->bestpg)
> > +	if (pp->pgindex != pp->mpp->bestpg)
> >  		return 0;
> 
> What happened to the !pp->pgindex test? Is it not necessary any more?

In followover_should_failback(), before calling this function, we test
if mpp->bestpg is 0 and return early if it is (because 0 is not a valid
pathgroup id).  Thus, if pp->pgindex equals mpp->bestpg (and we should
failback), it's obviously not zero. If it's not equal to mpp->bestpg,
then it doesn't matter what it is, because we aren't doing the
failback anyway. Right?

> 
> 
> >  
> >  	pgp = VECTOR_SLOT(pp->mpp->pg, pp->pgindex - 1);
> > @@ -2017,6 +2015,26 @@ followover_should_failback(struct path * pp)
> >  	return 1;
> >  }
> >  
> > +static int
> > +followover_should_failback(struct multipath *mpp)
> > +{
> > +	struct path *pp;
> > +	struct pathgroup * pgp;
> > +	int i, j;
> > +
> > +	if (mpp->pgfailback != -FAILBACK_FOLLOWOVER || !mpp->pg ||
> > !mpp->bestpg)
> > +		return 0;
> > +
> > +	vector_foreach_slot (mpp->pg, pgp, i) {
> > +		vector_foreach_slot (pgp->paths, pp, j) {
> > +			if (pp->is_checked == CHECK_PATH_NEW_UP &&
> > +			    do_followover_should_failback(pp))
> > +				return 1;
> > +		}
> > +	}
> > +	return 0;
> > +}
> > +
> >  static void
> >  missing_uev_wait_tick(struct vectors *vecs)
> >  {
> > @@ -2132,41 +2150,53 @@ partial_retrigger_tick(vector pathvec)
> >  	}
> >  }
> >  
> > -static int update_prio(struct path *pp, int force_refresh_all)
> > +static bool update_prio(struct multipath *mpp, bool refresh_all)
> >  {
> >  	int oldpriority;
> > -	struct path *pp1;
> > +	struct path *pp;
> >  	struct pathgroup * pgp;
> > -	int i, j, changed = 0;
> > +	int i, j;
> > +	bool changed = false;
> > +	bool skipped_path = false;
> >  	struct config *conf;
> >  
> > -	oldpriority = pp->priority;
> > -	if (pp->state != PATH_DOWN) {
> > -		conf = get_multipath_config();
> > -		pthread_cleanup_push(put_multipath_config, conf);
> > -		pathinfo(pp, conf, DI_PRIO);
> > -		pthread_cleanup_pop(1);
> > +	vector_foreach_slot (mpp->pg, pgp, i) {
> > +		vector_foreach_slot (pgp->paths, pp, j) {
> > +			if (pp->state != PATH_UP && pp->state !=
> > PATH_GHOST)
> > +				continue;
> 
> This test used to be pp->state == PATH_DOWN (see below).
> 
> Just trying to confirm that you did this on purpose. It might justify a
> separate patch, with rationale. After all, AFAICS, we will now keep the
> old priority for all path states except PATH_UP and PATH_GHOST, while
> previously we attempted to fetch the prio for most of them, and only
> used the previous value if fetching the prio failed.

In my understanding, we don't try getting the priority of failed paths
because some of the prioritizers can hang. On this grounds, if we are
excluding PATH_DOWN we should definitely also exclude PATH_SHAKY and
PATH_TIMEOUT and probably also PATH_DELAYED.

Also, since only paths that are in PATH_UP or PATH_GHOST are usable by
the kernel and contribute to the pathgroup's priority, not attempting to
update the priority of these other paths avoids the risk of hanging whil
not hurting anything (except that they might end up in the wrong
pathgroup during the time when they aren't usable, if you have
group_by_prio set).

Lastly, The way do_check_path() worked before this patchset was
inconsistent. If a path changed state to anything other than PATH_UP or
PATH_GHOST we skipped the prio update. However if the path kept the same
state we did the prio update, even if it wasn't PATH_UP or PATH_GHOST,
as long as it wasn't PATH_DOWN. That seems wrong.

But I'm fine with making these changes in a separate patch.

-Ben
 
> > +			if (!refresh_all &&
> > +			    pp->is_checked != CHECK_PATH_CHECKED) {
> > +				skipped_path = true;
> > +				continue;
> > +			}
> > +			oldpriority = pp->priority;
> > +			conf = get_multipath_config();
> > +			pthread_cleanup_push(put_multipath_config,
> > conf);
> > +			pathinfo(pp, conf, DI_PRIO);
> > +			pthread_cleanup_pop(1);
> > +			if (pp->priority != oldpriority)
> > +				changed = true;
> > +		}
> >  	}
> > -
> > -	if (pp->priority != oldpriority)
> > -		changed = 1;
> > -	else if (!force_refresh_all)
> > -		return 0;
> > -
> > -	vector_foreach_slot (pp->mpp->pg, pgp, i) {
> > -		vector_foreach_slot (pgp->paths, pp1, j) {
> > -			if (pp1 == pp || pp1->state == PATH_DOWN)
> > +	if (!changed || !skipped_path)
> > +		return changed;
> > +	/*
> > +	 * If a path changed priorities, refresh the priorities of
> > any
> > +	 * paths we skipped
> > +	 */
> > +	vector_foreach_slot (mpp->pg, pgp, i) {
> > +		vector_foreach_slot (pgp->paths, pp, j) {
> > +			if (pp->state != PATH_UP && pp->state !=
> > PATH_GHOST)
> > +				continue;
> > +			if (pp->is_checked == CHECK_PATH_CHECKED)
> >  				continue;
> > -			oldpriority = pp1->priority;
> >  			conf = get_multipath_config();
> >  			pthread_cleanup_push(put_multipath_config,
> > conf);
> > -			pathinfo(pp1, conf, DI_PRIO);
> > +			pathinfo(pp, conf, DI_PRIO);
> >  			pthread_cleanup_pop(1);
> > -			if (pp1->priority != oldpriority)
> > -				changed = 1;
> >  		}
> >  	}
> > -	return changed;
> > +	return true;
> >  }
> >  
> >  static int reload_map(struct vectors *vecs, struct multipath *mpp,
> > @@ -2393,14 +2423,12 @@ static int
> >  update_path_state (struct vectors * vecs, struct path * pp)
> >  {
> >  	int newstate;
> > -	int new_path_up = 0;
> >  	int chkr_new_path_up = 0;
> >  	int disable_reinstate = 0;
> >  	int oldchkrstate = pp->chkrstate;
> >  	unsigned int checkint, max_checkint;
> >  	struct config *conf;
> > -	int marginal_pathgroups, marginal_changed = 0;
> > -	bool need_reload;
> > +	int marginal_pathgroups;
> >  
> >  	conf = get_multipath_config();
> >  	checkint = conf->checkint;
> > @@ -2462,7 +2490,7 @@ update_path_state (struct vectors * vecs,
> > struct path * pp)
> >  			}
> >  			if (!pp->marginal) {
> >  				pp->marginal = 1;
> > -				marginal_changed = 1;
> > +				pp->mpp->prio_update =
> > PRIO_UPDATE_MARGINAL;
> >  			}
> >  		} else {
> >  			if (pp->marginal || pp->state ==
> > PATH_DELAYED)
> > @@ -2470,7 +2498,7 @@ update_path_state (struct vectors * vecs,
> > struct path * pp)
> >  					pp->dev);
> >  			if (marginal_pathgroups && pp->marginal) {
> >  				pp->marginal = 0;
> > -				marginal_changed = 1;
> > +				pp->mpp->prio_update =
> > PRIO_UPDATE_MARGINAL;
> >  			}
> >  		}
> >  	}
> > @@ -2537,7 +2565,8 @@ update_path_state (struct vectors * vecs,
> > struct path * pp)
> >  		 */
> >  		if (!disable_reinstate)
> >  			reinstate_path(pp);
> > -		new_path_up = 1;
> > +		if (pp->mpp->prio_update != PRIO_UPDATE_MARGINAL)
> > +			pp->mpp->prio_update = PRIO_UPDATE_NEW_PATH;
> >  
> >  		if (oldchkrstate != PATH_UP && oldchkrstate !=
> > PATH_GHOST)
> >  			chkr_new_path_up = 1;
> > @@ -2588,38 +2617,48 @@ update_path_state (struct vectors * vecs,
> > struct path * pp)
> >  				LOG_MSG(2, pp);
> >  		}
> >  	}
> > -
> > +	if (pp->mpp->prio_update == PRIO_UPDATE_NONE &&
> > +            (newstate == PATH_UP || newstate == PATH_GHOST))
> > +		pp->mpp->prio_update = PRIO_UPDATE_NORMAL;
> >  	pp->state = newstate;
> > +	return chkr_new_path_up ? CHECK_PATH_NEW_UP :
> > CHECK_PATH_CHECKED;
> > +}
> >  
> > -	if (pp->mpp->wait_for_udev)
> > -		return CHECK_PATH_CHECKED;
> > -	/*
> > -	 * path prio refreshing
> > -	 */
> > -	condlog(4, "path prio refresh");
> > -
> > -	if (marginal_changed) {
> > -		update_prio(pp, 1);
> > -		reload_and_sync_map(pp->mpp, vecs);
> > -	} else if (update_prio(pp, new_path_up) &&
> > -		   pp->mpp->pgpolicyfn == (pgpolicyfn
> > *)group_by_prio &&
> > -		   pp->mpp->pgfailback == -FAILBACK_IMMEDIATE) {
> > +static int
> > +update_mpp_prio(struct vectors *vecs, struct multipath *mpp)
> > +{
> > +	bool need_reload, changed;
> > +	enum prio_update_type prio_update = mpp->prio_update;
> > +	mpp->prio_update = PRIO_UPDATE_NONE;
> > +
> > +	if (mpp->wait_for_udev || prio_update == PRIO_UPDATE_NONE)
> > +		return 0;
> > +	condlog(4, "prio refresh");
> > +
> > +	changed = update_prio(mpp, prio_update !=
> > PRIO_UPDATE_NORMAL);
> > +	if (prio_update == PRIO_UPDATE_MARGINAL)
> > +		return reload_and_sync_map(mpp, vecs);
> > +	if (changed && mpp->pgpolicyfn == (pgpolicyfn
> > *)group_by_prio &&
> > +	    mpp->pgfailback == -FAILBACK_IMMEDIATE) {
> >  		condlog(2, "%s: path priorities changed. reloading",
> > -			pp->mpp->alias);
> > -		reload_and_sync_map(pp->mpp, vecs);
> > -	} else if (need_switch_pathgroup(pp->mpp, &need_reload)) {
> > -		if (pp->mpp->pgfailback > 0 &&
> > -		    (new_path_up || pp->mpp->failback_tick <= 0))
> > -			pp->mpp->failback_tick = pp->mpp->pgfailback
> > + 1;
> > -		else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE
> > ||
> > -			 (chkr_new_path_up &&
> > followover_should_failback(pp))) {
> > +			mpp->alias);
> > +		return reload_and_sync_map(mpp, vecs);
> > +	}
> > +	if (need_switch_pathgroup(mpp, &need_reload)) {
> > +		if (mpp->pgfailback > 0 &&
> > +		    (prio_update == PRIO_UPDATE_NEW_PATH ||
> > +		     mpp->failback_tick <= 0))
> > +			mpp->failback_tick = mpp->pgfailback + 1;
> > +		else if (mpp->pgfailback == -FAILBACK_IMMEDIATE ||
> > +			 (prio_update == PRIO_UPDATE_NEW_PATH &&
> > +			  followover_should_failback(mpp))) {
> >  			if (need_reload)
> > -				reload_and_sync_map(pp->mpp, vecs);
> > +				return reload_and_sync_map(mpp,
> > vecs);
> >  			else
> > -				switch_pathgroup(pp->mpp);
> > +				switch_pathgroup(mpp);
> >  		}
> >  	}
> > -	return CHECK_PATH_CHECKED;
> > +	return 0;
> >  }
> >  
> >  static int
> > @@ -2859,7 +2898,7 @@ update_paths(struct vectors *vecs, int
> > *num_paths_p, time_t start_secs)
> >  			i--;
> >  		else {
> >  			pp->is_checked = rc;
> > -			if (rc == CHECK_PATH_CHECKED)
> > +			if (rc == CHECK_PATH_CHECKED ||
> > CHECK_PATH_NEW_UP)
> >  				(*num_paths_p)++;
> >  		}
> >  		if (++paths_checked % 128 == 0 &&
> > @@ -2932,8 +2971,10 @@ checkerloop (void *ap)
> >  			vector_foreach_slot(vecs->mpvec, mpp, i)
> >  				mpp->synced_count = 0;
> >  			if (checker_state == CHECKER_STARTING) {
> > -				vector_foreach_slot(vecs->mpvec,
> > mpp, i)
> > +				vector_foreach_slot(vecs->mpvec,
> > mpp, i) {
> >  					sync_mpp(vecs, mpp, ticks);
> > +					mpp->prio_update =
> > PRIO_UPDATE_NONE;
> > +				}
> >  				vector_foreach_slot(vecs->pathvec,
> > pp, i)
> >  					pp->is_checked =
> > CHECK_PATH_UNCHECKED;
> >  				checker_state =
> > CHECKER_CHECKING_PATHS;
> > @@ -2943,6 +2984,14 @@ checkerloop (void *ap)
> >  			if (checker_state == CHECKER_UPDATING_PATHS)
> >  				checker_state = update_paths(vecs,
> > &num_paths,
> >  							    
> > start_time.tv_sec);
> > +			if (checker_state == CHECKER_FINISHED) {
> > +				vector_foreach_slot(vecs->mpvec,
> > mpp, i) {
> > +					if (update_mpp_prio(vecs,
> > mpp) == 2) {
> > +						/* multipath device
> > deleted */
> > +						i--;
> > +					}
> > +				}
> > +			}
> >  			lock_cleanup_pop(vecs->lock);
> >  			if (checker_state != CHECKER_FINISHED) {
> >  				/* Yield to waiters */





[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux