On Wed, May 31, 2023 at 04:27:30PM +0000, Martin Wilck wrote: > On Wed, 2023-05-24 at 18:21 -0500, Benjamin Marzinski wrote: > > need_switch_pathgroup() only checks if the currently used pathgroup > > is > > not the highest priority pathgroup. If it isn't, all multipathd does > > is > > instruct the kernel to switch to the correct pathgroup. However, the > > kernel treats the pathgroups as if they were ordered by priority. > > When > > the kernel runs out of paths to use in the currently selected > > pathgroup, > > it will start checking the pathgroups in order until it finds one > > with > > usable paths. > > > > need_switch_pathgroup() should also check if the pathgroups are out > > of > > order, and if so, multipathd should reload the map to reorder them > > correctly. > > > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > > --- > > libmultipath/libmultipath.version | 5 ++++ > > libmultipath/switchgroup.c | 27 ++++++++++++++++++++++ > > libmultipath/switchgroup.h | 1 + > > multipathd/main.c | 38 +++++++++++++++++++++-------- > > -- > > 4 files changed, 59 insertions(+), 12 deletions(-) > > > > diff --git a/libmultipath/libmultipath.version > > b/libmultipath/libmultipath.version > > index 8f72c452..38074699 100644 > > --- a/libmultipath/libmultipath.version > > +++ b/libmultipath/libmultipath.version > > @@ -237,3 +237,8 @@ global: > > local: > > *; > > }; > > + > > +LIBMULTIPATH_19.1.0 { > > +global: > > + path_groups_in_order; > > +} LIBMULTIPATH_19.0.0; > > diff --git a/libmultipath/switchgroup.c b/libmultipath/switchgroup.c > > index b1e1f39b..b1180839 100644 > > --- a/libmultipath/switchgroup.c > > +++ b/libmultipath/switchgroup.c > > @@ -7,6 +7,33 @@ > > #include "structs.h" > > #include "switchgroup.h" > > > > +bool path_groups_in_order(struct multipath *mpp) > > +{ > > + int i; > > + struct pathgroup *pgp; > > + bool seen_marginal_pg = false; > > + int last_prio = INT_MAX; > > + > > + if (VECTOR_SIZE(mpp->pg) < 2) > > + return true; > > + > > + vector_foreach_slot(mpp->pg, pgp, i) { > > + /* skip pgs with PRIO_UNDEF, since this is likely > > temporary */ > > + if (!pgp->paths || pgp->priority == PRIO_UNDEF) > > + continue; > > + if (pgp->marginal && !seen_marginal_pg) { > > + last_prio = INT_MAX; > > + continue; > > + } > > + if (seen_marginal_pg && !pgp->marginal) > > + return false; > > + if (pgp->priority > last_prio) > > + return false; > > + last_prio = pgp->priority; > > + } > > + return true; > > +} > > + > > void path_group_prio_update(struct pathgroup *pgp) > > { > > int i; > > diff --git a/libmultipath/switchgroup.h b/libmultipath/switchgroup.h > > index 9365e2e2..43dbb6c9 100644 > > --- a/libmultipath/switchgroup.h > > +++ b/libmultipath/switchgroup.h > > @@ -1,2 +1,3 @@ > > void path_group_prio_update (struct pathgroup * pgp); > > int select_path_group (struct multipath * mpp); > > +bool path_groups_in_order(struct multipath *mpp); > > diff --git a/multipathd/main.c b/multipathd/main.c > > index e7c272ad..2ea7c76b 100644 > > --- a/multipathd/main.c > > +++ b/multipathd/main.c > > @@ -396,7 +396,7 @@ void put_multipath_config(__attribute__((unused)) > > void *arg) > > } > > > > static int > > -need_switch_pathgroup (struct multipath * mpp, int refresh) > > +need_switch_pathgroup (struct multipath * mpp, int refresh, bool > > *need_reload) > > { > > struct pathgroup * pgp; > > struct path * pp; > > @@ -404,6 +404,7 @@ need_switch_pathgroup (struct multipath * mpp, > > int refresh) > > struct config *conf; > > int bestpg; > > > > + *need_reload = false; > > if (!mpp) > > return 0; > > > > @@ -430,10 +431,9 @@ need_switch_pathgroup (struct multipath * mpp, > > int refresh) > > return 0; > > > > mpp->bestpg = bestpg; > > - if (mpp->bestpg != mpp->nextpg) > > - return 1; > > + *need_reload = !path_groups_in_order(mpp); > > This will start another loop over the path groups. Can we just > integrate the path_groups_in_order() logic into the loop right here? > Sure -Ben > > > > > > - return 0; > > + return (*need_reload || mpp->bestpg != mpp->nextpg); > > } > > > > static void > > @@ -1982,20 +1982,26 @@ ghost_delay_tick(struct vectors *vecs) > > } > > > > static void > > -deferred_failback_tick (vector mpvec) > > +deferred_failback_tick (struct vectors *vecs) > > { > > struct multipath * mpp; > > unsigned int i; > > + bool need_reload; > > > > - vector_foreach_slot (mpvec, mpp, i) { > > + vector_foreach_slot (vecs->mpvec, mpp, i) { > > /* > > * deferred failback getting sooner > > */ > > if (mpp->pgfailback > 0 && mpp->failback_tick > 0) { > > mpp->failback_tick--; > > > > - if (!mpp->failback_tick && > > need_switch_pathgroup(mpp, 1)) > > - switch_pathgroup(mpp); > > + if (!mpp->failback_tick && > > + need_switch_pathgroup(mpp, 1, > > &need_reload)) { > > + if (need_reload) > > + reload_and_sync_map(mpp, > > vecs, 0); > > + else > > + switch_pathgroup(mpp); > > + } > > } > > } > > } > > @@ -2579,6 +2585,7 @@ check_path (struct vectors * vecs, struct path > > * pp, unsigned int ticks) > > int prio_changed = update_prio(pp, new_path_up); > > bool need_refresh = (!new_path_up && prio_changed && > > pp->priority != PRIO_UNDEF); > > + bool need_reload; > > > > if (prio_changed && > > pp->mpp->pgpolicyfn == (pgpolicyfn > > *)group_by_prio && > > @@ -2586,15 +2593,22 @@ check_path (struct vectors * vecs, struct > > path * pp, unsigned int ticks) > > condlog(2, "%s: path priorities changed. > > reloading", > > pp->mpp->alias); > > reload_and_sync_map(pp->mpp, vecs, > > !new_path_up); > > - } else if (need_switch_pathgroup(pp->mpp, > > need_refresh)) { > > + } else if (need_switch_pathgroup(pp->mpp, > > need_refresh, > > + &need_reload)) { > > if (pp->mpp->pgfailback > 0 && > > (new_path_up || pp->mpp->failback_tick <= > > 0)) > > pp->mpp->failback_tick = > > pp->mpp->pgfailback + 1; > > else if (pp->mpp->pgfailback == - > > FAILBACK_IMMEDIATE || > > (chkr_new_path_up && > > - followover_should_failback(pp))) > > - switch_pathgroup(pp->mpp); > > + followover_should_failback(pp))) { > > + if (need_reload) > > + reload_and_sync_map(pp->mpp, > > vecs, > > + > > !need_refresh && > > + > > !new_path_up); > > + else > > + switch_pathgroup(pp->mpp); > > + } > > } > > } > > return 1; > > @@ -2720,7 +2734,7 @@ unlock: > > pthread_cleanup_push(cleanup_lock, &vecs->lock); > > lock(&vecs->lock); > > pthread_testcancel(); > > - deferred_failback_tick(vecs->mpvec); > > + deferred_failback_tick(vecs); > > retry_count_tick(vecs->mpvec); > > missing_uev_wait_tick(vecs); > > ghost_delay_tick(vecs); > > -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel