On Wed, 2023-05-24 at 18:21 -0500, Benjamin Marzinski wrote: > need_switch_pathgroup() only checks if the currently used pathgroup > is > not the highest priority pathgroup. If it isn't, all multipathd does > is > instruct the kernel to switch to the correct pathgroup. However, the > kernel treats the pathgroups as if they were ordered by priority. > When > the kernel runs out of paths to use in the currently selected > pathgroup, > it will start checking the pathgroups in order until it finds one > with > usable paths. > > need_switch_pathgroup() should also check if the pathgroups are out > of > order, and if so, multipathd should reload the map to reorder them > correctly. > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > --- > libmultipath/libmultipath.version | 5 ++++ > libmultipath/switchgroup.c | 27 ++++++++++++++++++++++ > libmultipath/switchgroup.h | 1 + > multipathd/main.c | 38 +++++++++++++++++++++-------- > -- > 4 files changed, 59 insertions(+), 12 deletions(-) > > diff --git a/libmultipath/libmultipath.version > b/libmultipath/libmultipath.version > index 8f72c452..38074699 100644 > --- a/libmultipath/libmultipath.version > +++ b/libmultipath/libmultipath.version > @@ -237,3 +237,8 @@ global: > local: > *; > }; > + > +LIBMULTIPATH_19.1.0 { > +global: > + path_groups_in_order; > +} LIBMULTIPATH_19.0.0; > diff --git a/libmultipath/switchgroup.c b/libmultipath/switchgroup.c > index b1e1f39b..b1180839 100644 > --- a/libmultipath/switchgroup.c > +++ b/libmultipath/switchgroup.c > @@ -7,6 +7,33 @@ > #include "structs.h" > #include "switchgroup.h" > > +bool path_groups_in_order(struct multipath *mpp) > +{ > + int i; > + struct pathgroup *pgp; > + bool seen_marginal_pg = false; > + int last_prio = INT_MAX; > + > + if (VECTOR_SIZE(mpp->pg) < 2) > + return true; > + > + vector_foreach_slot(mpp->pg, pgp, i) { > + /* skip pgs with PRIO_UNDEF, since this is likely > temporary */ > + if (!pgp->paths || pgp->priority == PRIO_UNDEF) > + continue; > + if (pgp->marginal && !seen_marginal_pg) { > + last_prio = INT_MAX; > + continue; > + } > + if (seen_marginal_pg && !pgp->marginal) > + return false; > + if (pgp->priority > last_prio) > + return false; > + last_prio = pgp->priority; > + } > + return true; > +} > + > void path_group_prio_update(struct pathgroup *pgp) > { > int i; > diff --git a/libmultipath/switchgroup.h b/libmultipath/switchgroup.h > index 9365e2e2..43dbb6c9 100644 > --- a/libmultipath/switchgroup.h > +++ b/libmultipath/switchgroup.h > @@ -1,2 +1,3 @@ > void path_group_prio_update (struct pathgroup * pgp); > int select_path_group (struct multipath * mpp); > +bool path_groups_in_order(struct multipath *mpp); > diff --git a/multipathd/main.c b/multipathd/main.c > index e7c272ad..2ea7c76b 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -396,7 +396,7 @@ void put_multipath_config(__attribute__((unused)) > void *arg) > } > > static int > -need_switch_pathgroup (struct multipath * mpp, int refresh) > +need_switch_pathgroup (struct multipath * mpp, int refresh, bool > *need_reload) > { > struct pathgroup * pgp; > struct path * pp; > @@ -404,6 +404,7 @@ need_switch_pathgroup (struct multipath * mpp, > int refresh) > struct config *conf; > int bestpg; > > + *need_reload = false; > if (!mpp) > return 0; > > @@ -430,10 +431,9 @@ need_switch_pathgroup (struct multipath * mpp, > int refresh) > return 0; > > mpp->bestpg = bestpg; > - if (mpp->bestpg != mpp->nextpg) > - return 1; > + *need_reload = !path_groups_in_order(mpp); This will start another loop over the path groups. Can we just integrate the path_groups_in_order() logic into the loop right here? > > - return 0; > + return (*need_reload || mpp->bestpg != mpp->nextpg); > } > > static void > @@ -1982,20 +1982,26 @@ ghost_delay_tick(struct vectors *vecs) > } > > static void > -deferred_failback_tick (vector mpvec) > +deferred_failback_tick (struct vectors *vecs) > { > struct multipath * mpp; > unsigned int i; > + bool need_reload; > > - vector_foreach_slot (mpvec, mpp, i) { > + vector_foreach_slot (vecs->mpvec, mpp, i) { > /* > * deferred failback getting sooner > */ > if (mpp->pgfailback > 0 && mpp->failback_tick > 0) { > mpp->failback_tick--; > > - if (!mpp->failback_tick && > need_switch_pathgroup(mpp, 1)) > - switch_pathgroup(mpp); > + if (!mpp->failback_tick && > + need_switch_pathgroup(mpp, 1, > &need_reload)) { > + if (need_reload) > + reload_and_sync_map(mpp, > vecs, 0); > + else > + switch_pathgroup(mpp); > + } > } > } > } > @@ -2579,6 +2585,7 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > int prio_changed = update_prio(pp, new_path_up); > bool need_refresh = (!new_path_up && prio_changed && > pp->priority != PRIO_UNDEF); > + bool need_reload; > > if (prio_changed && > pp->mpp->pgpolicyfn == (pgpolicyfn > *)group_by_prio && > @@ -2586,15 +2593,22 @@ check_path (struct vectors * vecs, struct > path * pp, unsigned int ticks) > condlog(2, "%s: path priorities changed. > reloading", > pp->mpp->alias); > reload_and_sync_map(pp->mpp, vecs, > !new_path_up); > - } else if (need_switch_pathgroup(pp->mpp, > need_refresh)) { > + } else if (need_switch_pathgroup(pp->mpp, > need_refresh, > + &need_reload)) { > if (pp->mpp->pgfailback > 0 && > (new_path_up || pp->mpp->failback_tick <= > 0)) > pp->mpp->failback_tick = > pp->mpp->pgfailback + 1; > else if (pp->mpp->pgfailback == - > FAILBACK_IMMEDIATE || > (chkr_new_path_up && > - followover_should_failback(pp))) > - switch_pathgroup(pp->mpp); > + followover_should_failback(pp))) { > + if (need_reload) > + reload_and_sync_map(pp->mpp, > vecs, > + > !need_refresh && > + > !new_path_up); > + else > + switch_pathgroup(pp->mpp); > + } > } > } > return 1; > @@ -2720,7 +2734,7 @@ unlock: > pthread_cleanup_push(cleanup_lock, &vecs->lock); > lock(&vecs->lock); > pthread_testcancel(); > - deferred_failback_tick(vecs->mpvec); > + deferred_failback_tick(vecs); > retry_count_tick(vecs->mpvec); > missing_uev_wait_tick(vecs); > ghost_delay_tick(vecs); -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel