On Tue, 2023-06-06 at 15:13 -0500, Benjamin Marzinski wrote: > need_switch_pathgroup() only checks if the currently used pathgroup > is > not the highest priority pathgroup. If it isn't, all multipathd does > is > instruct the kernel to switch to the correct pathgroup. However, the > kernel treats the pathgroups as if they were ordered by priority. > When > the kernel runs out of paths to use in the currently selected > pathgroup, > it will start checking the pathgroups in order until it finds one > with > usable paths. > > need_switch_pathgroup() should also check if the pathgroups are out > of > order, and if so, multipathd should reload the map to reorder them > correctly. > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > --- > multipathd/main.c | 69 ++++++++++++++++++++++++++++++++++++++------- > -- > 1 file changed, 57 insertions(+), 12 deletions(-) > > diff --git a/multipathd/main.c b/multipathd/main.c > index f603d143..05c74e9e 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -395,11 +395,46 @@ void > put_multipath_config(__attribute__((unused)) void *arg) > rcu_read_unlock(); > } > > +/* > + * The path group orderings that this function finds acceptible are > different > + * from now select_path_group determines the best pathgroup. The > idea here is > + * to only trigger a kernel reload when it is obvious that the > pathgroups would > + * be out of order, even if all the paths were usable. Thus > pathgroups with > + * PRIO_UNDEF are skipped, and the number of enabled paths doesn't > matter here. > + */ > +bool path_groups_in_order(struct multipath *mpp) > +{ > + int i; > + struct pathgroup *pgp; > + bool seen_marginal_pg = false; > + int last_prio = INT_MAX; > + > + if (VECTOR_SIZE(mpp->pg) < 2) > + return true; > + > + vector_foreach_slot(mpp->pg, pgp, i) { > + /* skip pgs with PRIO_UNDEF, since this is likely > temporary */ > + if (!pgp->paths || pgp->priority == PRIO_UNDEF) > + continue; > + if (pgp->marginal && !seen_marginal_pg) { > + last_prio = INT_MAX; > + continue; > + } > + if (seen_marginal_pg && !pgp->marginal) > + return false; > + if (pgp->priority > last_prio) > + return false; > + last_prio = pgp->priority; > + } > + return true; > +} I still don't get the logic here. What's the point of using seen_marginal_pg if it is always false? See my previous reply to v1 of this patch. Regards Martin > + > static int > -need_switch_pathgroup (struct multipath * mpp) > +need_switch_pathgroup (struct multipath * mpp, bool *need_reload) > { > int bestpg; > > + *need_reload = false; > if (!mpp) > return 0; > > @@ -411,10 +446,9 @@ need_switch_pathgroup (struct multipath * mpp) > return 0; > > mpp->bestpg = bestpg; > - if (mpp->bestpg != mpp->nextpg) > - return 1; > + *need_reload = !path_groups_in_order(mpp); > > - return 0; > + return (*need_reload || mpp->bestpg != mpp->nextpg); > } > > static void > @@ -1963,20 +1997,26 @@ ghost_delay_tick(struct vectors *vecs) > } > > static void > -deferred_failback_tick (vector mpvec) > +deferred_failback_tick (struct vectors *vecs) > { > struct multipath * mpp; > unsigned int i; > + bool need_reload; > > - vector_foreach_slot (mpvec, mpp, i) { > + vector_foreach_slot (vecs->mpvec, mpp, i) { > /* > * deferred failback getting sooner > */ > if (mpp->pgfailback > 0 && mpp->failback_tick > 0) { > mpp->failback_tick--; > > - if (!mpp->failback_tick && > need_switch_pathgroup(mpp)) > - switch_pathgroup(mpp); > + if (!mpp->failback_tick && > + need_switch_pathgroup(mpp, &need_reload)) > { > + if (need_reload) > + reload_and_sync_map(mpp, > vecs); > + else > + switch_pathgroup(mpp); > + } > } > } > } > @@ -2219,6 +2259,7 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > struct config *conf; > int marginal_pathgroups, marginal_changed = 0; > int ret; > + bool need_reload; > > if (((pp->initialized == INIT_OK || pp->initialized == > INIT_PARTIAL || > pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp) || > @@ -2548,13 +2589,17 @@ check_path (struct vectors * vecs, struct > path * pp, unsigned int ticks) > condlog(2, "%s: path priorities changed. reloading", > pp->mpp->alias); > reload_and_sync_map(pp->mpp, vecs); > - } else if (need_switch_pathgroup(pp->mpp)) { > + } else if (need_switch_pathgroup(pp->mpp, &need_reload)) { > if (pp->mpp->pgfailback > 0 && > (new_path_up || pp->mpp->failback_tick <= 0)) > pp->mpp->failback_tick = pp->mpp->pgfailback > + 1; > else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE > || > - (chkr_new_path_up && > followover_should_failback(pp))) > - switch_pathgroup(pp->mpp); > + (chkr_new_path_up && > followover_should_failback(pp))) { > + if (need_reload) > + reload_and_sync_map(pp->mpp, vecs); > + else > + switch_pathgroup(pp->mpp); > + } > } > return 1; > } > @@ -2680,7 +2725,7 @@ unlock: > pthread_cleanup_push(cleanup_lock, &vecs->lock); > lock(&vecs->lock); > pthread_testcancel(); > - deferred_failback_tick(vecs->mpvec); > + deferred_failback_tick(vecs); > retry_count_tick(vecs->mpvec); > missing_uev_wait_tick(vecs); > ghost_delay_tick(vecs); -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel