need_switch_pathgroup() only checks if the currently used pathgroup is not the highest priority pathgroup. If it isn't, all multipathd does is instruct the kernel to switch to the correct pathgroup. However, the kernel treats the pathgroups as if they were ordered by priority. When the kernel runs out of paths to use in the currently selected pathgroup, it will start checking the pathgroups in order until it finds one with usable paths. need_switch_pathgroup() should also check if the pathgroups are out of order, and if so, multipathd should reload the map to reorder them correctly. Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> --- multipathd/main.c | 70 +++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/multipathd/main.c b/multipathd/main.c index f603d143..584be895 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -395,11 +395,47 @@ void put_multipath_config(__attribute__((unused)) void *arg) rcu_read_unlock(); } +/* + * The path group orderings that this function finds acceptible are different + * from now select_path_group determines the best pathgroup. The idea here is + * to only trigger a kernel reload when it is obvious that the pathgroups would + * be out of order, even if all the paths were usable. Thus pathgroups with + * PRIO_UNDEF are skipped, and the number of enabled paths doesn't matter here. + */ +bool path_groups_in_order(struct multipath *mpp) +{ + int i; + struct pathgroup *pgp; + bool seen_marginal_pg = false; + int last_prio = INT_MAX; + + if (VECTOR_SIZE(mpp->pg) < 2) + return true; + + vector_foreach_slot(mpp->pg, pgp, i) { + if (seen_marginal_pg && !pgp->marginal) + return false; + /* skip pgs with PRIO_UNDEF, since this is likely temporary */ + if (!pgp->paths || pgp->priority == PRIO_UNDEF) + continue; + if (pgp->marginal && !seen_marginal_pg) { + seen_marginal_pg = true; + last_prio = pgp->priority; + continue; + } + if (pgp->priority > last_prio) + return false; + last_prio = pgp->priority; + } + return true; +} + static int -need_switch_pathgroup (struct multipath * mpp) +need_switch_pathgroup (struct multipath * mpp, bool *need_reload) { int bestpg; + *need_reload = false; if (!mpp) return 0; @@ -411,10 +447,9 @@ need_switch_pathgroup (struct multipath * mpp) return 0; mpp->bestpg = bestpg; - if (mpp->bestpg != mpp->nextpg) - return 1; + *need_reload = !path_groups_in_order(mpp); - return 0; + return (*need_reload || mpp->bestpg != mpp->nextpg); } static void @@ -1963,20 +1998,26 @@ ghost_delay_tick(struct vectors *vecs) } static void -deferred_failback_tick (vector mpvec) +deferred_failback_tick (struct vectors *vecs) { struct multipath * mpp; unsigned int i; + bool need_reload; - vector_foreach_slot (mpvec, mpp, i) { + vector_foreach_slot (vecs->mpvec, mpp, i) { /* * deferred failback getting sooner */ if (mpp->pgfailback > 0 && mpp->failback_tick > 0) { mpp->failback_tick--; - if (!mpp->failback_tick && need_switch_pathgroup(mpp)) - switch_pathgroup(mpp); + if (!mpp->failback_tick && + need_switch_pathgroup(mpp, &need_reload)) { + if (need_reload) + reload_and_sync_map(mpp, vecs); + else + switch_pathgroup(mpp); + } } } } @@ -2219,6 +2260,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) struct config *conf; int marginal_pathgroups, marginal_changed = 0; int ret; + bool need_reload; if (((pp->initialized == INIT_OK || pp->initialized == INIT_PARTIAL || pp->initialized == INIT_REQUESTED_UDEV) && !pp->mpp) || @@ -2548,13 +2590,17 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) condlog(2, "%s: path priorities changed. reloading", pp->mpp->alias); reload_and_sync_map(pp->mpp, vecs); - } else if (need_switch_pathgroup(pp->mpp)) { + } else if (need_switch_pathgroup(pp->mpp, &need_reload)) { if (pp->mpp->pgfailback > 0 && (new_path_up || pp->mpp->failback_tick <= 0)) pp->mpp->failback_tick = pp->mpp->pgfailback + 1; else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE || - (chkr_new_path_up && followover_should_failback(pp))) - switch_pathgroup(pp->mpp); + (chkr_new_path_up && followover_should_failback(pp))) { + if (need_reload) + reload_and_sync_map(pp->mpp, vecs); + else + switch_pathgroup(pp->mpp); + } } return 1; } @@ -2680,7 +2726,7 @@ unlock: pthread_cleanup_push(cleanup_lock, &vecs->lock); lock(&vecs->lock); pthread_testcancel(); - deferred_failback_tick(vecs->mpvec); + deferred_failback_tick(vecs); retry_count_tick(vecs->mpvec); missing_uev_wait_tick(vecs); ghost_delay_tick(vecs); -- 2.17.2 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel