need_switch_pathgroup() only checks if the currently used pathgroup is not the highest priority pathgroup. If it isn't, all multipathd does is instruct the kernel to switch to the correct pathgroup. However, the kernel treats the pathgroups as if they were ordered by priority. When the kernel runs out of paths to use in the currently selected pathgroup, it will start checking the pathgroups in order until it finds one with usable paths. need_switch_pathgroup() should also check if the pathgroups are out of order, and if so, multipathd should reload the map to reorder them correctly. Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> --- libmultipath/libmultipath.version | 5 ++++ libmultipath/switchgroup.c | 27 ++++++++++++++++++++++ libmultipath/switchgroup.h | 1 + multipathd/main.c | 38 +++++++++++++++++++++---------- 4 files changed, 59 insertions(+), 12 deletions(-) diff --git a/libmultipath/libmultipath.version b/libmultipath/libmultipath.version index 8f72c452..38074699 100644 --- a/libmultipath/libmultipath.version +++ b/libmultipath/libmultipath.version @@ -237,3 +237,8 @@ global: local: *; }; + +LIBMULTIPATH_19.1.0 { +global: + path_groups_in_order; +} LIBMULTIPATH_19.0.0; diff --git a/libmultipath/switchgroup.c b/libmultipath/switchgroup.c index b1e1f39b..b1180839 100644 --- a/libmultipath/switchgroup.c +++ b/libmultipath/switchgroup.c @@ -7,6 +7,33 @@ #include "structs.h" #include "switchgroup.h" +bool path_groups_in_order(struct multipath *mpp) +{ + int i; + struct pathgroup *pgp; + bool seen_marginal_pg = false; + int last_prio = INT_MAX; + + if (VECTOR_SIZE(mpp->pg) < 2) + return true; + + vector_foreach_slot(mpp->pg, pgp, i) { + /* skip pgs with PRIO_UNDEF, since this is likely temporary */ + if (!pgp->paths || pgp->priority == PRIO_UNDEF) + continue; + if (pgp->marginal && !seen_marginal_pg) { + last_prio = INT_MAX; + continue; + } + if (seen_marginal_pg && !pgp->marginal) + return false; + if (pgp->priority > last_prio) + return false; + last_prio = pgp->priority; + } + return true; +} + void path_group_prio_update(struct pathgroup *pgp) { int i; diff --git a/libmultipath/switchgroup.h b/libmultipath/switchgroup.h index 9365e2e2..43dbb6c9 100644 --- a/libmultipath/switchgroup.h +++ b/libmultipath/switchgroup.h @@ -1,2 +1,3 @@ void path_group_prio_update (struct pathgroup * pgp); int select_path_group (struct multipath * mpp); +bool path_groups_in_order(struct multipath *mpp); diff --git a/multipathd/main.c b/multipathd/main.c index e7c272ad..2ea7c76b 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -396,7 +396,7 @@ void put_multipath_config(__attribute__((unused)) void *arg) } static int -need_switch_pathgroup (struct multipath * mpp, int refresh) +need_switch_pathgroup (struct multipath * mpp, int refresh, bool *need_reload) { struct pathgroup * pgp; struct path * pp; @@ -404,6 +404,7 @@ need_switch_pathgroup (struct multipath * mpp, int refresh) struct config *conf; int bestpg; + *need_reload = false; if (!mpp) return 0; @@ -430,10 +431,9 @@ need_switch_pathgroup (struct multipath * mpp, int refresh) return 0; mpp->bestpg = bestpg; - if (mpp->bestpg != mpp->nextpg) - return 1; + *need_reload = !path_groups_in_order(mpp); - return 0; + return (*need_reload || mpp->bestpg != mpp->nextpg); } static void @@ -1982,20 +1982,26 @@ ghost_delay_tick(struct vectors *vecs) } static void -deferred_failback_tick (vector mpvec) +deferred_failback_tick (struct vectors *vecs) { struct multipath * mpp; unsigned int i; + bool need_reload; - vector_foreach_slot (mpvec, mpp, i) { + vector_foreach_slot (vecs->mpvec, mpp, i) { /* * deferred failback getting sooner */ if (mpp->pgfailback > 0 && mpp->failback_tick > 0) { mpp->failback_tick--; - if (!mpp->failback_tick && need_switch_pathgroup(mpp, 1)) - switch_pathgroup(mpp); + if (!mpp->failback_tick && + need_switch_pathgroup(mpp, 1, &need_reload)) { + if (need_reload) + reload_and_sync_map(mpp, vecs, 0); + else + switch_pathgroup(mpp); + } } } } @@ -2579,6 +2585,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) int prio_changed = update_prio(pp, new_path_up); bool need_refresh = (!new_path_up && prio_changed && pp->priority != PRIO_UNDEF); + bool need_reload; if (prio_changed && pp->mpp->pgpolicyfn == (pgpolicyfn *)group_by_prio && @@ -2586,15 +2593,22 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) condlog(2, "%s: path priorities changed. reloading", pp->mpp->alias); reload_and_sync_map(pp->mpp, vecs, !new_path_up); - } else if (need_switch_pathgroup(pp->mpp, need_refresh)) { + } else if (need_switch_pathgroup(pp->mpp, need_refresh, + &need_reload)) { if (pp->mpp->pgfailback > 0 && (new_path_up || pp->mpp->failback_tick <= 0)) pp->mpp->failback_tick = pp->mpp->pgfailback + 1; else if (pp->mpp->pgfailback == -FAILBACK_IMMEDIATE || (chkr_new_path_up && - followover_should_failback(pp))) - switch_pathgroup(pp->mpp); + followover_should_failback(pp))) { + if (need_reload) + reload_and_sync_map(pp->mpp, vecs, + !need_refresh && + !new_path_up); + else + switch_pathgroup(pp->mpp); + } } } return 1; @@ -2720,7 +2734,7 @@ unlock: pthread_cleanup_push(cleanup_lock, &vecs->lock); lock(&vecs->lock); pthread_testcancel(); - deferred_failback_tick(vecs->mpvec); + deferred_failback_tick(vecs); retry_count_tick(vecs->mpvec); missing_uev_wait_tick(vecs); ghost_delay_tick(vecs); -- 2.17.2 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel