On Wed, Dec 11, 2024 at 11:58:58PM +0100, Martin Wilck wrote: > Rather than calling sync_mpp early in the checkerloop and tracking > map synchronization with synced_count, call sync_mpp() in the CHECKER_FINISHED > state, if either at least one path of the map has been checked in the current > iteration, or the sync tick has expired. This avoids potentially deleting > paths from the pathvec through the do_sync_mpp() -> update_multipath_strings() > -> sync_paths -> check_removed_paths() call chain while we're iterating over > the pathvec. Also, the time gap between obtaining path states and syncing > the state with the kernel is smaller this way. > > Suggested-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> > --- > libmultipath/structs.h | 2 +- > libmultipath/structs_vec.c | 1 - > multipathd/main.c | 26 +++++++++++--------------- > 3 files changed, 12 insertions(+), 17 deletions(-) > > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > index 6a30c59..9d22bdd 100644 > --- a/libmultipath/structs.h > +++ b/libmultipath/structs.h > @@ -471,7 +471,7 @@ struct multipath { > int ghost_delay_tick; > int queue_mode; > unsigned int sync_tick; > - int synced_count; > + int checker_count; > enum prio_update_type prio_update; > uid_t uid; > gid_t gid; > diff --git a/libmultipath/structs_vec.c b/libmultipath/structs_vec.c > index 7a4e3eb..6aa744d 100644 > --- a/libmultipath/structs_vec.c > +++ b/libmultipath/structs_vec.c > @@ -530,7 +530,6 @@ update_multipath_table (struct multipath *mpp, vector pathvec, int flags) > conf = get_multipath_config(); > mpp->sync_tick = conf->max_checkint; > put_multipath_config(conf); > - mpp->synced_count++; > > r = libmp_mapinfo(DM_MAP_BY_NAME | MAPINFO_MPATH_ONLY, > (mapid_t) { .str = mpp->alias }, > diff --git a/multipathd/main.c b/multipathd/main.c > index 4a28fbb..e4e6bf7 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -2470,7 +2470,7 @@ sync_mpp(struct vectors * vecs, struct multipath *mpp, unsigned int ticks) > if (mpp->sync_tick) > mpp->sync_tick -= (mpp->sync_tick > ticks) ? ticks : > mpp->sync_tick; > - if (mpp->sync_tick) > + if (mpp->sync_tick && !mpp->checker_count) > return; > > do_sync_mpp(vecs, mpp); > @@ -2513,12 +2513,6 @@ update_path_state (struct vectors * vecs, struct path * pp) > return handle_path_wwid_change(pp, vecs)? CHECK_PATH_REMOVED : > CHECK_PATH_SKIPPED; > } > - if (pp->mpp->synced_count == 0) { > - do_sync_mpp(vecs, pp->mpp); > - /* if update_multipath_strings orphaned the path, quit early */ > - if (!pp->mpp) > - return CHECK_PATH_SKIPPED; > - } > if ((newstate != PATH_UP && newstate != PATH_GHOST && > newstate != PATH_PENDING) && (pp->state == PATH_DELAYED)) { > /* If path state become failed again cancel path delay state */ > @@ -2918,9 +2912,11 @@ check_paths(struct vectors *vecs, unsigned int ticks) > vector_foreach_slot(vecs->pathvec, pp, i) { > if (pp->is_checked != CHECK_PATH_UNCHECKED) > continue; > - if (pp->mpp) > + if (pp->mpp) { > pp->is_checked = check_path(pp, ticks); > - else > + if (pp->is_checked == CHECK_PATH_STARTED) > + pp->mpp->checker_count++; > + } else > pp->is_checked = check_uninitialized_path(pp, ticks); > if (pp->is_checked == CHECK_PATH_STARTED && > checker_need_wait(&pp->checker)) > @@ -3014,12 +3010,10 @@ checkerloop (void *ap) > pthread_cleanup_push(cleanup_lock, &vecs->lock); > lock(&vecs->lock); > pthread_testcancel(); > - vector_foreach_slot(vecs->mpvec, mpp, i) > - mpp->synced_count = 0; > if (checker_state == CHECKER_STARTING) { > vector_foreach_slot(vecs->mpvec, mpp, i) { > - sync_mpp(vecs, mpp, ticks); > mpp->prio_update = PRIO_UPDATE_NONE; > + mpp->checker_count = 0; > } > vector_foreach_slot(vecs->pathvec, pp, i) > pp->is_checked = CHECK_PATH_UNCHECKED; > @@ -3032,11 +3026,13 @@ checkerloop (void *ap) > start_time.tv_sec); > if (checker_state == CHECKER_FINISHED) { > vector_foreach_slot(vecs->mpvec, mpp, i) { > - if ((update_mpp_prio(mpp) || > - (mpp->need_reload && mpp->synced_count > 0)) && When you call reload_and_sync_map(), it will automatically resync the map via setup_multipath() -> refresh_multipath() -> update_multipath_strings(). This means that if for some reason multipathd and the kernel disagree about a map, and reloading it doesn't fix the problem, you will immediately set mpp->need_reload again. With the old mpp->synced_count check, you only reload maps with need_reload() when a path is checked. Without this check, or a (mpp->checker_count > 0) check to replace it, you will keep reloading these maps every loop, roughly once a second. I would rather not do this. If you want to make sure to immediately handle a need_reload that wasn't triggered by this call to reload_and_sync_map() which was because of an earlier need_reload, we could make need_reload have three states, to distinguish between a reload we want done immediately, and one we would like to wait on because we just did a reload and it didn't fix the problem. Then we could remember if need_reload was set before calling reload_and_sync_map(), and if it was, and if it is still set after, we could switch it to the delayed version. Or perhaps I'm just being paranoid here. -Ben > - reload_and_sync_map(mpp, vecs) == 2) > + sync_mpp(vecs, mpp, ticks); > + if ((update_mpp_prio(mpp) || mpp->need_reload) && > + reload_and_sync_map(mpp, vecs) == 2) { > /* multipath device deleted */ > i--; > + continue; > + } > } > } > lock_cleanup_pop(vecs->lock); > -- > 2.47.0