On Wed, 2024-07-17 at 14:11 -0400, Benjamin Marzinski wrote: > multipathd's path checking can be very bursty, with all the paths > running their path checkers at the same time, and then all doing > nothing > while waiting for the next check time. Alternatively, the paths in a > multipath device might all run their path checkers at a different > time, > which can keep the multipath device from having a coherent view of > the > state of all of its paths. > > This patch makes all the paths of a multipath device converge to > running > their checkers at the same time, and spreads out when this time is > for > the different multipath devices. > > To do this, the checking time is divided into adjustment intervals > (conf->adjust_int), so that the checkers run at some index within > this > interval. conf->adjust_int is chosen so that it is a multiple of all > possible pp->checkint values. This means that regardless of > pp->checkint, the path should always be checked on the same indexes, > each adjustement interval. > > Each multipath device has a goal index. These are evenly spread out > between 0 and conf->max_checkint. Every conf->adjust_int seconds, > each > multipath device should try to check all of its paths on its goal > index. > If the multipath device's check times are not aligned with the goal > index, then pp->tick for the next check will be decremented by one, > to > align it over time. > > In order for the path checkers to run every pp->checkint seconds, > multipathd needs to track how long a path check has been pending for, > and subtract that time from the number of ticks till the checker is > run > again. If the checker has been pending for more that pp->checkint, > the path will be rechecked on the next tick after the checker > returns. > > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> > --- > libmultipath/config.c | 12 ++++++ > libmultipath/config.h | 1 + > libmultipath/structs.c | 1 + > libmultipath/structs.h | 1 + > multipathd/main.c | 91 ++++++++++++++++++++++++++++++++++------ > -- > 5 files changed, 89 insertions(+), 17 deletions(-) > > diff --git a/libmultipath/config.c b/libmultipath/config.c > index 83fa7369..a59533b5 100644 > --- a/libmultipath/config.c > +++ b/libmultipath/config.c > @@ -982,6 +982,18 @@ int _init_config (const char *file, struct > config *conf) > conf->checkint = conf->max_checkint; > condlog(3, "polling interval: %d, max: %d", > conf->checkint, conf->max_checkint); > + /* > + * make sure that that adjust_int is a multiple of all > possible values > + * of pp->checkint. > + */ > + if (conf->max_checkint % conf->checkint == 0) { > + conf->adjust_int = conf->max_checkint; > + } else { > + conf->adjust_int = conf->checkint; > + while (2 * conf->adjust_int < conf->max_checkint) > + conf->adjust_int *= 2; > + conf->adjust_int *= conf->max_checkint; > + } > > if (conf->blist_devnode == NULL) { > conf->blist_devnode = vector_alloc(); > diff --git a/libmultipath/config.h b/libmultipath/config.h > index 384193ab..800c0ca9 100644 > --- a/libmultipath/config.h > +++ b/libmultipath/config.h > @@ -147,6 +147,7 @@ struct config { > int minio_rq; > unsigned int checkint; > unsigned int max_checkint; > + unsigned int adjust_int; > bool use_watchdog; > int pgfailback; > int rr_weight; > diff --git a/libmultipath/structs.c b/libmultipath/structs.c > index 0a26096a..232b4230 100644 > --- a/libmultipath/structs.c > +++ b/libmultipath/structs.c > @@ -149,6 +149,7 @@ uninitialize_path(struct path *pp) > pp->state = PATH_UNCHECKED; > pp->uid_attribute = NULL; > pp->checker_timeout = 0; > + pp->pending_ticks = 0; > > if (checker_selected(&pp->checker)) > checker_put(&pp->checker); > diff --git a/libmultipath/structs.h b/libmultipath/structs.h > index 002eeae1..457d7836 100644 > --- a/libmultipath/structs.h > +++ b/libmultipath/structs.h > @@ -360,6 +360,7 @@ struct path { > unsigned long long size; > unsigned int checkint; > unsigned int tick; > + unsigned int pending_ticks; > int bus; > int offline; > int state; > diff --git a/multipathd/main.c b/multipathd/main.c > index 84450906..87ddb101 100644 > --- a/multipathd/main.c > +++ b/multipathd/main.c > @@ -2388,7 +2388,7 @@ enum check_path_return { > }; > > static int > -check_path (struct vectors * vecs, struct path * pp, unsigned int > ticks) > +do_check_path (struct vectors * vecs, struct path * pp) > { > int newstate; > int new_path_up = 0; > @@ -2400,14 +2400,6 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > int marginal_pathgroups, marginal_changed = 0; > bool need_reload; > > - if (pp->initialized == INIT_REMOVED) > - return CHECK_PATH_SKIPPED; > - > - if (pp->tick) > - pp->tick -= (pp->tick > ticks) ? ticks : pp->tick; > - if (pp->tick) > - return CHECK_PATH_SKIPPED; > - > conf = get_multipath_config(); > checkint = conf->checkint; > max_checkint = conf->max_checkint; > @@ -2419,12 +2411,6 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > pp->checkint = checkint; > }; > > - /* > - * provision a next check soonest, > - * in case we exit abnormally from here > - */ > - pp->tick = checkint; > - > newstate = check_path_state(pp); > if (newstate == PATH_WILD || newstate == PATH_UNCHECKED) > return CHECK_PATH_SKIPPED; > @@ -2587,7 +2573,6 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > condlog(4, "%s: delay next check > %is", > pp->dev_t, pp->checkint); > } > - pp->tick = pp->checkint; > } > } > else if (newstate != PATH_UP && newstate != PATH_GHOST) { > @@ -2640,6 +2625,77 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > return CHECK_PATH_CHECKED; > } > > +static int > +check_path (struct vectors * vecs, struct path * pp, unsigned int > ticks, > + time_t start_secs) > +{ > + int r; > + unsigned int adjust_int, max_checkint; > + struct config *conf; > + time_t next_idx, goal_idx; > + > + if (pp->initialized == INIT_REMOVED) > + return CHECK_PATH_SKIPPED; > + > + if (pp->tick) > + pp->tick -= (pp->tick > ticks) ? ticks : pp->tick; > + if (pp->tick) > + return CHECK_PATH_SKIPPED; > + > + conf = get_multipath_config(); > + max_checkint = conf->max_checkint; > + adjust_int = conf->adjust_int; > + put_multipath_config(conf); > + > + r = do_check_path(vecs, pp); > + > + /* > + * do_check_path() removed or orphaned the path. > + */ > + if (r < 0 || !pp->mpp) > + return r; do_check_path() returns an enum now, so this should be r == CHECK_PATH_REMOVED.