Hi Martin, In one of the patch "[PATCH 00/19] san_path_err & multipath ANA support" you have mentioned that san_path_err_XXX has some merits over marginal_path_err_XXX. Is this understanding correct if so could you please explain the scenario in which use case this was better. I can say Marginal_path_err_xx is superset of san_path_err_xx. If we need both san_path_err_xx , Marginal_path_err_xx then so many configurations will really confuse the customers. Regards, Muneendra. -----Original Message----- From: Martin Wilck [mailto:mwilck@xxxxxxxx] Sent: Wednesday, December 19, 2018 4:49 AM To: Christophe Varoqui <christophe.varoqui@xxxxxxxxxxx> Cc: Benjamin Marzinski <bmarzins@xxxxxxxxxx>; dm-devel@xxxxxxxxxx; Hannes Reinecke <hare@xxxxxxx>; Martin Wilck <mwilck@xxxxxxxx>; Guan Junxiong <guanjunxiong@xxxxxxxxxx>; M Muneendra Kumar <mmandala@xxxxxxxxxxx> Subject: [PATCH 04/19] Revert "multipath-tools: discard san_path_err_XXX feature" This reverts commit 9cf6a48f18a291982af34b4fb0110654b94e591c. We removed this functionality prematurely. I am not convinced that the "marginal_path" code really replaces it. Let customers evaluate the different options, and vote with their feet. Cc: Guan Junxiong <guanjunxiong@xxxxxxxxxx> Cc: M Muneendra Kumar <mmandala@xxxxxxxxxxx> Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> --- libmultipath/config.c | 3 ++ libmultipath/config.h | 9 ++++ libmultipath/configure.c | 3 ++ libmultipath/dict.c | 39 ++++++++++++++++++ libmultipath/propsel.c | 53 ++++++++++++++++++++++++ libmultipath/propsel.h | 3 ++ libmultipath/structs.h | 7 ++++ multipath/multipath.conf.5 | 57 ++++++++++++++++++++++++++ multipathd/main.c | 84 ++++++++++++++++++++++++++++++++++++++ 9 files changed, 258 insertions(+) diff --git a/libmultipath/config.c b/libmultipath/config.c index 5af7af58..24d71aed 100644 --- a/libmultipath/config.c +++ b/libmultipath/config.c @@ -369,6 +369,9 @@ merge_hwe (struct hwentry * dst, struct hwentry * src) merge_num(max_sectors_kb); merge_num(ghost_delay); merge_num(all_tg_pt); + merge_num(san_path_err_threshold); + merge_num(san_path_err_forget_rate); + merge_num(san_path_err_recovery_time); snprintf(id, sizeof(id), "%s/%s", dst->vendor, dst->product); reconcile_features_with_options(id, &dst->features, diff --git a/libmultipath/config.h b/libmultipath/config.h index 7d0cd9a6..b938c26c 100644 --- a/libmultipath/config.h +++ b/libmultipath/config.h @@ -76,6 +76,9 @@ struct hwentry { int deferred_remove; int delay_watch_checks; int delay_wait_checks; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; int marginal_path_err_sample_time; int marginal_path_err_rate_threshold; int marginal_path_err_recheck_gap_time; @@ -112,6 +115,9 @@ struct mpentry { int deferred_remove; int delay_watch_checks; int delay_wait_checks; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; int marginal_path_err_sample_time; int marginal_path_err_rate_threshold; int marginal_path_err_recheck_gap_time; @@ -162,6 +168,9 @@ struct config { int processed_main_config; int delay_watch_checks; int delay_wait_checks; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; int marginal_path_err_sample_time; int marginal_path_err_rate_threshold; int marginal_path_err_recheck_gap_time; diff --git a/libmultipath/configure.c b/libmultipath/configure.c index 84ae5f56..60a98873 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -309,6 +309,9 @@ int setup_map(struct multipath *mpp, char *params, int params_size, select_deferred_remove(conf, mpp); select_delay_watch_checks(conf, mpp); select_delay_wait_checks(conf, mpp); + select_san_path_err_threshold(conf, mpp); + select_san_path_err_forget_rate(conf, mpp); + select_san_path_err_recovery_time(conf, mpp); select_marginal_path_err_sample_time(conf, mpp); select_marginal_path_err_rate_threshold(conf, mpp); select_marginal_path_err_recheck_gap_time(conf, mpp); diff --git a/libmultipath/dict.c b/libmultipath/dict.c index a81c051f..fd29abca 100644 --- a/libmultipath/dict.c +++ b/libmultipath/dict.c @@ -1217,6 +1217,33 @@ declare_hw_handler(delay_wait_checks, set_off_int_undef) declare_hw_snprint(delay_wait_checks, print_off_int_undef) declare_mp_handler(delay_wait_checks, set_off_int_undef) declare_mp_snprint(delay_wait_checks, print_off_int_undef) +declare_def_handler(san_path_err_threshold, set_off_int_undef) +declare_def_snprint_defint(san_path_err_threshold, print_off_int_undef, + DEFAULT_ERR_CHECKS) +declare_ovr_handler(san_path_err_threshold, set_off_int_undef) +declare_ovr_snprint(san_path_err_threshold, print_off_int_undef) +declare_hw_handler(san_path_err_threshold, set_off_int_undef) +declare_hw_snprint(san_path_err_threshold, print_off_int_undef) +declare_mp_handler(san_path_err_threshold, set_off_int_undef) +declare_mp_snprint(san_path_err_threshold, print_off_int_undef) +declare_def_handler(san_path_err_forget_rate, set_off_int_undef) +declare_def_snprint_defint(san_path_err_forget_rate, print_off_int_undef, + DEFAULT_ERR_CHECKS) +declare_ovr_handler(san_path_err_forget_rate, set_off_int_undef) +declare_ovr_snprint(san_path_err_forget_rate, print_off_int_undef) +declare_hw_handler(san_path_err_forget_rate, set_off_int_undef) +declare_hw_snprint(san_path_err_forget_rate, print_off_int_undef) +declare_mp_handler(san_path_err_forget_rate, set_off_int_undef) +declare_mp_snprint(san_path_err_forget_rate, print_off_int_undef) +declare_def_handler(san_path_err_recovery_time, set_off_int_undef) +declare_def_snprint_defint(san_path_err_recovery_time, print_off_int_undef, + DEFAULT_ERR_CHECKS) +declare_ovr_handler(san_path_err_recovery_time, set_off_int_undef) +declare_ovr_snprint(san_path_err_recovery_time, print_off_int_undef) +declare_hw_handler(san_path_err_recovery_time, set_off_int_undef) +declare_hw_snprint(san_path_err_recovery_time, print_off_int_undef) +declare_mp_handler(san_path_err_recovery_time, set_off_int_undef) +declare_mp_snprint(san_path_err_recovery_time, print_off_int_undef) declare_def_handler(marginal_path_err_sample_time, set_off_int_undef) declare_def_snprint_defint(marginal_path_err_sample_time, print_off_int_undef, DEFAULT_ERR_CHECKS) @@ -1620,6 +1647,9 @@ init_keywords(vector keywords) install_keyword("config_dir", &def_config_dir_handler, &snprint_def_config_dir); install_keyword("delay_watch_checks", &def_delay_watch_checks_handler, &snprint_def_delay_watch_checks); install_keyword("delay_wait_checks", &def_delay_wait_checks_handler, &snprint_def_delay_wait_checks); + install_keyword("san_path_err_threshold", &def_san_path_err_threshold_handler, &snprint_def_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &def_san_path_err_forget_rate_handler, &snprint_def_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", +&def_san_path_err_recovery_time_handler, +&snprint_def_san_path_err_recovery_time); install_keyword("marginal_path_err_sample_time", &def_marginal_path_err_sample_time_handler, &snprint_def_marginal_path_err_sample_time); install_keyword("marginal_path_err_rate_threshold", &def_marginal_path_err_rate_threshold_handler, &snprint_def_marginal_path_err_rate_threshold); install_keyword("marginal_path_err_recheck_gap_time", &def_marginal_path_err_recheck_gap_time_handler, &snprint_def_marginal_path_err_recheck_gap_time); @@ -1714,6 +1744,9 @@ init_keywords(vector keywords) install_keyword("deferred_remove", &hw_deferred_remove_handler, &snprint_hw_deferred_remove); install_keyword("delay_watch_checks", &hw_delay_watch_checks_handler, &snprint_hw_delay_watch_checks); install_keyword("delay_wait_checks", &hw_delay_wait_checks_handler, &snprint_hw_delay_wait_checks); + install_keyword("san_path_err_threshold", &hw_san_path_err_threshold_handler, &snprint_hw_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &hw_san_path_err_forget_rate_handler, &snprint_hw_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", +&hw_san_path_err_recovery_time_handler, +&snprint_hw_san_path_err_recovery_time); install_keyword("marginal_path_err_sample_time", &hw_marginal_path_err_sample_time_handler, &snprint_hw_marginal_path_err_sample_time); install_keyword("marginal_path_err_rate_threshold", &hw_marginal_path_err_rate_threshold_handler, &snprint_hw_marginal_path_err_rate_threshold); install_keyword("marginal_path_err_recheck_gap_time", &hw_marginal_path_err_recheck_gap_time_handler, &snprint_hw_marginal_path_err_recheck_gap_time); @@ -1750,6 +1783,9 @@ init_keywords(vector keywords) install_keyword("deferred_remove", &ovr_deferred_remove_handler, &snprint_ovr_deferred_remove); install_keyword("delay_watch_checks", &ovr_delay_watch_checks_handler, &snprint_ovr_delay_watch_checks); install_keyword("delay_wait_checks", &ovr_delay_wait_checks_handler, &snprint_ovr_delay_wait_checks); + install_keyword("san_path_err_threshold", &ovr_san_path_err_threshold_handler, &snprint_ovr_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &ovr_san_path_err_forget_rate_handler, &snprint_ovr_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", +&ovr_san_path_err_recovery_time_handler, +&snprint_ovr_san_path_err_recovery_time); install_keyword("marginal_path_err_sample_time", &ovr_marginal_path_err_sample_time_handler, &snprint_ovr_marginal_path_err_sample_time); install_keyword("marginal_path_err_rate_threshold", &ovr_marginal_path_err_rate_threshold_handler, &snprint_ovr_marginal_path_err_rate_threshold); install_keyword("marginal_path_err_recheck_gap_time", &ovr_marginal_path_err_recheck_gap_time_handler, &snprint_ovr_marginal_path_err_recheck_gap_time); @@ -1785,6 +1821,9 @@ init_keywords(vector keywords) install_keyword("deferred_remove", &mp_deferred_remove_handler, &snprint_mp_deferred_remove); install_keyword("delay_watch_checks", &mp_delay_watch_checks_handler, &snprint_mp_delay_watch_checks); install_keyword("delay_wait_checks", &mp_delay_wait_checks_handler, &snprint_mp_delay_wait_checks); + install_keyword("san_path_err_threshold", &mp_san_path_err_threshold_handler, &snprint_mp_san_path_err_threshold); + install_keyword("san_path_err_forget_rate", &mp_san_path_err_forget_rate_handler, &snprint_mp_san_path_err_forget_rate); + install_keyword("san_path_err_recovery_time", +&mp_san_path_err_recovery_time_handler, +&snprint_mp_san_path_err_recovery_time); install_keyword("marginal_path_err_sample_time", &mp_marginal_path_err_sample_time_handler, &snprint_mp_marginal_path_err_sample_time); install_keyword("marginal_path_err_rate_threshold", &mp_marginal_path_err_rate_threshold_handler, &snprint_mp_marginal_path_err_rate_threshold); install_keyword("marginal_path_err_recheck_gap_time", &mp_marginal_path_err_recheck_gap_time_handler, &snprint_mp_marginal_path_err_recheck_gap_time); diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c index 7b19fed0..a4d114c0 100644 --- a/libmultipath/propsel.c +++ b/libmultipath/propsel.c @@ -879,6 +879,59 @@ out: } +int select_san_path_err_threshold(struct config *conf, struct multipath +*mp) { + const char *origin; + char buff[12]; + + mp_set_mpe(san_path_err_threshold); + mp_set_ovr(san_path_err_threshold); + mp_set_hwe(san_path_err_threshold); + mp_set_conf(san_path_err_threshold); + mp_set_default(san_path_err_threshold, DEFAULT_ERR_CHECKS); +out: + print_off_int_undef(buff, 12, mp->san_path_err_threshold); + condlog(3, "%s: san_path_err_threshold = %s %s", mp->alias, buff, + origin); + return 0; +} + +int select_san_path_err_forget_rate(struct config *conf, struct +multipath *mp) { + const char *origin; + char buff[12]; + + mp_set_mpe(san_path_err_forget_rate); + mp_set_ovr(san_path_err_forget_rate); + mp_set_hwe(san_path_err_forget_rate); + mp_set_conf(san_path_err_forget_rate); + mp_set_default(san_path_err_forget_rate, DEFAULT_ERR_CHECKS); +out: + print_off_int_undef(buff, 12, mp->san_path_err_forget_rate); + condlog(3, "%s: san_path_err_forget_rate = %s %s", mp->alias, + buff, origin); + return 0; + +} + +int select_san_path_err_recovery_time(struct config *conf, struct +multipath *mp) { + const char *origin; + char buff[12]; + + mp_set_mpe(san_path_err_recovery_time); + mp_set_ovr(san_path_err_recovery_time); + mp_set_hwe(san_path_err_recovery_time); + mp_set_conf(san_path_err_recovery_time); + mp_set_default(san_path_err_recovery_time, DEFAULT_ERR_CHECKS); +out: + print_off_int_undef(buff, 12, mp->san_path_err_recovery_time); + condlog(3, "%s: san_path_err_recovery_time = %s %s", mp->alias, + buff, origin); + return 0; + +} + int select_marginal_path_err_sample_time(struct config *conf, struct multipath *mp) { const char *origin; diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h index ae99b927..b352c16a 100644 --- a/libmultipath/propsel.h +++ b/libmultipath/propsel.h @@ -26,6 +26,9 @@ int select_delay_watch_checks (struct config *conf, struct multipath * mp); int select_delay_wait_checks (struct config *conf, struct multipath * mp); int select_skip_kpartx (struct config *conf, struct multipath * mp); int select_max_sectors_kb (struct config *conf, struct multipath * mp); +int select_san_path_err_forget_rate(struct config *conf, struct +multipath *mp); int select_san_path_err_threshold(struct config *conf, +struct multipath *mp); int select_san_path_err_recovery_time(struct +config *conf, struct multipath *mp); int select_marginal_path_err_sample_time(struct config *conf, struct multipath *mp); int select_marginal_path_err_rate_threshold(struct config *conf, struct multipath *mp); int select_marginal_path_err_recheck_gap_time(struct config *conf, struct multipath *mp); diff --git a/libmultipath/structs.h b/libmultipath/structs.h index d8961164..96df8c8a 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -280,6 +280,10 @@ struct path { int initialized; int retriggers; int wwid_changed; + unsigned int path_failures; + time_t dis_reinstate_time; + int disable_reinstate; + int san_path_err_forget_rate; time_t io_err_dis_reinstate_time; int io_err_disable_reinstate; int io_err_pathfail_cnt; @@ -318,6 +322,9 @@ struct multipath { int deferred_remove; int delay_watch_checks; int delay_wait_checks; + int san_path_err_threshold; + int san_path_err_forget_rate; + int san_path_err_recovery_time; int marginal_path_err_sample_time; int marginal_path_err_rate_threshold; int marginal_path_err_recheck_gap_time; diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5 index 68119baa..35e6d37c 100644 --- a/multipath/multipath.conf.5 +++ b/multipath/multipath.conf.5 @@ -891,6 +891,45 @@ The default is: \fB/etc/multipath/conf.d/\fR . . .TP +.B san_path_err_threshold +If set to a value greater than 0, multipathd will watch paths and check +how many times a path has been failed due to errors.If the number of +failures on a particular path is greater then the +san_path_err_threshold then the path will not reinstante till +san_path_err_recovery_time.These path failures should occur within a +san_path_err_forget_rate checks, if not we will consider the path is good enough to reinstantate. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B san_path_err_forget_rate +If set to a value greater than 0, multipathd will check whether the +path failures has exceeded the san_path_err_threshold within this many +checks i.e san_path_err_forget_rate . If so we will not reinstante the +path till san_path_err_recovery_time. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP +.B san_path_err_recovery_time +If set to a value greater than 0, multipathd will make sure that when +path failures has exceeded the san_path_err_threshold within +san_path_err_forget_rate then the path will be placed in failed state +for san_path_err_recovery_time duration.Once san_path_err_recovery_time has timeout we will reinstante the failed path . +san_path_err_recovery_time value should be in secs. +.RS +.TP +The default is: \fBno\fR +.RE +. +. +.TP .B marginal_path_double_failed_time One of the four parameters of supporting path check based on accounting IO error such as intermittent error. When a path failed event occurs twice in @@ -1297,6 +1336,12 @@ section: .TP .B deferred_remove .TP +.B san_path_err_threshold +.TP +.B san_path_err_forget_rate +.TP +.B san_path_err_recovery_time +.TP .B marginal_path_err_sample_time .TP .B marginal_path_err_rate_threshold @@ -1448,6 +1493,12 @@ section: .TP .B deferred_remove .TP +.B san_path_err_threshold +.TP +.B san_path_err_forget_rate +.TP +.B san_path_err_recovery_time +.TP .B marginal_path_err_sample_time .TP .B marginal_path_err_rate_threshold @@ -1524,6 +1575,12 @@ the values are taken from the \fIdevices\fR or \fIdefaults\fR sections: .TP .B deferred_remove .TP +.B san_path_err_threshold +.TP +.B san_path_err_forget_rate +.TP +.B san_path_err_recovery_time +.TP .B marginal_path_err_sample_time .TP .B marginal_path_err_rate_threshold diff --git a/multipathd/main.c b/multipathd/main.c index 99145293..57bb7143 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -1833,6 +1833,84 @@ int update_path_groups(struct multipath *mpp, struct vectors *vecs, int refresh) return 0; } +static int check_path_reinstate_state(struct path * pp) { + struct timespec curr_time; + if (!((pp->mpp->san_path_err_threshold > 0) && + (pp->mpp->san_path_err_forget_rate > 0) && + (pp->mpp->san_path_err_recovery_time >0))) { + return 0; + } + + if (pp->disable_reinstate) { + /* If we don't know how much time has passed, automatically + * reinstate the path, just to be safe. Also, if there are + * no other usable paths, reinstate the path + */ + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0 || + pp->mpp->nr_active == 0) { + condlog(2, "%s : reinstating path early", pp->dev); + goto reinstate_path; + } + if ((curr_time.tv_sec - pp->dis_reinstate_time ) > pp->mpp->san_path_err_recovery_time) { + condlog(2,"%s : reinstate the path after err recovery time", pp->dev); + goto reinstate_path; + } + return 1; + } + /* forget errors on a working path */ + if ((pp->state == PATH_UP || pp->state == PATH_GHOST) && + pp->path_failures > 0) { + if (pp->san_path_err_forget_rate > 0){ + pp->san_path_err_forget_rate--; + } else { + /* for every san_path_err_forget_rate number of + * successful path checks decrement path_failures by 1 + */ + pp->path_failures--; + pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate; + } + return 0; + } + + /* If the path isn't recovering from a failed state, do nothing */ + if (pp->state != PATH_DOWN && pp->state != PATH_SHAKY && + pp->state != PATH_TIMEOUT) + return 0; + + if (pp->path_failures == 0) + pp->san_path_err_forget_rate = pp->mpp->san_path_err_forget_rate; + + pp->path_failures++; + + /* if we don't know the currently time, we don't know how long to + * delay the path, so there's no point in checking if we should + */ + + if (clock_gettime(CLOCK_MONOTONIC, &curr_time) != 0) + return 0; + /* when path failures has exceeded the san_path_err_threshold + * place the path in delayed state till san_path_err_recovery_time + * so that the cutomer can rectify the issue within this time. After + * the completion of san_path_err_recovery_time it should + * automatically reinstate the path + */ + if (pp->path_failures > pp->mpp->san_path_err_threshold) { + condlog(2, "%s : hit error threshold. Delaying path reinstatement", pp->dev); + pp->dis_reinstate_time = curr_time.tv_sec; + pp->disable_reinstate = 1; + + return 1; + } else { + return 0; + } + +reinstate_path: + pp->path_failures = 0; + pp->disable_reinstate = 0; + pp->san_path_err_forget_rate = 0; + return 0; +} + /* * Returns '1' if the path has been checked, '-1' if it was blacklisted * and '0' otherwise @@ -1980,6 +2058,12 @@ check_path (struct vectors * vecs, struct path * pp, int ticks) if (!pp->mpp) return 0; + if ((newstate == PATH_UP || newstate == PATH_GHOST) && + check_path_reinstate_state(pp)) { + pp->state = PATH_DELAYED; + return 1; + } + if (pp->io_err_disable_reinstate && hit_io_err_recheck_time(pp)) { pp->state = PATH_SHAKY; /* -- 2.19.2 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel