There are cases where the wwid of a path changes due to LUN remapping without triggering uevent for the changed path. Multipathd has no method for trying to catch these cases, and corruption has resulted because of it. In order to have a better chance at catching these cases, multipath now has a recheck_wwid_time option, which can either be set to "off" or a number of seconds. If a path is failed for equal to or greater than the configured number of seconds, multipathd will recheck its wwid before restoring it, when the path checker sees that it has come back up. If multipathd notices that a path's wwid has changed it will remove and re-add the path, just like the existing wwid checking code for change events does. In cases where the no uevent occurs, both the udev database entry and sysfs will have the old wwid, so the only way to get a current wwid is to ask the device directly. Currently multipath only has code to directly get the wwid for scsi devices, so this option only effects scsi devices. Also, since it's possible the the udev wwid won't match the wwid from get_vpd_sgio(), if multipathd doesn't initially see the two values matching for a device, it will disable this option for that device. If recheck_wwid_time is not turned off, multipathd will also automatically recheck the wwid whenever an existing path gets a add event, or is manually re-added with cli_add_path(). Co-developed-by: Chongyun Wu <wucy11@xxxxxxxxxxxxxxx> Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> --- libmultipath/config.c | 1 + libmultipath/config.h | 1 + libmultipath/configure.c | 4 +- libmultipath/configure.h | 2 + libmultipath/defaults.h | 1 + libmultipath/dict.c | 36 ++++++++++++ libmultipath/libmultipath.version | 6 ++ libmultipath/structs.h | 10 ++++ multipath/multipath.conf.5 | 18 ++++++ multipathd/cli_handlers.c | 9 +++ multipathd/main.c | 92 +++++++++++++++++++++++++++++++ multipathd/main.h | 2 + 12 files changed, 180 insertions(+), 2 deletions(-) diff --git a/libmultipath/config.c b/libmultipath/config.c index be310159..c79ebbeb 100644 --- a/libmultipath/config.c +++ b/libmultipath/config.c @@ -867,6 +867,7 @@ int _init_config (const char *file, struct config *conf) conf->remove_retries = 0; conf->ghost_delay = DEFAULT_GHOST_DELAY; conf->all_tg_pt = DEFAULT_ALL_TG_PT; + conf->recheck_wwid_time = DEFAULT_RECHECK_WWID; /* * preload default hwtable */ diff --git a/libmultipath/config.h b/libmultipath/config.h index 9ce37f16..02ae2407 100644 --- a/libmultipath/config.h +++ b/libmultipath/config.h @@ -187,6 +187,7 @@ struct config { int marginal_pathgroups; int skip_delegate; unsigned int sequence_nr; + int recheck_wwid_time; char * multipath_dir; char * selector; diff --git a/libmultipath/configure.c b/libmultipath/configure.c index 598efe05..6ca1f4bb 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -527,8 +527,8 @@ get_udev_for_mpp(const struct multipath *mpp) return udd; } -static void trigger_partitions_udev_change(struct udev_device *dev, - const char *action, int len) +void trigger_partitions_udev_change(struct udev_device *dev, + const char *action, int len) { struct udev_enumerate *part_enum; struct udev_list_entry *item; diff --git a/libmultipath/configure.h b/libmultipath/configure.h index 6b23ccbb..70cf77a3 100644 --- a/libmultipath/configure.h +++ b/libmultipath/configure.h @@ -58,3 +58,5 @@ int get_refwwid (enum mpath_cmds cmd, const char *dev, enum devtypes dev_type, vector pathvec, char **wwid); struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type); void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath); +void trigger_partitions_udev_change(struct udev_device *dev, const char *action, + int len); diff --git a/libmultipath/defaults.h b/libmultipath/defaults.h index 947ba467..d24b69a0 100644 --- a/libmultipath/defaults.h +++ b/libmultipath/defaults.h @@ -55,6 +55,7 @@ #define DEFAULT_ALL_TG_PT ALL_TG_PT_OFF /* Enable no foreign libraries by default */ #define DEFAULT_ENABLE_FOREIGN "NONE" +#define DEFAULT_RECHECK_WWID RECHECK_WWID_OFF #define CHECKINT_UNDEF UINT_MAX #define DEFAULT_CHECKINT 5 diff --git a/libmultipath/dict.c b/libmultipath/dict.c index bab96146..85782ea3 100644 --- a/libmultipath/dict.c +++ b/libmultipath/dict.c @@ -1401,6 +1401,41 @@ declare_hw_snprint(all_tg_pt, print_yes_no_undef) declare_def_handler(marginal_pathgroups, set_yes_no) declare_def_snprint(marginal_pathgroups, print_yes_no) +static int +set_recheck_wwid_time(vector strvec, void *ptr) +{ + int *int_ptr = (int *)ptr; + char * buff; + + buff = set_value(strvec); + if (!buff) + return 1; + + if (!strcmp(buff, "no") || !strcmp(buff, "off")) + *int_ptr = RECHECK_WWID_OFF; + else if (sscanf(buff, "%d", int_ptr) != 1 || *int_ptr < 0) + *int_ptr = DEFAULT_RECHECK_WWID; + + FREE(buff); + return 0; +} + +int +print_recheck_wwid_time(char * buff, int len, long v) +{ + switch(v) { + case RECHECK_WWID_OFF: + return snprintf(buff, len, "\"off\""); + default: + return snprintf(buff, len, "%li", v); + } +} + + + +declare_def_handler(recheck_wwid_time, set_recheck_wwid_time) +declare_def_snprint(recheck_wwid_time, print_recheck_wwid_time) + static int def_uxsock_timeout_handler(struct config *conf, vector strvec) { @@ -1819,6 +1854,7 @@ init_keywords(vector keywords) install_keyword("enable_foreign", &def_enable_foreign_handler, &snprint_def_enable_foreign); install_keyword("marginal_pathgroups", &def_marginal_pathgroups_handler, &snprint_def_marginal_pathgroups); + install_keyword("recheck_wwid_time", &def_recheck_wwid_time_handler, &snprint_def_recheck_wwid_time); __deprecated install_keyword("default_selector", &def_selector_handler, NULL); __deprecated install_keyword("default_path_grouping_policy", &def_pgpolicy_handler, NULL); __deprecated install_keyword("default_uid_attribute", &def_uid_attribute_handler, NULL); diff --git a/libmultipath/libmultipath.version b/libmultipath/libmultipath.version index 2228f4ec..e9b4608f 100644 --- a/libmultipath/libmultipath.version +++ b/libmultipath/libmultipath.version @@ -280,3 +280,9 @@ LIBMULTIPATH_4.4.0 { global: get_next_string; } LIBMULTIPATH_4.3.0; + +LIBMULITIPATH_4.5.0 { +global: + get_vpd_sgio; + trigger_partitions_udev_change; +} LIBMULTIPATH_4.4.0; diff --git a/libmultipath/structs.h b/libmultipath/structs.h index d6ff6762..357fdc24 100644 --- a/libmultipath/structs.h +++ b/libmultipath/structs.h @@ -38,6 +38,7 @@ #define NO_PATH_RETRY_FAIL -1 #define NO_PATH_RETRY_QUEUE -2 +#define RECHECK_WWID_OFF -1 enum free_path_mode { KEEP_PATHS, @@ -242,6 +243,13 @@ enum eh_deadline_states { EH_DEADLINE_ZERO = UOZ_ZERO, }; + +enum allow_wwid_recheck_states { + ALLOW_WWID_RECHECK_UNSET, + ALLOW_WWID_RECHECK_OFF, + ALLOW_WWID_RECHECK_ON, +}; + struct vpd_vendor_page { int pg; const char *name; @@ -316,6 +324,8 @@ struct path { int find_multipaths_timeout; int marginal; int vpd_vendor_id; + int failed_time; + int allow_wwid_recheck; /* configlet pointers */ vector hwe; struct gen_path generic_path; diff --git a/multipath/multipath.conf.5 b/multipath/multipath.conf.5 index 8ef3a747..50409d57 100644 --- a/multipath/multipath.conf.5 +++ b/multipath/multipath.conf.5 @@ -1273,6 +1273,24 @@ The default is: \fB\(dqNONE\(dq\fR .RE . . +.TP +.B recheck_wwid_time +Sets the number of seconds that a path device can be failed for, before +multipathd rechecks its wwid when it is restored. If the wwid has changed, +the path is removed from the current multipath device, and re-added as a new +path. If set to \fIoff\fR these rechecks are disabled. If set to \fI0\fR, +multipath will always recheck the path's wwid whenever it is restored. If +rechecks are enabled, they will also happen if an \fIadd\fR uevent occurs +for an existing path, or an existing path is manually re-added. These rechecks +will only happen for scsi devices. \fBNote:\fR When the path is originally +added, if the path's configured wwid doesn't match the wwid retrieved directly +from the scsi device, rechecks will be disabled for the device. +.RS +.TP +The default is \fBoff\fR +.RE +. +. . .\" ---------------------------------------------------------------------------- diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c index 54635738..970d5e21 100644 --- a/multipathd/cli_handlers.c +++ b/multipathd/cli_handlers.c @@ -715,6 +715,15 @@ cli_add_path (void * v, char ** reply, int * len, void * data) pp = find_path_by_dev(vecs->pathvec, param); if (pp && pp->initialized != INIT_REMOVED) { condlog(2, "%s: path already in pathvec", param); + + if (pp->allow_wwid_recheck == ALLOW_WWID_RECHECK_ON && + check_path_wwid_change(pp)) { + condlog(0, "%s: wwid changed. Removing device", + pp->dev); + handle_path_wwid_change(pp, vecs); + return 1; + } + if (pp->mpp) return 0; } else if (pp) { diff --git a/multipathd/main.c b/multipathd/main.c index 19679848..17eef3a4 100644 --- a/multipathd/main.c +++ b/multipathd/main.c @@ -823,6 +823,59 @@ ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs) return flush_map(mpp, vecs, 0); } +void +handle_path_wwid_change(struct path *pp, struct vectors *vecs) +{ + const char *action = "add"; + if (!pp || !pp->udev) + return; + + sysfs_attr_set_value(pp->udev, "uevent", action, strlen(action)); + trigger_partitions_udev_change(pp->udev, action, strlen(action)); + if (ev_remove_path(pp, vecs, 1) != 0 && pp->mpp) { + pp->dmstate = PSTATE_FAILED; + dm_fail_path(pp->mpp->alias, pp->dev_t); + } +} + +bool +check_path_wwid_change(struct path *pp) +{ + char wwid[WWID_SIZE]; + int len = 0; + char *c; + + if (!strlen(pp->wwid) || pp->bus != SYSFS_BUS_SCSI) + return false; + + /* Get the real fresh device wwid by sgio. sysfs still has old + * data, so only get_vpd_sgio will work to get the new wwid */ + len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE); + + if (len <= 0) { + condlog(2, "%s: failed to check wwid by sgio: len = %d", + pp->dev, len); + return false; + } + + /*Strip any trailing blanks */ + c = strchr(wwid, '\0'); + c--; + while (c && c >= wwid && *c == ' ') { + *c = '\0'; + c--; + } + condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid); + + if (strncmp(wwid, pp->wwid, WWID_SIZE)) { + condlog(0, "%s: wwid '%s' doesn't match wwid '%s' from device", + pp->dev, pp->wwid, wwid); + return true; + } + + return false; +} + static int uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map) { @@ -919,6 +972,21 @@ uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map) uev->kernel); ret = 1; } + } else if (pp->allow_wwid_recheck == ALLOW_WWID_RECHECK_ON && + check_path_wwid_change(pp)) { + condlog(2, "%s wwid change detected when processing add uevent. removing path", pp->dev); + /* + * don't use handle_path_wwid_change here, + * since that would just trigger another add + * uevent + */ + ret = ev_remove_path(pp, vecs, true); + if (ret == 0) + pp = NULL; + else if (pp->mpp) { + pp->dmstate = PSTATE_FAILED; + dm_fail_path(pp->mpp->alias, pp->dev_t); + } } } if (pp) @@ -2049,6 +2117,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) unsigned int checkint, max_checkint; struct config *conf; int marginal_pathgroups, marginal_changed = 0; + int recheck_wwid_time; int ret; if (((pp->initialized == INIT_OK || @@ -2066,6 +2135,7 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) checkint = conf->checkint; max_checkint = conf->max_checkint; marginal_pathgroups = conf->marginal_pathgroups; + recheck_wwid_time = conf->recheck_wwid_time; put_multipath_config(conf); if (pp->checkint == CHECKINT_UNDEF) { @@ -2197,6 +2267,26 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) return 0; set_no_path_retry(pp->mpp); + if (recheck_wwid_time != RECHECK_WWID_OFF && + (newstate == PATH_UP || newstate == PATH_GHOST)) { + if (pp->allow_wwid_recheck == ALLOW_WWID_RECHECK_UNSET) { + if (check_path_wwid_change(pp)) + pp->allow_wwid_recheck = ALLOW_WWID_RECHECK_OFF; + else + pp->allow_wwid_recheck = ALLOW_WWID_RECHECK_ON; + } else if (((pp->state != PATH_UP && pp->state != PATH_GHOST) || + pp->dmstate == PSTATE_FAILED) && + pp->failed_time >= recheck_wwid_time && + pp->allow_wwid_recheck == ALLOW_WWID_RECHECK_ON && + check_path_wwid_change(pp)) { + condlog(0, "%s: path wwid change detected. Removing", + pp->dev); + handle_path_wwid_change(pp, vecs); + return 0; + } + pp->failed_time = 0; + } + if ((newstate == PATH_UP || newstate == PATH_GHOST) && (san_path_check_enabled(pp->mpp) || marginal_path_check_enabled(pp->mpp))) { @@ -2330,6 +2420,8 @@ check_path (struct vectors * vecs, struct path * pp, unsigned int ticks) if (newstate == PATH_DOWN) { int log_checker_err; + if (recheck_wwid_time != RECHECK_WWID_OFF) + pp->failed_time += pp->checkint; conf = get_multipath_config(); log_checker_err = conf->log_checker_err; put_multipath_config(conf); diff --git a/multipathd/main.h b/multipathd/main.h index 5abbe97b..ddd953f9 100644 --- a/multipathd/main.h +++ b/multipathd/main.h @@ -50,4 +50,6 @@ int update_multipath (struct vectors *vecs, char *mapname, int reset); int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs, int refresh); +void handle_path_wwid_change(struct path *pp, struct vectors *vecs); +bool check_path_wwid_change(struct path *pp); #endif /* MAIN_H */ -- 2.17.2 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel