On Wed, 2021-02-24 at 00:33 -0600, Benjamin Marzinski wrote: > There are cases where the wwid of a path changes due to LUN remapping > without triggering uevent for the changed path. Multipathd has no > method > for trying to catch these cases, and corruption has resulted because > of > it. > > In order to have a better chance at catching these cases, multipath > now > has a recheck_wwid option. If this is set to "yes", when a failed > path > has become active again, multipathd will recheck its wwid. If > multipathd > notices that a path's wwid has changed, it will remove and re-add the > path, just like the existing wwid checking code for change events > does. > In cases where the no uevent occurs, both the udev database entry and > sysfs will have the old wwid, so the only way to get a current wwid > is > to ask the device directly. Currently multipath only has code to > directly get the wwid for scsi devices, so this option only effects > scsi > devices, and they must be configured to be able to use the > uid_fallback > methods. To make sure both the sysfs and udev database values are > updated, multipathd triggers a both a rescan of the device and a udev > add event. > > Co-developed-by: Chongyun Wu <wucy11@xxxxxxxxxxxxxxx> > Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx> Two minor remarks below, but: Reviewed-by: Martin Wilck <mwilck@xxxxxxxx> > > +static void > +rescan_path(struct udev_device *parent) > +{ > + while(parent) { > + const char *subsys = > udev_device_get_subsystem(parent); > + if (subsys && !strncmp(subsys, "scsi", 4)) > + break; > + parent = udev_device_get_parent(parent); > + } You could have used udev_device_get_parent_with_subsystem_devtype() here. > + if (parent) > + sysfs_attr_set_value(parent, "rescan", "1", > strlen("1")); > +} > + > +void > +handle_path_wwid_change(struct path *pp, struct vectors *vecs) > +{ > + struct udev_device *udd; > + > + if (!pp || !pp->udev) > + return; > + > + udd = udev_device_ref(pp->udev); > + if (ev_remove_path(pp, vecs, 1) != 0 && pp->mpp) { > + pp->dmstate = PSTATE_FAILED; > + dm_fail_path(pp->mpp->alias, pp->dev_t); > + } > + rescan_path(udd); > + sysfs_attr_set_value(udd, "uevent", "add", strlen("add")); > + trigger_partitions_udev_change(udd, "add", strlen("add")); Why do you need to do this for the partitions? > + udev_device_unref(udd); > +} > + > +bool > +check_path_wwid_change(struct path *pp) > +{ > + char wwid[WWID_SIZE]; > + int len = 0; > + size_t i; > + > + if (!strlen(pp->wwid)) > + return false; > + > + /* Get the real fresh device wwid by sgio. sysfs still has > old > + * data, so only get_vpd_sgio will work to get the new wwid > */ > + len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE); > + > + if (len <= 0) { > + condlog(2, "%s: failed to check wwid by sgio: len = > %d", > + pp->dev, len); > + return false; > + } > + > + /*Strip any trailing blanks */ > + for (i = strlen(pp->wwid); i > 0 && pp->wwid[i-1] == ' '; i-- > ); > + /* no-op */ > + pp->wwid[i] = '\0'; > + condlog(4, "%s: Got wwid %s by sgio", pp->dev, wwid); > + > + if (strncmp(wwid, pp->wwid, WWID_SIZE)) { > + condlog(0, "%s: wwid '%s' doesn't match wwid '%s' > from device", > + pp->dev, pp->wwid, wwid); > + return true; > + } > + > + return false; > +} > + > static int > uev_add_path (struct uevent *uev, struct vectors * vecs, int > need_do_map) > { > @@ -1296,6 +1363,7 @@ uev_update_path (struct uevent *uev, struct > vectors * vecs) > condlog(0, "%s: path wwid changed from '%s' > to '%s'", > uev->kernel, wwid, pp->wwid); > ev_remove_path(pp, vecs, 1); > + rescan_path(uev->udev); > needs_reinit = 1; > goto out; > } else { > @@ -2197,6 +2265,16 @@ check_path (struct vectors * vecs, struct path > * pp, unsigned int ticks) > return 0; > set_no_path_retry(pp->mpp); > > + if (pp->recheck_wwid == RECHECK_WWID_ON && > + (newstate == PATH_UP || newstate == PATH_GHOST) && > + ((pp->state != PATH_UP && pp->state != PATH_GHOST) || > + pp->dmstate == PSTATE_FAILED) && > + check_path_wwid_change(pp)) { > + condlog(0, "%s: path wwid change detected. Removing", > pp->dev); > + handle_path_wwid_change(pp, vecs); > + return 0; > + } > + > if ((newstate == PATH_UP || newstate == PATH_GHOST) && > (san_path_check_enabled(pp->mpp) || > marginal_path_check_enabled(pp->mpp))) { > diff --git a/multipathd/main.h b/multipathd/main.h > index 5abbe97b..ddd953f9 100644 > --- a/multipathd/main.h > +++ b/multipathd/main.h > @@ -50,4 +50,6 @@ int update_multipath (struct vectors *vecs, char > *mapname, int reset); > int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs, > int refresh); > > +void handle_path_wwid_change(struct path *pp, struct vectors *vecs); > +bool check_path_wwid_change(struct path *pp); > #endif /* MAIN_H */ -- Dr. Martin Wilck <mwilck@xxxxxxxx>, Tel. +49 (0)911 74053 2107 SUSE Software Solutions Germany GmbH HRB 36809, AG Nürnberg GF: Felix Imendörffer -- dm-devel mailing list dm-devel@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/dm-devel