On Thu, Dec 17, 2020 at 12:00:15PM +0100, mwilck@xxxxxxxx wrote: > From: Martin Wilck <mwilck@xxxxxxxx> > > We've recently observed various cases of incompletely processed uevents > during initrd processing. Typically, this would leave a dm device in > the state it had after the initial "add" uevent, which is basically unusable, > because udevd had been killed by systemd before processing the subsequent > "change" event. After switching root, the coldplug event would re-read > the db file, which would be in an unusable state, and would not do anything. > In such cases, a RELOAD action with force_udev_reload=1 is in order to > make udev re-process the device completely (DM_UDEV_PRIMARY_SOURCE_FLAG=1 and > DM_SUBSYSTEM_UDEV_FLAG0=0). > > The previous commits > > 2b25a9e libmultipath: select_action(): force udev reload for uninitialized maps > cb10d38 multipathd: uev_trigger(): handle incomplete ADD events > > addressed the same issue, but incompletely. They would miss cases where the > map was configured correctly but none of the RELOAD criteria were met. > This patch partially reverts 2b25a9e by converting select_reload_action() into > a trivial helper. Instead, we now check for incompletely initialized udev > before checking any of the other reload criteria. I'll review this patch tomorrow, but are you able to reproduce this? I've seen something similar, except in the case I saw, multipathd took too long during the initial configuration, and then systemd shut things down for the switch-root before multipath could finish creating the devices. I was thinking of trying to solve cases like this by forcing some ordering on multipathd stopping in the initramfs, with something like Before=initrd-cleanup.service Conflicts=initrd-cleanup.service for the multipathd.service file for the initramfs. The goal is to make sure that things don't get shut down until multipathd has stopped. This would keep multipath from creating devices when udev isn't around to deal with them. 
Did you try something like this? -Ben > Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> > --- > libmultipath/configure.c | 45 ++++++++++++++++++++++++++-------------- > 1 file changed, 29 insertions(+), 16 deletions(-) > > diff --git a/libmultipath/configure.c b/libmultipath/configure.c > index 3dbc1f1..d64fe88 100644 > --- a/libmultipath/configure.c > +++ b/libmultipath/configure.c > @@ -695,12 +695,11 @@ sysfs_set_max_sectors_kb(struct multipath *mpp, int is_reload) > return err; > } > > -static void > -select_reload_action(struct multipath *mpp, const struct multipath *cmpp, > - const char *reason) > +static bool is_udev_ready(struct multipath *cmpp) > { > struct udev_device *mpp_ud; > const char *env; > + bool rc; > > /* > * MPATH_DEVICE_READY != 1 can mean two things: > @@ -712,14 +711,20 @@ select_reload_action(struct multipath *mpp, const struct multipath *cmpp, > */ > > mpp_ud = get_udev_for_mpp(cmpp); > + if (!mpp_ud) > + return true; > env = udev_device_get_property_value(mpp_ud, "MPATH_DEVICE_READY"); > - if ((!env || strcmp(env, "1")) && count_active_paths(mpp) > 0) > - mpp->force_udev_reload = 1; > + rc = (env != NULL && !strcmp(env, "1")); > udev_device_unref(mpp_ud); > + condlog(4, "%s: %s: \"%s\" -> %d\n", __func__, cmpp->alias, env, rc); > + return rc; > +} > + > +static void > +select_reload_action(struct multipath *mpp, const char *reason) > +{ > mpp->action = ACT_RELOAD; > - condlog(3, "%s: set ACT_RELOAD (%s%s)", mpp->alias, > - mpp->force_udev_reload ? 
"forced, " : "", > - reason); > + condlog(3, "%s: set ACT_RELOAD (%s)", mpp->alias, reason); > } > > void select_action (struct multipath *mpp, const struct _vector *curmp, > @@ -788,10 +793,18 @@ void select_action (struct multipath *mpp, const struct _vector *curmp, > return; > } > > + if (!is_udev_ready(cmpp) && count_active_paths(mpp) > 0) { > + mpp->force_udev_reload = 1; > + mpp->action = ACT_RELOAD; > + condlog(3, "%s: set ACT_RELOAD (udev incomplete)", > + mpp->alias); > + return; > + } > + > if (mpp->no_path_retry != NO_PATH_RETRY_UNDEF && > !!strstr(mpp->features, "queue_if_no_path") != > !!strstr(cmpp->features, "queue_if_no_path")) { > - select_reload_action(mpp, cmpp, "no_path_retry change"); > + select_reload_action(mpp, "no_path_retry change"); > return; > } > if ((mpp->retain_hwhandler != RETAIN_HWHANDLER_ON || > @@ -799,7 +812,7 @@ void select_action (struct multipath *mpp, const struct _vector *curmp, > (strlen(cmpp->hwhandler) != strlen(mpp->hwhandler) || > strncmp(cmpp->hwhandler, mpp->hwhandler, > strlen(mpp->hwhandler)))) { > - select_reload_action(mpp, cmpp, "hwhandler change"); > + select_reload_action(mpp, "hwhandler change"); > return; > } > > @@ -807,7 +820,7 @@ void select_action (struct multipath *mpp, const struct _vector *curmp, > !!strstr(mpp->features, "retain_attached_hw_handler") != > !!strstr(cmpp->features, "retain_attached_hw_handler") && > get_linux_version_code() < KERNEL_VERSION(4, 3, 0)) { > - select_reload_action(mpp, cmpp, "retain_hwhandler change"); > + select_reload_action(mpp, "retain_hwhandler change"); > return; > } > > @@ -819,7 +832,7 @@ void select_action (struct multipath *mpp, const struct _vector *curmp, > remove_feature(&cmpp_feat, "queue_if_no_path"); > remove_feature(&cmpp_feat, "retain_attached_hw_handler"); > if (strncmp(mpp_feat, cmpp_feat, PARAMS_SIZE)) { > - select_reload_action(mpp, cmpp, "features change"); > + select_reload_action(mpp, "features change"); > FREE(cmpp_feat); > FREE(mpp_feat); > 
return; > @@ -830,19 +843,19 @@ void select_action (struct multipath *mpp, const struct _vector *curmp, > > if (!cmpp->selector || strncmp(cmpp->selector, mpp->selector, > strlen(mpp->selector))) { > - select_reload_action(mpp, cmpp, "selector change"); > + select_reload_action(mpp, "selector change"); > return; > } > if (cmpp->minio != mpp->minio) { > - select_reload_action(mpp, cmpp, "minio change"); > + select_reload_action(mpp, "minio change"); > return; > } > if (!cmpp->pg || VECTOR_SIZE(cmpp->pg) != VECTOR_SIZE(mpp->pg)) { > - select_reload_action(mpp, cmpp, "path group number change"); > + select_reload_action(mpp, "path group number change"); > return; > } > if (pgcmp(mpp, cmpp)) { > - select_reload_action(mpp, cmpp, "path group topology change"); > + select_reload_action(mpp, "path group topology change"); > return; > } > if (cmpp->nextpg != mpp->bestpg) { > -- > 2.29.0 -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel