[PATCH] multipathd: LUN protection by checking path's wwid change status

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From 37c74873acfc1587e79a6504ca3d42b8fa00d49e Mon Sep 17 00:00:00 2001

From: Chongyun Wu <wucy11@xxxxxxxxxxxxxxx>
Date: Mon, 21 Dec 2020 09:51:20 +0800
Subject: [PATCH] multipathd: LUN data protection by checking path's wwid
 change status

Issue description:
A) Device sda and sdb correspend to LUN1 and LUN2 in storage backend and
the upper layer application uses those two devices.
B) Doing illegal operation: unmapping LUN1 and LUN2 in storage backend,
and export LUN2 and LUN1 to host with exchanged assignment relation
between sda and sdb.
C) The upper layer application run for a while and found that the data
in both LUN1 and LUN2 has been corrupted.

Protection method:
Before processing below process check the path wwid first, if wwid
changed just remove this path not to use it again:
1)process add path uevent;
2)process path reinstate;
3)process add path cli command;
Note: multipath already have chang uvevent wwid change check but
fellow above issue reproduce step, there is not change uevent but
have add path event and remove uevent(sometimes).

Test result:
After switching the assignment relation in storage backend, the related
device has been removed or keep failed state and LUN data not been
corrupted again.

Signed-off-by: Chongyun Wu <wucy11@xxxxxxxxxxxxxxx>
---
 libmultipath/configure.c  | 13 ++++++++
 libmultipath/configure.h  |  1 +
 multipathd/cli_handlers.c |  7 +++++
 multipathd/main.c         | 63 +++++++++++++++++++++++++++++++++++++++
 multipathd/main.h         |  2 +-
 5 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/libmultipath/configure.c b/libmultipath/configure.c
index 6fb477fc..6dc8245f 100644
--- a/libmultipath/configure.c
+++ b/libmultipath/configure.c
@@ -569,6 +569,19 @@ unref:
     udev_enumerate_unref(part_enum);
 }

+void
+trigger_path_udev_remove(struct path *pp)
+{
+    const char *action = "remove";
+
+    if (!pp || !pp->udev)
+        return;
+
+    condlog(0, "%s wwid changed, trigger %s uevent", pp->dev, action);
+    sysfs_attr_set_value(pp->udev, "uevent", action, strlen(action));
+    trigger_partitions_udev_change(pp->udev, action, strlen(action));
+}
+
 void
 trigger_paths_udev_change(struct multipath *mpp, bool is_mpath)
 {
diff --git a/libmultipath/configure.h b/libmultipath/configure.h
index 6b23ccbb..2c1697ef 100644
--- a/libmultipath/configure.h
+++ b/libmultipath/configure.h
@@ -58,3 +58,4 @@ int get_refwwid (enum mpath_cmds cmd, const char *dev, enum devtypes dev_type,
          vector pathvec, char **wwid);
 struct udev_device *get_udev_device(const char *dev, enum devtypes dev_type);
 void trigger_paths_udev_change(struct multipath *mpp, bool is_mpath);
+void trigger_path_udev_remove(struct path *pp);
diff --git a/multipathd/cli_handlers.c b/multipathd/cli_handlers.c
index 235e2a2e..836f5465 100644
--- a/multipathd/cli_handlers.c
+++ b/multipathd/cli_handlers.c
@@ -715,6 +715,13 @@ cli_add_path (void * v, char ** reply, int * len, void * data)
     pp = find_path_by_dev(vecs->pathvec, param);
     if (pp && pp->initialized != INIT_REMOVED) {
         condlog(2, "%s: path already in pathvec", param);
+
+        /* Check if WWID has changed to protect data from being wirtten to the wrong device */
+        if (check_wwid_change(pp, vecs)) {
+            condlog(0, "cli add path failed for %s wwid changed", pp->dev);
+            return 1;
+        }
+
         if (pp->mpp)
             return 0;
     } else if (pp) {
diff --git a/multipathd/main.c b/multipathd/main.c
index a4abbb27..adedaf7a 100644
--- a/multipathd/main.c
+++ b/multipathd/main.c
@@ -813,6 +813,43 @@ ev_remove_map (char * devname, char * alias, int minor, struct vectors * vecs)
     return flush_map(mpp, vecs, 0);
 }

+bool check_wwid_change(struct path *pp, struct vectors *vecs)
+{
+    char wwid[WWID_SIZE];
+    int len =0;
+    char *c;
+
+    if (!strlen(pp->wwid))
+        return false;
+
+    /* Get the real fresh device wwid by sgio */
+    len = get_vpd_sgio(pp->fd, 0x83, 0, wwid, WWID_SIZE);
+    condlog(3, "Get wwid(%s) of dev(%s) by sgio", wwid, pp->dev);
+
+    if (len <= 0) {
+        condlog(3, "Failed to get wwid for %s by sgio: len = %d", pp->dev, len);
+        return false;
+    } else {
+        /*Strip any trailing blanks */
+        c = strchr(wwid, '\0');
+        c--;
+        while (c && c >= wwid && *c == ' ') {
+            *c = '\0';
+            c--;
+        }
+    }
+
+    if (strncmp(wwid, pp->wwid, WWID_SIZE)) {
+        condlog(0, "path %s wwid changed from '%s' to '%s', try to remove it",
+                pp->dev, pp->wwid, wwid);
+        trigger_path_udev_remove(pp);
+        ev_remove_path(pp, vecs, 1);
+        return true;
+    }
+
+    return false;
+}
+
 static int
 uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
 {
@@ -910,6 +947,19 @@ uev_add_path (struct uevent *uev, struct vectors * vecs, int need_do_map)
                     uev->kernel);
                 ret = 1;
             }
+        } else if (pp->mpp && strlen(pp->wwid)) {
+            /*
+             * Data protection: check path wwid and remove this path when wwid changed,
+             * cause of storage backend assignment might been changed
+             * by illegal operation make this path actually indicte to other LUN and kernel might +             * not adapt scsi layer assginment automatic, so multipathd need to check it and
+             * make some protection by removing this path.
+             */
+            if (check_wwid_change(pp, vecs)) {
+                condlog(2, "Detect dev %s wwid changed when processing add uvent",
+                       pp->dev);
+                ret = 1;
+            }
         }
     }
     if (pp)
@@ -1700,6 +1750,19 @@ reinstate_path (struct path * pp)
     if (!pp->mpp)
         return;

+    /*
+     * Data protection before reinstate path: check path wwid and remove this path +     * when wwid changed, cause of storage backend assignment might been changed +     * by illegal operation make this path actually indicte to other LUN and kernel might +     * not adapt scsi layer assginment automatic, so multipathd need to check it and +     * make some protection by removing this path or at lest not to use this path.
+     */
+    if (check_wwid_change(pp, gvecs)) {
+        condlog(0, "Device %s wwid changed not to reinstate this path and try to remove it",
+                pp->dev);
+        return;
+    }
+
     if (dm_reinstate_path(pp->mpp->alias, pp->dev_t))
         condlog(0, "%s: reinstate failed", pp->dev_t);
     else {
diff --git a/multipathd/main.h b/multipathd/main.h
index 5abbe97b..1b4328e0 100644
--- a/multipathd/main.h
+++ b/multipathd/main.h
@@ -49,5 +49,5 @@ int __setup_multipath (struct vectors * vecs, struct multipath * mpp,
 int update_multipath (struct vectors *vecs, char *mapname, int reset);
 int reload_and_sync_map(struct multipath *mpp, struct vectors *vecs,
             int refresh);
-
+bool check_wwid_change(struct path * pp, struct vectors * vecs);
 #endif /* MAIN_H */
--


--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux