Changing max_sectors_kb on a live map is dangerous. When I/O occurs while the max_sectors_kb value of a map is larger than that of any of its paths, I/O errors like this may result: blk_insert_cloned_request: over max size limit. (127 > 126) This situation must be strictly avoided. The kernel makes sure that the map's limits match those of the paths when the map is created. But setting max_sectors_kb on path devices before reloading is dangerous, even if we read the value from the map's max_sectors_kb beforehand. The reason for this is that the sysfs value max_sectors_kb is the kernel's max_sectors divided by 2, and user space has no way to figure out if the kernel-internal value is odd or even. Thus by writing back the value just read to max_sectors_kb, one might actually decrease the kernel value by one. The only safe way to set max_sectors_kb on a live map would be to suspend it, flushing all outstanding IO, then the path max_sectors_kb, reload, and resume. Since commit 8fd4868 ("libmultipath: don't set max_sectors_kb on reloads"), we don't set the configured max_sectors_kb any more. But as shown above, this is not safe. So really only set max_sectors_kb when creating a map. Users who have stacked block devices on top of multipath, as described in the commit message of 8fd4868 must make sure that the max_sectors_kb setting is correct when the multipath map is first created. Decreasing the map's max_sectors_kb value (without touching the paths) should actually be possible in this situation, because stacked block devices are usually bio-based, and bio-based IO (in contrast to request-based) can be split if the lower-level device has can't handle the size of the I/O. Signed-off-by: Martin Wilck <mwilck@xxxxxxxx> --- libmultipath/configure.c | 38 ++++---------------------------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/libmultipath/configure.c b/libmultipath/configure.c index 13602f3..8cbb2a8 100644 --- a/libmultipath/configure.c +++ b/libmultipath/configure.c @@ -566,46 +566,18 @@ trigger_paths_udev_change(struct multipath *mpp, bool is_mpath) mpp->needs_paths_uevent = 0; } -static int -sysfs_set_max_sectors_kb(struct multipath *mpp, int is_reload) +static int sysfs_set_max_sectors_kb(struct multipath *mpp) { struct pathgroup * pgp; struct path *pp; char buff[11]; ssize_t len; int i, j, ret, err = 0; - struct udev_device *udd; - int max_sectors_kb; if (mpp->max_sectors_kb == MAX_SECTORS_KB_UNDEF) return 0; - max_sectors_kb = mpp->max_sectors_kb; - if (is_reload) { - if (!has_dm_info(mpp) && - dm_get_info(mpp->alias, &mpp->dmi) != 0) { - condlog(1, "failed to get dm info for %s", mpp->alias); - return 1; - } - udd = get_udev_for_mpp(mpp); - if (!udd) { - condlog(1, "failed to get udev device to set max_sectors_kb for %s", mpp->alias); - return 1; - } - ret = sysfs_attr_get_value(udd, "queue/max_sectors_kb", buff, - sizeof(buff)); - udev_device_unref(udd); - if (!sysfs_attr_value_ok(ret, sizeof(buff))) { - condlog(1, "failed to get current max_sectors_kb from %s", mpp->alias); - return 1; - } - if (sscanf(buff, "%u\n", &max_sectors_kb) != 1) { - condlog(1, "can't parse current max_sectors_kb from %s", - mpp->alias); - return 1; - } - } - snprintf(buff, 11, "%d", max_sectors_kb); - len = strlen(buff); + + len = snprintf(buff, sizeof(buff), "%d", mpp->max_sectors_kb); vector_foreach_slot (mpp->pg, pgp, i) { vector_foreach_slot(pgp->paths, pp, j) { @@ -923,7 +895,7 @@ int domap(struct multipath *mpp, char *params, int is_daemon) return DOMAP_RETRY; } - sysfs_set_max_sectors_kb(mpp, 0); + sysfs_set_max_sectors_kb(mpp); if (is_daemon && mpp->ghost_delay > 0 && count_active_paths(mpp) && pathcount(mpp, PATH_UP) == 0) mpp->ghost_delay_tick = mpp->ghost_delay; @@ -934,7 +906,6 @@ int domap(struct multipath *mpp, char *params, int is_daemon) case ACT_RELOAD: case ACT_RELOAD_RENAME: - sysfs_set_max_sectors_kb(mpp, 1); if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP)) mpp->ghost_delay_tick = 0; r = dm_addmap_reload(mpp, params, 0); @@ -942,7 +913,6 @@ int domap(struct multipath *mpp, char *params, int is_daemon) case ACT_RESIZE: case ACT_RESIZE_RENAME: - sysfs_set_max_sectors_kb(mpp, 1); if (mpp->ghost_delay_tick > 0 && pathcount(mpp, PATH_UP)) mpp->ghost_delay_tick = 0; r = dm_addmap_reload(mpp, params, 1); -- 2.44.0