[PATCH 1/7] libmultipath: Add max_retries config option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This option lets multipath set a SCSI disk's max_retries sysfs value.
Setting this can be helpful for cases where the path checker succeeds,
but IO commands hang and time out. By default, the SCSI layer will retry
IOs 5 times. Reducing this value will allow multipath to retry the IO
down another path sooner.

Signed-off-by: Benjamin Marzinski <bmarzins@xxxxxxxxxx>
---
 libmultipath/config.c         |  3 +++
 libmultipath/config.h         |  3 +++
 libmultipath/dict.c           | 34 ++++++++++++++++++++++++++++
 libmultipath/discovery.c      | 42 ++++++++++++++++++++++++++++++++++-
 libmultipath/propsel.c        | 18 +++++++++++++++
 libmultipath/propsel.h        |  1 +
 libmultipath/structs.h        |  7 ++++++
 multipath/multipath.conf.5.in | 22 ++++++++++++++++++
 8 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/libmultipath/config.c b/libmultipath/config.c
index b7dbc6f5..9d90f512 100644
--- a/libmultipath/config.c
+++ b/libmultipath/config.c
@@ -420,6 +420,7 @@ merge_pce(struct pcentry *dst, struct pcentry *src)
 	merge_num(fast_io_fail);
 	merge_num(dev_loss);
 	merge_num(eh_deadline);
+	merge_num(max_retries);
 }
 
 static void
@@ -448,6 +449,7 @@ merge_hwe (struct hwentry * dst, struct hwentry * src)
 	merge_num(fast_io_fail);
 	merge_num(dev_loss);
 	merge_num(eh_deadline);
+	merge_num(max_retries);
 	merge_num(user_friendly_names);
 	merge_num(retain_hwhandler);
 	merge_num(detect_prio);
@@ -615,6 +617,7 @@ store_hwe (vector hwtable, struct hwentry * dhwe)
 	hwe->fast_io_fail = dhwe->fast_io_fail;
 	hwe->dev_loss = dhwe->dev_loss;
 	hwe->eh_deadline = dhwe->eh_deadline;
+	hwe->max_retries = dhwe->max_retries;
 	hwe->user_friendly_names = dhwe->user_friendly_names;
 	hwe->retain_hwhandler = dhwe->retain_hwhandler;
 	hwe->detect_prio = dhwe->detect_prio;
diff --git a/libmultipath/config.h b/libmultipath/config.h
index 8c22ce75..197a567f 100644
--- a/libmultipath/config.h
+++ b/libmultipath/config.h
@@ -47,6 +47,7 @@ struct pcentry {
 	int fast_io_fail;
 	unsigned int dev_loss;
 	int eh_deadline;
+	int max_retries;
 };
 
 struct hwentry {
@@ -72,6 +73,7 @@ struct hwentry {
 	int fast_io_fail;
 	unsigned int dev_loss;
 	int eh_deadline;
+	int max_retries;
 	int user_friendly_names;
 	int retain_hwhandler;
 	int detect_prio;
@@ -162,6 +164,7 @@ struct config {
 	int fast_io_fail;
 	unsigned int dev_loss;
 	int eh_deadline;
+	int max_retries;
 	int log_checker_err;
 	int allow_queueing;
 	int allow_usb_devices;
diff --git a/libmultipath/dict.c b/libmultipath/dict.c
index 044067af..fc438947 100644
--- a/libmultipath/dict.c
+++ b/libmultipath/dict.c
@@ -1152,6 +1152,36 @@ declare_hw_snprint(eh_deadline, print_undef_off_zero)
 declare_pc_handler(eh_deadline, set_undef_off_zero)
 declare_pc_snprint(eh_deadline, print_undef_off_zero)
 
+/* Parse max_retries: "off", "0", or a value handed to do_set_int(1, 5).
+ * Returns 1 if no value could be read, 0 otherwise. */
+static int
+set_max_retries(vector strvec, void *ptr, const char *file, int line_nr)
+{
+	int *retries = ptr;
+	char *val = set_value(strvec);
+
+	if (val == NULL)
+		return 1;
+	if (!strcmp(val, "off"))
+		*retries = UOZ_OFF;
+	else if (!strcmp(val, "0"))
+		*retries = UOZ_ZERO;
+	else
+		do_set_int(strvec, retries, 1, 5, file, line_nr, val);
+
+	free(val);
+	return 0;
+}
+
+declare_def_handler(max_retries, set_max_retries)
+declare_def_snprint(max_retries, print_undef_off_zero)
+declare_ovr_handler(max_retries, set_max_retries)
+declare_ovr_snprint(max_retries, print_undef_off_zero)
+declare_hw_handler(max_retries, set_max_retries)
+declare_hw_snprint(max_retries, print_undef_off_zero)
+declare_pc_handler(max_retries, set_max_retries)
+declare_pc_snprint(max_retries, print_undef_off_zero)
+
 static int
 set_pgpolicy(vector strvec, void *ptr, const char *file, int line_nr)
 {
@@ -2079,6 +2109,7 @@ init_keywords(vector keywords)
 	install_keyword("fast_io_fail_tmo", &def_fast_io_fail_handler, &snprint_def_fast_io_fail);
 	install_keyword("dev_loss_tmo", &def_dev_loss_handler, &snprint_def_dev_loss);
 	install_keyword("eh_deadline", &def_eh_deadline_handler, &snprint_def_eh_deadline);
+	install_keyword("max_retries", &def_max_retries_handler, &snprint_def_max_retries);
 	install_keyword("bindings_file", &deprecated_bindings_file_handler, &snprint_deprecated);
 	install_keyword("wwids_file", &deprecated_wwids_file_handler, &snprint_deprecated);
 	install_keyword("prkeys_file", &deprecated_prkeys_file_handler, &snprint_deprecated);
@@ -2176,6 +2207,7 @@ init_keywords(vector keywords)
 	install_keyword("fast_io_fail_tmo", &hw_fast_io_fail_handler, &snprint_hw_fast_io_fail);
 	install_keyword("dev_loss_tmo", &hw_dev_loss_handler, &snprint_hw_dev_loss);
 	install_keyword("eh_deadline", &hw_eh_deadline_handler, &snprint_hw_eh_deadline);
+	install_keyword("max_retries", &hw_max_retries_handler, &snprint_hw_max_retries);
 	install_keyword("user_friendly_names", &hw_user_friendly_names_handler, &snprint_hw_user_friendly_names);
 	install_keyword("retain_attached_hw_handler", &hw_retain_hwhandler_handler, &snprint_hw_retain_hwhandler);
 	install_keyword("detect_prio", &hw_detect_prio_handler, &snprint_hw_detect_prio);
@@ -2220,6 +2252,7 @@ init_keywords(vector keywords)
 	install_keyword("fast_io_fail_tmo", &ovr_fast_io_fail_handler, &snprint_ovr_fast_io_fail);
 	install_keyword("dev_loss_tmo", &ovr_dev_loss_handler, &snprint_ovr_dev_loss);
 	install_keyword("eh_deadline", &ovr_eh_deadline_handler, &snprint_ovr_eh_deadline);
+	install_keyword("max_retries", &ovr_max_retries_handler, &snprint_ovr_max_retries);
 	install_keyword("user_friendly_names", &ovr_user_friendly_names_handler, &snprint_ovr_user_friendly_names);
 	install_keyword("retain_attached_hw_handler", &ovr_retain_hwhandler_handler, &snprint_ovr_retain_hwhandler);
 	install_keyword("detect_prio", &ovr_detect_prio_handler, &snprint_ovr_detect_prio);
@@ -2248,6 +2281,7 @@ init_keywords(vector keywords)
 	install_keyword("fast_io_fail_tmo", &pc_fast_io_fail_handler, &snprint_pc_fast_io_fail);
 	install_keyword("dev_loss_tmo", &pc_dev_loss_handler, &snprint_pc_dev_loss);
 	install_keyword("eh_deadline", &pc_eh_deadline_handler, &snprint_pc_eh_deadline);
+	install_keyword("max_retries", &pc_max_retries_handler, &snprint_pc_max_retries);
 	install_sublevel_end();
 
 	install_keyword_root("multipaths", &multipaths_handler);
diff --git a/libmultipath/discovery.c b/libmultipath/discovery.c
index 84ce5fe7..ee261d90 100644
--- a/libmultipath/discovery.c
+++ b/libmultipath/discovery.c
@@ -614,6 +614,43 @@ sysfs_set_eh_deadline(struct path *pp)
 	return (ret <= 0);
 }
 
+/* Write pp->max_retries to the scsi_disk sysfs attribute; 0 on success. */
+static int
+sysfs_set_max_retries(struct path *pp)
+{
+	struct udev_device *scsi_dev;
+	STRBUF_ON_STACK(attr);
+	char value[16];
+	int len, ret, retries = pp->max_retries;
+
+	if (retries == MAX_RETRIES_UNSET)
+		return 0;
+	if (!pp->udev || pp->sg_id.host_no < 0)
+		return 1;
+
+	if (retries == MAX_RETRIES_OFF)
+		retries = -1;
+	else if (retries == MAX_RETRIES_ZERO)
+		retries = 0;
+	len = sprintf(value, "%d", retries);
+	scsi_dev = udev_device_get_parent_with_subsystem_devtype(pp->udev,
+			"scsi", "scsi_device");
+	if (scsi_dev == NULL)
+		return 1;
+	if (print_strbuf(&attr, "scsi_disk/%i:%i:%i:%" PRIu64 "/max_retries",
+			 pp->sg_id.host_no, pp->sg_id.channel,
+			 pp->sg_id.scsi_id, pp->sg_id.lun) < 0)
+		return 1;
+
+	ret = sysfs_attr_set_value(scsi_dev, get_strbuf_str(&attr), value, len);
+	if (ret == len)
+		return 0;
+	log_sysfs_attr_set_value(3, ret, "%s/%s: failed to set value to %s",
+				 udev_device_get_sysname(scsi_dev),
+				 get_strbuf_str(&attr), value);
+	return 1;
+}
+
 static void
 sysfs_set_rport_tmo(struct multipath *mpp, struct path *pp)
 {
@@ -875,10 +912,12 @@ sysfs_set_scsi_tmo (struct config *conf, struct multipath *mpp)
 		select_fast_io_fail(conf, pp);
 		select_dev_loss(conf, pp);
 		select_eh_deadline(conf, pp);
+		select_max_retries(conf, pp);
 
 		if (pp->dev_loss == DEV_LOSS_TMO_UNSET &&
 		    pp->fast_io_fail == MP_FAST_IO_FAIL_UNSET &&
-		    pp->eh_deadline == EH_DEADLINE_UNSET)
+		    pp->eh_deadline == EH_DEADLINE_UNSET &&
+		    pp->max_retries == MAX_RETRIES_UNSET)
 			continue;
 
 		if (pp->bus != SYSFS_BUS_SCSI) {
@@ -886,6 +925,7 @@ sysfs_set_scsi_tmo (struct config *conf, struct multipath *mpp)
 			continue;
 		}
 		sysfs_set_eh_deadline(pp);
+		sysfs_set_max_retries(pp);
 
 		if (pp->dev_loss == DEV_LOSS_TMO_UNSET &&
 		    pp->fast_io_fail == MP_FAST_IO_FAIL_UNSET)
diff --git a/libmultipath/propsel.c b/libmultipath/propsel.c
index 44241e2a..15abb9e5 100644
--- a/libmultipath/propsel.c
+++ b/libmultipath/propsel.c
@@ -960,6 +960,24 @@ out:
 	return 0;
 }
 
+int select_max_retries(struct config *conf, struct path *pp)
+{
+	const char *origin;	/* presumably set by the pp_set_*() macros -- confirm */
+	STRBUF_ON_STACK(buff);
+
+	pp_set_ovr_pce(max_retries);
+	pp_set_hwe(max_retries);
+	pp_set_conf(max_retries);
+	pp->max_retries = MAX_RETRIES_UNSET;
+	/* not changing sysfs in the default case, so don't print anything */
+	return 0;
+out:	/* presumably jumped to from the pp_set_*() macros -- confirm */
+	print_undef_off_zero(&buff, pp->max_retries);
+	condlog(3, "%s: max_retries = %s %s", pp->dev,
+		get_strbuf_str(&buff), origin);
+	return 0;
+}
+
 int select_flush_on_last_del(struct config *conf, struct multipath *mp)
 {
 	const char *origin;
diff --git a/libmultipath/propsel.h b/libmultipath/propsel.h
index 73615c2f..7203509e 100644
--- a/libmultipath/propsel.h
+++ b/libmultipath/propsel.h
@@ -21,6 +21,7 @@ int select_gid(struct config *conf, struct multipath *mp);
 int select_fast_io_fail(struct config *conf, struct path *pp);
 int select_dev_loss(struct config *conf, struct path *pp);
 int select_eh_deadline(struct config *conf, struct path *pp);
+int select_max_retries(struct config *conf, struct path *pp);
 int select_reservation_key(struct config *conf, struct multipath *mp);
 int select_retain_hwhandler (struct config *conf, struct multipath * mp);
 int select_detect_prio(struct config *conf, struct path * pp);
diff --git a/libmultipath/structs.h b/libmultipath/structs.h
index 17e13ee7..c20e99ce 100644
--- a/libmultipath/structs.h
+++ b/libmultipath/structs.h
@@ -295,6 +295,12 @@ enum eh_deadline_states {
 	EH_DEADLINE_ZERO = UOZ_ZERO,
 };
 
+enum max_retries_states {
+	MAX_RETRIES_UNSET = UOZ_UNDEF,	/* sysfs max_retries is left untouched */
+	MAX_RETRIES_OFF = UOZ_OFF,	/* written to sysfs as -1 */
+	MAX_RETRIES_ZERO = UOZ_ZERO,	/* written to sysfs as 0 */
+};
+
 enum recheck_wwid_states {
 	RECHECK_WWID_UNDEF = YNU_UNDEF,
 	RECHECK_WWID_OFF = YNU_NO,
@@ -381,6 +387,7 @@ struct path {
 	int fast_io_fail;
 	unsigned int dev_loss;
 	int eh_deadline;
+	int max_retries;
 	bool is_checked;
 	bool can_use_env_uid;
 	unsigned int checker_timeout;
diff --git a/multipath/multipath.conf.5.in b/multipath/multipath.conf.5.in
index 226d0019..41f3927e 100644
--- a/multipath/multipath.conf.5.in
+++ b/multipath/multipath.conf.5.in
@@ -793,6 +793,22 @@ The default is: \fB<unset>\fR
 .
 .
 .TP
+.B max_retries
+Specify the maximum number of times the SCSI layer will retry IO commands
+before returning failure. Setting this can be helpful for cases where the path
+checker succeeds, but IO commands hang and time out. By default, the SCSI layer
+will retry IOs 5 times. Reducing this value will allow multipath to retry the IO
+down another path sooner. \fBNote:\fR If it is necessary to set this value, it
+is also recommended to set up shaky paths detection. See "Shaky paths detection"
+below. Valid values are
+\fB0\fR through \fB5\fR.
+.RS
+.TP
+The default is: \fB<unset>\fR
+.RE
+.
+.
+.TP
 .B bindings_file
 (Deprecated) This option is not supported any more, and will be ignored.
 .RS
@@ -1687,6 +1703,8 @@ section:
 .TP
 .B eh_deadline
 .TP
+.B max_retries
+.TP
 .B flush_on_last_del
 .TP
 .B user_friendly_names
@@ -1773,6 +1791,8 @@ the values are taken from the \fIdevices\fR or \fIdefaults\fR sections:
 .TP
 .B eh_deadline
 .TP
+.B max_retries
+.TP
 .B user_friendly_names
 .TP
 .B retain_attached_hw_handler
@@ -1844,6 +1864,8 @@ from the \fIoverrides\fR, \fIdevices\fR, or \fIdefaults\fR section:
 .B dev_loss_tmo
 .TP
 .B eh_deadline
+.TP
+.B max_retries
 .PD
 .
 .
-- 
2.41.0





[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux