[PATCH] libceph: Complete stuck requests to OSD with EIO

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Artur Molchanov <artur.molchanov@xxxxxxxxxx>

Complete stuck requests to OSD with error EIO after osd_request_timeout expired.
If osd_request_timeout equals to 0 (default value) then do nothing with
hung requests (keep default behavior).

Create RBD map option osd_request_timeout to set timeout in seconds. Set
osd_request_timeout to 0 by default.

Signed-off-by: Artur Molchanov <artur.molchanov@xxxxxxxxxx>
---
 include/linux/ceph/libceph.h    |  8 +++--
 include/linux/ceph/osd_client.h |  1 +
 net/ceph/ceph_common.c          | 12 +++++++
 net/ceph/osd_client.c           | 72 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e..10d8acb 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
 	unsigned long mount_timeout;		/* jiffies */
 	unsigned long osd_idle_ttl;		/* jiffies */
 	unsigned long osd_keepalive_timeout;	/* jiffies */
+	unsigned long osd_request_timeout;	/* jiffies */

 	/*
 	 * any type that can't be simply compared or doesn't need need
@@ -65,9 +66,10 @@ struct ceph_options {
 /*
  * defaults
  */
-#define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000)
-#define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000)
-#define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000)
+#define CEPH_MOUNT_TIMEOUT_DEFAULT		msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_KEEPALIVE_DEFAULT		msecs_to_jiffies(5 * 1000)
+#define CEPH_OSD_IDLE_TTL_DEFAULT		msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT	msecs_to_jiffies(0 * 1000)

 #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000)
 #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653..e45cba0 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -279,6 +279,7 @@ struct ceph_osd_client {
 	atomic_t               num_homeless;
 	struct delayed_work    timeout_work;
 	struct delayed_work    osds_timeout_work;
+	struct delayed_work    osd_request_timeout_work;
 #ifdef CONFIG_DEBUG_FS
 	struct dentry 	       *debugfs_file;
 #endif
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e885..81876c8 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
 	Opt_osdkeepalivetimeout,
 	Opt_mount_timeout,
 	Opt_osd_idle_ttl,
+	Opt_osd_request_timeout,
 	Opt_last_int,
 	/* int args above */
 	Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
 	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
 	{Opt_mount_timeout, "mount_timeout=%d"},
 	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+	{Opt_osd_request_timeout, "osd_request_timeout=%d"},
 	/* int args above */
 	{Opt_fsid, "fsid=%s"},
 	{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;

 	/* get mon ip(s) */
 	/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
 			}
 			opt->mount_timeout = msecs_to_jiffies(intval * 1000);
 			break;
+		case Opt_osd_request_timeout:
+			/* 0 is "wait forever" (i.e. infinite timeout) */
+			if (intval < 0 || intval > INT_MAX / 1000) {
+				pr_err("osd_request_timeout out of range\n");
+				err = -EINVAL;
+				goto out;
+			}
+			opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+			break;

 		case Opt_share:
 			opt->flags &= ~CEPH_OPT_NOSHARE;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 842f049..2f5a024 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
 			      osdc->client->options->osd_keepalive_timeout);
 }

+/*
+ * Complete request to OSD with error err if it started before cutoff.
+ */
+static void complete_osd_stuck_request(struct ceph_osd_request *req,
+				       unsigned long cutoff,
+				       int err)
+{
+	if (!time_before(req->r_stamp, cutoff))
+		return;
+
+	pr_warn_ratelimited("osd req on osd%d timeout expired\n",
+			    req->r_osd->o_osd);
+
+	complete_request(req, err);
+}
+
+/*
+ * Complete all requests to OSD that has been active for more than timeout.
+ */
+static void complete_osd_stuck_requests(struct ceph_osd *osd,
+					unsigned long timeout,
+					int err)
+{
+	struct ceph_osd_request *req, *n;
+	const unsigned long cutoff = jiffies - timeout;
+
+	rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node) {
+		complete_osd_stuck_request(req, cutoff, -EIO);
+	}
+}
+
+/*
+ * Timeout callback, called every N seconds. When 1 or more OSD
+ * requests that has been active for more than N seconds,
+ * we complete it with error EIO.
+ */
+static void handle_osd_request_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc =
+		container_of(work, struct ceph_osd_client,
+			     osd_request_timeout_work.work);
+	struct ceph_osd *osd, *n;
+	struct ceph_options *opts = osdc->client->options;
+	const unsigned long timeout = opts->osd_request_timeout;
+
+	dout("%s osdc %p\n", __func__, osdc);
+
+	if (!timeout)
+		return;
+
+	down_write(&osdc->lock);
+
+	rbtree_postorder_for_each_entry_safe(osd, n, &osdc->osds, o_node) {
+		complete_osd_stuck_requests(osd, timeout, -EIO);
+	}
+
+	up_write(&osdc->lock);
+
+	if (!atomic_read(&osdc->num_homeless))
+		goto out;
+
+	down_write(&osdc->lock);
+	complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
+	up_write(&osdc->lock);
+
+out:
+	schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
+}
+
 static void handle_osds_timeout(struct work_struct *work)
 {
 	struct ceph_osd_client *osdc =
@@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 	osdc->linger_map_checks = RB_ROOT;
 	INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
 	INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+	INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);

 	err = -ENOMEM;
 	osdc->osdmap = ceph_osdmap_alloc();
@@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 			      osdc->client->options->osd_keepalive_timeout);
 	schedule_delayed_work(&osdc->osds_timeout_work,
 	    round_jiffies_relative(osdc->client->options->osd_idle_ttl));
+	schedule_delayed_work(&osdc->osd_request_timeout_work,
+	    round_jiffies_relative(osdc->client->options->osd_request_timeout));

 	return 0;

--
2.7.4
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux