From: Artur Molchanov <artur.molchanov@xxxxxxxxxx>
Complete stuck OSD requests with error EIO once osd_request_timeout has expired.
If osd_request_timeout is 0 (the default), hung requests are left untouched,
which preserves the existing behavior.
Add an RBD map option, osd_request_timeout, that sets the timeout in seconds.
It defaults to 0.
Signed-off-by: Artur Molchanov <artur.molchanov@xxxxxxxxxx>
---
include/linux/ceph/libceph.h | 8 +++--
include/linux/ceph/osd_client.h | 1 +
net/ceph/ceph_common.c | 12 +++++++
net/ceph/osd_client.c | 72 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 90 insertions(+), 3 deletions(-)
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 1816c5e..10d8acb 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -48,6 +48,7 @@ struct ceph_options {
unsigned long mount_timeout; /* jiffies */
unsigned long osd_idle_ttl; /* jiffies */
unsigned long osd_keepalive_timeout; /* jiffies */
+ unsigned long osd_request_timeout; /* jiffies */
/*
* any type that can't be simply compared or doesn't need need
@@ -65,9 +66,10 @@ struct ceph_options {
/*
* defaults
*/
-#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
-#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
-#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_MOUNT_TIMEOUT_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_KEEPALIVE_DEFAULT msecs_to_jiffies(5 * 1000)
+#define CEPH_OSD_IDLE_TTL_DEFAULT msecs_to_jiffies(60 * 1000)
+#define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT	0UL	/* jiffies; 0 = no timeout */
#define CEPH_MONC_HUNT_INTERVAL msecs_to_jiffies(3 * 1000)
#define CEPH_MONC_PING_INTERVAL msecs_to_jiffies(10 * 1000)
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 03a6653..e45cba0 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -279,6 +279,7 @@ struct ceph_osd_client {
atomic_t num_homeless;
struct delayed_work timeout_work;
struct delayed_work osds_timeout_work;
+ struct delayed_work osd_request_timeout_work;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_file;
#endif
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 464e885..81876c8 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -230,6 +230,7 @@ enum {
Opt_osdkeepalivetimeout,
Opt_mount_timeout,
Opt_osd_idle_ttl,
+ Opt_osd_request_timeout,
Opt_last_int,
/* int args above */
Opt_fsid,
@@ -256,6 +257,7 @@ static match_table_t opt_tokens = {
{Opt_osdkeepalivetimeout, "osdkeepalive=%d"},
{Opt_mount_timeout, "mount_timeout=%d"},
{Opt_osd_idle_ttl, "osd_idle_ttl=%d"},
+ {Opt_osd_request_timeout, "osd_request_timeout=%d"},
/* int args above */
{Opt_fsid, "fsid=%s"},
{Opt_name, "name=%s"},
@@ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name,
opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT;
opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT;
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
+ opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
/* get mon ip(s) */
/* ip1[:port1][,ip2[:port2]...] */
@@ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name,
}
opt->mount_timeout = msecs_to_jiffies(intval * 1000);
break;
+ case Opt_osd_request_timeout:
+ /* 0 is "wait forever" (i.e. infinite timeout) */
+ if (intval < 0 || intval > INT_MAX / 1000) {
+ pr_err("osd_request_timeout out of range\n");
+ err = -EINVAL;
+ goto out;
+ }
+ opt->osd_request_timeout = msecs_to_jiffies(intval * 1000);
+ break;
case Opt_share:
opt->flags &= ~CEPH_OPT_NOSHARE;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 842f049..2f5a024 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2554,6 +2554,75 @@ static void handle_timeout(struct work_struct *work)
osdc->client->options->osd_keepalive_timeout);
}
+/*
+ * Complete @req with @err when its r_stamp lies before @cutoff (jiffies);
+ * a request stamped at or after @cutoff is left untouched.  A rate-limited
+ * warning naming the request's OSD is logged before the completion.
+ */
+static void complete_osd_stuck_request(struct ceph_osd_request *req,
+				       unsigned long cutoff,
+				       int err)
+{
+	if (time_before(req->r_stamp, cutoff)) {
+		pr_warn_ratelimited("osd req on osd%d timeout expired\n",
+				    req->r_osd->o_osd);
+		complete_request(req, err);
+	}
+}
+
+/*
+ * Complete with @err every request queued on @osd whose r_stamp is more
+ * than @timeout jiffies in the past.
+ * NOTE(review): postorder walk is unsafe if completion rb_erase()s req.
+ */
+static void complete_osd_stuck_requests(struct ceph_osd *osd,
+					unsigned long timeout, int err)
+{
+	struct ceph_osd_request *req, *n;
+	const unsigned long cutoff = jiffies - timeout;
+
+	rbtree_postorder_for_each_entry_safe(req, n, &osd->o_requests, r_node)
+		complete_osd_stuck_request(req, cutoff, err);
+}
+
+/*
+ * Delayed-work handler for osd_request_timeout_work.  With a non-zero
+ * osd_request_timeout it walks every OSD in osdc->osds and then, if
+ * num_homeless is non-zero, the homeless OSD, completing with -EIO each
+ * request older than the timeout; it then re-arms itself one timeout
+ * period later.  A zero timeout returns at once and does not re-arm.
+ * NOTE(review): this patch adds no cancel_delayed_work_sync() for this
+ * work item; teardown needs one - confirm.
+ */
+static void handle_osd_request_timeout(struct work_struct *work)
+{
+	struct ceph_osd_client *osdc = container_of(work,
+			struct ceph_osd_client, osd_request_timeout_work.work);
+	const unsigned long timeout =
+		osdc->client->options->osd_request_timeout;
+	struct ceph_osd *cur, *tmp;
+
+	dout("%s osdc %p\n", __func__, osdc);
+
+	if (timeout == 0)
+		return;
+
+	/* Pass 1: requests attached to the OSDs in osdc->osds. */
+	down_write(&osdc->lock);
+	rbtree_postorder_for_each_entry_safe(cur, tmp, &osdc->osds, o_node)
+		complete_osd_stuck_requests(cur, timeout, -EIO);
+	up_write(&osdc->lock);
+
+	/* Pass 2: homeless requests, under a separate lock acquisition. */
+	if (atomic_read(&osdc->num_homeless)) {
+		down_write(&osdc->lock);
+		complete_osd_stuck_requests(&osdc->homeless_osd, timeout, -EIO);
+		up_write(&osdc->lock);
+	}
+
+	schedule_delayed_work(&osdc->osd_request_timeout_work, timeout);
+}
+
static void handle_osds_timeout(struct work_struct *work)
{
struct ceph_osd_client *osdc =
@@ -4091,6 +4160,7 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
ceph_client *client)
osdc->linger_map_checks = RB_ROOT;
INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout);
INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout);
+ INIT_DELAYED_WORK(&osdc->osd_request_timeout_work, handle_osd_request_timeout);
err = -ENOMEM;
osdc->osdmap = ceph_osdmap_alloc();
@@ -4120,6 +4190,8 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct
ceph_client *client)
osdc->client->options->osd_keepalive_timeout);
schedule_delayed_work(&osdc->osds_timeout_work,
round_jiffies_relative(osdc->client->options->osd_idle_ttl));
+ schedule_delayed_work(&osdc->osd_request_timeout_work,
+ round_jiffies_relative(osdc->client->options->osd_request_timeout));
return 0;
--
2.7.4
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html