mpath: don't fail paths on first error

Mike Christie <michaelc@xxxxxxxxxxx> · Thu, 05 Jun 2008 12:25:31 -0500

The problem we see a lot at Red Hat is that if drivers fail a command 
with DID_BUS_BUSY or DID_ERROR for something like underrun or even for 
transient path problems, we can normally recover from this pretty 
quickly and we do not need to switch path groups.

queue_if_no_path/no_path_retry will prevent IO from being failed upwards, 
but just switching paths can cause a lot of strain on the target, so we 
might want to prevent path switching when we do not need to. If we are 
using a box that requires manual failover or a box that does not use 
manual failover but still has to shift resources between storage 
controllers when switching paths, we most likely do not want to mark 
paths failed for these transient errors.

The attached patch allows us to wait X seconds before marking a path as 
failed. If, within X seconds of seeing the first IO error, we do not 
see an IO complete successfully, then we mark the path as failed. This 
patch works best with the fail fast enhancements, where for a lot of 
path problems the fast io fail / recovery timeout will fail IO quickly 
to us and the test IOs do not get stuck, and where some errors like 
DID_ERROR are not even failed fast.

The patch should apply over Linus's tree or scsi-misc.
From 46dfea9ca7f41d65202fa6580fde24118b42f958 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@xxxxxxxxxxx>
Date: Mon, 2 Jun 2008 02:50:41 -0500
Subject: [PATCH 1/1] dm-mpath: don't fail paths on first error

If we get a transient error then we may not want to fail the path
right away. This patch fails the path after X seconds.

I am not sure how valuable this is. If users just set the no_path_retry
option then we end up with similar results. Without the patch, but with
no_path_retry set, the IO is quickly sent to the new path and has a
smaller chance of getting sent to a queue that is blocked. With the patch
we might avoid some of the path failure messages that scare users. But
most users are not setting no_path_retry. Will they set this new timer?

Signed-off-by: Mike Christie <michaelc@xxxxxxxxxxx>
---
 drivers/md/dm-mpath.c |   36 ++++++++++++++++++++++++++++++++++--
 1 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index e7ee59e..4a24219 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -25,12 +25,19 @@
 #define DM_MSG_PREFIX "multipath"
 #define MESG_STR(x) x, sizeof(x)
 
+/*
+ * TODO: pass this in instead of hard coding it
+ */
+#define DM_DEV_LOSS_TMO (5 * HZ)	/* jiffies from first IO error until a path may be failed */
+
 /* Path properties */
 struct pgpath {
 	struct list_head list;
 
 	struct priority_group *pg;	/* Owning PG */
 	unsigned fail_count;		/* Cumulative failure count */
+	unsigned curr_fail_count;	/* Errors since last successful IO or reinstate */
+	unsigned long fail_start;	/* jiffies when the current run of errors began */
 
 	struct dm_path path;
 };
@@ -313,6 +320,14 @@ static int map_io(struct multipath *m, struct bio *bio,
 
 	spin_lock_irqsave(&m->lock, flags);
 
+	/*
+	 * If the path is experiencing problems but is not marked failed,
+	 * then throttle it until IO starts to execute correctly again.
+	 */
+	if (m->current_pgpath && m->current_pgpath->curr_fail_count > 0 &&
+	    m->repeat_count > 1)
+		m->repeat_count = 2;
+
 	/* Do we need to select a new pgpath? */
 	if (!m->current_pgpath ||
 	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
@@ -847,7 +862,15 @@ static int fail_path(struct pgpath *pgpath)
 	if (!pgpath->path.is_active)
 		goto out;
 
-	DMWARN("Failing path %s.", pgpath->path.dev->name);
+	if (!pgpath->curr_fail_count) {
+		pgpath->fail_start = jiffies;
+		goto choose_new_path;
+	} else if (time_after_eq(pgpath->fail_start + DM_DEV_LOSS_TMO,
+				 jiffies))
+		goto choose_new_path;
+
+	DMWARN("Failing Path %s current fail count %d.",
+		pgpath->path.dev->name, pgpath->curr_fail_count);
 
 	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
 	pgpath->path.is_active = 0;
@@ -855,6 +878,9 @@ static int fail_path(struct pgpath *pgpath)
 
 	m->nr_valid_paths--;
 
+choose_new_path:
+	pgpath->curr_fail_count++;
+
 	if (pgpath == m->current_pgpath)
 		m->current_pgpath = NULL;
 
@@ -880,6 +906,9 @@ static int reinstate_path(struct pgpath *pgpath)
 
 	spin_lock_irqsave(&m->lock, flags);
 
+	pgpath->fail_start = 0;
+	pgpath->curr_fail_count = 0;
+
 	if (pgpath->path.is_active)
 		goto out;
 
@@ -1073,8 +1102,11 @@ static int do_end_io(struct multipath *m, struct bio *bio,
 	unsigned err_flags = MP_FAIL_PATH;	/* Default behavior */
 	unsigned long flags;
 
-	if (!error)
+	if (!error) {
+		mpio->pgpath->curr_fail_count = 0;
+		mpio->pgpath->fail_start = 0;
 		return 0;	/* I/O complete */
+	}
 
 	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
 		return error;
-- 
1.5.4.1