[dm-devel] [PATCH][RFC] supporting different failover models

Hey Joe,

A good use of the Priority Group framework is to place your primary paths in one group and your secondary, tertiary, etc. paths in higher groups. The problem is that some devices require a special command to be sent before the other paths can be used, while another class of devices will simply perform the switch when IO is sent down the secondary paths.
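
As a rough sketch of that layout (alloc_priority_group() is my guess at the allocation counterpart of the patch's free_priority_group(); __insert_priority_group() and m->priority_groups are from the patch):

	struct priority_group *primary = alloc_priority_group();	/* e.g. sda, sdb */
	struct priority_group *secondary = alloc_priority_group();	/* e.g. sdc, sdd */

	/*
	 * Primaries first: __choose_path() walks m->priority_groups in
	 * order, so it only falls through to the secondary group once
	 * every primary path has failed.
	 */
	__insert_priority_group(m, primary);
	__insert_priority_group(m, secondary);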

For the latter class of devices, the test IOs are going to cause switches, which is going to cause performance problems. By testing only the current group (or specifying which groups should be tested) and separating your paths through priority groups, we can avoid that problem: you do not have to worry about revalidating secondary paths that initially failed (this is what test_pgroup() in the attached patch does), and I think you can then kill the nr_tested_paths code.

All that is needed to support the devices that require a special command are some callouts in the priority group framework to initialize the group before it is used. If this is done from the daemon we can sleep and wait for the result, and you have already provided a way to queue incoming IO from dm. Would it be OK to call this from a target? In general, is it going to be better to throttle incoming IO while failed bios are resubmitted?
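
To make the callout idea concrete, here is a rough sketch (the pg_callout, init_pgroup, and fail_pgroup names are all hypothetical, not from the attached patch) of a per-group init hook run from the workqueue, where it is safe to sleep until the switch-over command completes:

	struct pg_callout {
		/* sends the device's switch-over command;
		 * returns 0 on success and may sleep */
		int (*init)(struct priority_group *pg);
	};

	static void init_pgroup(void *data)
	{
		struct priority_group *pg = (struct priority_group *)data;
		struct multipath *m = pg->m;

		if (pg->callout && pg->callout->init(pg))
			/* could not activate the group; fail its paths so
			 * __choose_path() moves on to the next group */
			fail_pgroup(pg);

		/* resubmit the IO that dm queued while we slept */
		queue_work(_kmpathd_wq, &m->dispatch_failed);
	}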

I have attached a patch (built against 2.6.2-udm1) that begins to implement this. It does not provide the priority group callouts, it is untested, and it is really only meant to show what I mean. Is this direction OK, or what is the intended purpose of the test IO + nr_tested_paths code?

I also replaced the dm-daemon usage with a workqueue. This allows requeueing for different devices to proceed in parallel, and the workqueue attempts to execute work on the same processor that queued it.
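
In miniature, the 2.6 workqueue pattern looks like this (setup_requeue() and do_requeue() are stand-ins for the patch's constructor path and dispatch_failed_ios()):

	static struct workqueue_struct *wq;

	static void do_requeue(void *data)
	{
		struct multipath *m = (struct multipath *)data;
		/* process context: safe to sleep while resubmitting
		 * the bios on m->failed_ios */
	}

	static int setup_requeue(struct multipath *m)
	{
		wq = create_workqueue("kmpathd");	/* one thread per CPU */
		if (!wq)
			return -ENOMEM;

		/* one work_struct per multipath, so requeueing for
		 * different devices can run in parallel; queue_work()
		 * queues to the current CPU's worker thread */
		INIT_WORK(&m->dispatch_failed, do_requeue, m);
		queue_work(wq, &m->dispatch_failed);
		return 0;
	}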

Mike

--- linux-2.6.2/drivers/md/dm-mpath.c	2004-02-10 17:33:58.000000000 -0800
+++ linux-2.6.2-work/drivers/md/dm-mpath.c	2004-02-10 23:24:14.000000000 -0800
@@ -16,6 +16,8 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/time.h>
+#include <linux/workqueue.h>
+#include <linux/timer.h>
 #include <asm/atomic.h>
 
 #define MPATH_FAIL_COUNT	1
@@ -62,17 +64,18 @@ struct multipath {
 	struct list_head priority_groups;
 
 	spinlock_t lock;
-	unsigned nr_valid_paths;
-	unsigned nr_tested_paths;
 
 	struct path *current_path;
 	unsigned current_count;
 	unsigned min_io;
 
+	struct work_struct dispatch_failed;
 	struct bio_list failed_ios;
-	struct bio_list test_ios;
 
+	struct work_struct test_pgroup;
+	struct timer_list test_timer;
 	unsigned test_interval;
+
 	unsigned trigger_event;
 };
 
@@ -159,6 +162,9 @@ static void free_priority_group(struct p
 	kfree(pg);
 }
 
+static void dispatch_failed_ios(void *data);
+static void test_pgroup(void *data);
+
 static struct multipath *alloc_multipath(void)
 {
 	struct multipath *m;
@@ -169,6 +175,9 @@ static struct multipath *alloc_multipath
 		INIT_LIST_HEAD(&m->priority_groups);
 		m->lock = SPIN_LOCK_UNLOCKED;
 		m->min_io = MPATH_MIN_IO;
+		INIT_WORK(&m->dispatch_failed, dispatch_failed_ios, m);
+		INIT_WORK(&m->test_pgroup, test_pgroup, m);
+		init_timer(&m->test_timer);
 	}
 
 	return m;
@@ -187,7 +196,7 @@ static void free_multipath(struct multip
 }
 
 /*-----------------------------------------------------------------
- * All paths should be tested periodically.
+ * All valid paths should be tested periodically.
  *---------------------------------------------------------------*/
 static void iterate_paths(struct multipath *m, void (*fn)(struct path *p))
 {
@@ -200,15 +209,9 @@ static void iterate_paths(struct multipa
 	}
 }
 
-static void clear_tested(struct path *p)
-{
-	p->tested = 0;
-}
-
 static void fail_path(struct path *path)
 {
 	unsigned long flags;
-	struct multipath *m;
 
 	spin_lock_irqsave(&path->failed_lock, flags);
 
@@ -218,24 +221,6 @@ static void fail_path(struct path *path)
 		path->fail_total++;
 		path->pg->ps->type->set_path_state(path->pg->ps, path, 0);
 		path->pg->m->trigger_event = 1;
-
-		m = path->pg->m;
-		spin_lock(&m->lock);
-		m->nr_valid_paths--;
-		if (!m->nr_valid_paths) {
-			iterate_paths(m, clear_tested);
-			m->nr_tested_paths = 0;
-		}
-		spin_unlock(&m->lock);
-	}
-
-	if (!path->tested) {
-		path->tested = 1;
-
-		m = path->pg->m;
-		spin_lock(&m->lock);
-		m->nr_tested_paths++;
-		spin_unlock(&m->lock);
 	}
 
 	spin_unlock_irqrestore(&path->failed_lock, flags);
@@ -253,10 +238,6 @@ static void recover_path(struct path *pa
 		path->fail_count = MPATH_FAIL_COUNT;
 		path->pg->ps->type->set_path_state(path->pg->ps, path, 1);
 		m->trigger_event = 1;
-
-		spin_lock(&m->lock);
-		m->nr_valid_paths++;
-		spin_unlock(&m->lock);
 	}
 
 	spin_unlock_irqrestore(&path->failed_lock, flags);
@@ -278,40 +259,61 @@ static int test_endio(struct bio *bio, u
 	return 0;
 }
 
-static void test_path(struct path *p)
+static void test_pgroup(void *data)
 {
-	/*
-	 * If we get this lock we're allowed to issue a test io
-	 * for this path.
-	 */
-	if (down_trylock(&p->test_lock))
-		return;
+	struct multipath *m = (struct multipath *)data;
+	struct priority_group *pg;
+	struct path *p;
+	unsigned long flags;
 
-	p->test_bio->bi_sector = 0;
-	p->test_bio->bi_bdev = p->dev->bdev;
-	p->test_bio->bi_size = bdev_hardsect_size(p->dev->bdev);
-	p->test_bio->bi_idx = 0;
+	spin_lock_irqsave(&m->lock, flags);
+	p = m->current_path;
+	spin_unlock_irqrestore(&m->lock, flags);
 
-	bio_list_add(&p->pg->m->test_ios, p->test_bio);
+	if (!p)
+		return;
+
+	pg = p->pg;
+	list_for_each_entry (p, &pg->paths, list) {
+		/* TODO - add test for path idleness */
+
+		/*
+		 * If we get this lock we're allowed to issue a test io
+		 * for this path.
+		 */
+		if (down_trylock(&p->test_lock))
+			continue;
+	
+		p->test_bio->bi_sector = 0;
+		p->test_bio->bi_bdev = p->dev->bdev;
+		p->test_bio->bi_size = bdev_hardsect_size(p->dev->bdev);
+		p->test_bio->bi_idx = 0;
+	
+		generic_make_request(p->test_bio);
+	}
 }
 
 /*-----------------------------------------------------------------
- * The multipath daemon is responsible for resubmitting failed ios.
+ * The multipath daemon is responsible for submitting failed and
+ * path test ios.
  *---------------------------------------------------------------*/
-static struct dm_daemon _kmpathd;
+static struct workqueue_struct *_kmpathd_wq;
 
-static LIST_HEAD(_mpaths);
-static spinlock_t _mpath_lock = SPIN_LOCK_UNLOCKED;
+static void add_test_timer(struct multipath *m);
 
-static void submit_ios(struct bio *bio)
+static void queue_test_ios(struct multipath *m)
 {
-	struct bio *next;
+	queue_work(_kmpathd_wq, &m->test_pgroup);
+	add_test_timer(m);
+}
 
-	while (bio) {
-		next = bio->bi_next;
-		bio->bi_next = NULL;
-		generic_make_request(bio);
-		bio = next;
+static void add_test_timer(struct multipath *m)
+{
+	if (m->test_interval) {
+		m->test_timer.function = (void (*)(unsigned long))queue_test_ios;
+		m->test_timer.data = (unsigned long)m;
+		m->test_timer.expires = jiffies + m->test_interval * HZ;
+		add_timer(&m->test_timer);
 	}
 }
 
@@ -320,13 +322,11 @@ static int __choose_path(struct multipat
 	struct priority_group *pg;
 	struct path *path = NULL;
 
-	if (m->nr_valid_paths) {
-		/* loop through the priority groups until we find a valid path. */
-		list_for_each_entry (pg, &m->priority_groups, list) {
-			path = pg->ps->type->select_path(pg->ps);
-			if (path)
-				break;
-		}
+	/* loop through the priority groups until we find a valid path. */
+	list_for_each_entry (pg, &m->priority_groups, list) {
+		path = pg->ps->type->select_path(pg->ps);
+		if (path)
+			break;
 	}
 
 	m->current_path = path;
@@ -364,68 +364,31 @@ static int map_io(struct multipath *m, s
 	return 0;
 }
 
-static void dispatch_failed_ios(struct multipath *m)
+static void dispatch_failed_ios(void *data)
 {
+	struct multipath *m = (struct multipath *)data;
 	int r;
 	unsigned long flags;
 	struct bio *bio = NULL, *next;
 
 	spin_lock_irqsave(&m->lock, flags);
-	if (m->nr_valid_paths || (m->nr_tested_paths == m->nr_paths))
-		bio = bio_list_get(&m->failed_ios);
+	bio = bio_list_get(&m->failed_ios);
 	spin_unlock_irqrestore(&m->lock, flags);
 
-
 	while (bio) {
 		next = bio->bi_next;
 		bio->bi_next = NULL;
 
 		r = map_io(m, bio);
 		if (r)
-			/*
-			 * This wont loop forever because the
-			 * end_io function will fail the ios if
-			 * we've tested all the paths.
-			 */
 			bio_io_error(bio, bio->bi_size);
-
 		else
 			generic_make_request(bio);
 
 		bio = next;
 	}
-}
-
-/*
- * Multipathd does this every time it runs, returning a sleep
- * duration hint.
- */
-static jiffy_t do_work(void)
-{
-	unsigned long flags;
-	struct multipath *m;
-	unsigned interval = 0;
-
-	spin_lock(&_mpath_lock);
-	list_for_each_entry (m, &_mpaths, list) {
-		dispatch_failed_ios(m);
-		iterate_paths(m, test_path);
-		submit_ios(bio_list_get(&m->test_ios));
-
-		spin_lock_irqsave(&m->lock, flags);
-		if (m->trigger_event) {
-			dm_table_event(m->ti->table);
-			m->trigger_event = 0;
-		}
-		spin_unlock_irqrestore(&m->lock, flags);
-
-		interval = min_not_zero(interval, m->test_interval);
-	}
-	spin_unlock(&_mpath_lock);
 
 	blk_run_queues();
-
-	return ((jiffy_t) interval) * HZ;
 }
 
 /*-----------------------------------------------------------------
@@ -681,15 +644,11 @@ static int multipath_ctr(struct dm_targe
 			__insert_priority_group(m, pg);
 		}
 	}
-	m->nr_valid_paths = m->nr_paths;
+	add_test_timer(m);
 
 	ti->private = m;
 	m->ti = ti;
 
-	spin_lock(&_mpath_lock);
-	list_add(&m->list, &_mpaths);
-	spin_unlock(&_mpath_lock);
-
 	return 0;
 
  bad:
@@ -700,11 +659,6 @@ static int multipath_ctr(struct dm_targe
 static void multipath_dtr(struct dm_target *ti)
 {
 	struct multipath *m = (struct multipath *) ti->private;
-
-	spin_lock(&_mpath_lock);
-	list_del(&m->list);
-	spin_unlock(&_mpath_lock);
-
 	free_multipath(m);
 }
 
@@ -745,13 +699,6 @@ static int multipath_end_io(struct dm_ta
 	struct multipath *m = (struct multipath *) ti->private;
 
 	if (error) {
-		spin_lock(&m->lock);
-		if (!m->nr_valid_paths && (m->nr_tested_paths == m->nr_paths)) {
-			spin_unlock(&m->lock);
-			return -EIO;
-		}
-		spin_unlock(&m->lock);
-
 		path = find_path(m, bio->bi_bdev);
 		fail_path(path);
 
@@ -760,7 +707,7 @@ static int multipath_end_io(struct dm_ta
 		bio_list_add(&m->failed_ios, bio);
 		spin_unlock(&m->lock);
 
-		dm_daemon_wake(&_kmpathd);
+		queue_work(_kmpathd_wq, &m->dispatch_failed);
 		return 1;	/* io not complete */
 	}
 
@@ -887,8 +834,8 @@ int __init dm_multipath_init(void)
 		return r;
 	}
 
-	r = dm_daemon_start(&_kmpathd, "kpathd", do_work);
-	if (r) {
+	_kmpathd_wq = create_workqueue("kmpathd");
+	if (!_kmpathd_wq) {
 		/* FIXME: remove this */
 		dm_unregister_path_selectors();
 		dm_unregister_target(&multipath_target);
@@ -902,7 +849,7 @@ void __exit dm_multipath_exit(void)
 {
 	int r;
 
-	dm_daemon_stop(&_kmpathd);
+	destroy_workqueue(_kmpathd_wq);
 	dm_unregister_path_selectors();
 	r = dm_unregister_target(&multipath_target);
 	if (r < 0)
