Implement basic I/O balancing code (for read/write) for the multipath personality. The code is based on the RAID1 implementation. Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxx> --- drivers/md/multipath.c | 70 ++++++++++++++++++++++++++++++++++++++--------- drivers/md/multipath.h | 1 + 2 files changed, 57 insertions(+), 14 deletions(-) diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 3535c23af288..83c4f5105705 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -30,29 +30,58 @@ #define NR_RESERVED_BUFS 32 - -static int multipath_map (multipath_conf_t *conf) +/* + * This routine returns the disk from which the requested read should + * be done. There is a per-array 'next expected sequential IO' sector + * number - if this matches on the next IO then we use the last disk. + * There is also a per-disk 'last known head position' sector that is + * maintained from IRQ contexts, IO completion handlers update this + * position correctly. We pick the disk whose head is closest. + * + * Note that 'sector' argument is for original bio whereas 'head_position' + * is maintained for each rdev so we should take it into account when + * calculating the distance. + */ +static int multipath_map(multipath_conf_t *conf, sector_t sector) { int i, disks = conf->raid_disks; - - /* - * Later we do read balancing on the read side - * now we use the first available disk. 
- */ + int best_disk; + sector_t best_dist; rcu_read_lock(); +retry: + best_disk = -1; + best_dist = MaxSector; + for (i = 0; i < disks; i++) { + int dist; mdk_rdev_t *rdev = rcu_dereference(conf->multipaths[i].rdev); + sector_t this_sector = sector; + if (rdev && test_bit(In_sync, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - return i; + this_sector += rdev->data_offset; + dist = abs(this_sector - conf->multipaths[i].head_position); + if (dist < best_dist) { + best_dist = dist; + best_disk = i; + } } } + + if (best_disk == -1) { + printk(KERN_ERR "multipath_map(): no more operational IO paths?\n"); + } else { + mdk_rdev_t *rdev; + + rdev = rcu_dereference(conf->multipaths[best_disk].rdev); + if (!rdev || !test_bit(In_sync, &rdev->flags)) + goto retry; + + atomic_inc(&rdev->nr_pending); + } rcu_read_unlock(); - printk(KERN_ERR "multipath_map(): no more operational IO paths?\n"); - return (-1); + return best_disk; } static void multipath_reschedule_retry (struct multipath_bh *mp_bh) @@ -82,6 +111,17 @@ static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) mempool_free(mp_bh, conf->pool); } +/* + * Update disk head position estimator based on IRQ completion info. 
+ */ +static inline void update_head_pos(int disk, struct multipath_bh *mp_bh) +{ + multipath_conf_t *conf = mp_bh->mddev->private; + + conf->multipaths[disk].head_position = + mp_bh->bio.bi_sector + (mp_bh->bio.bi_size >> 9); +} + static void multipath_end_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -89,6 +129,8 @@ static void multipath_end_request(struct bio *bio, int error) multipath_conf_t *conf = mp_bh->mddev->private; mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev; + update_head_pos(mp_bh->path, mp_bh); + if (uptodate) multipath_end_bh_io(mp_bh, 0); else if (!(bio->bi_rw & REQ_RAHEAD)) { @@ -122,7 +164,7 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - mp_bh->path = multipath_map(conf); + mp_bh->path = multipath_map(conf, bio->bi_sector); if (mp_bh->path < 0) { bio_endio(bio, -EIO); mempool_free(mp_bh, conf->pool); @@ -356,7 +398,7 @@ static void multipathd (mddev_t *mddev) bio = &mp_bh->bio; bio->bi_sector = mp_bh->master_bio->bi_sector; - if ((mp_bh->path = multipath_map (conf))<0) { + if ((mp_bh->path = multipath_map(conf, bio->bi_sector)) < 0) { printk(KERN_ALERT "multipath: %s: unrecoverable IO read" " error for block %llu\n", bdevname(bio->bi_bdev,b), diff --git a/drivers/md/multipath.h b/drivers/md/multipath.h index 3c5a45eb5f8a..060fe2aabd97 100644 --- a/drivers/md/multipath.h +++ b/drivers/md/multipath.h @@ -3,6 +3,7 @@ struct multipath_info { mdk_rdev_t *rdev; + sector_t head_position; }; struct multipath_private_data { -- 1.7.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html