[md PATCH 06/16] md/raid1: clean up read_balance.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



read_balance has three loops which all look for a 'best'
device based on slightly different criteria.
This is clumsy and makes it hard to add extra criteria.

So replace it all with a single loop that combines everything.

Signed-off-by: NeilBrown <neilb@xxxxxxx>
---
 drivers/md/raid1.c |  144 ++++++++++++++++++++++------------------------------
 1 files changed, 60 insertions(+), 84 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 82440a7..fa62c7b 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -420,10 +420,13 @@ static void raid1_end_write_request(struct bio *bio, int error)
 static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 {
 	const sector_t this_sector = r1_bio->sector;
-	int new_disk = conf->last_used, disk = new_disk;
-	int wonly_disk = -1;
 	const int sectors = r1_bio->sectors;
-	sector_t new_distance, current_distance;
+	int do_balance;
+	int disk;
+	int start_disk;
+	int best_disk;
+	int i;
+	sector_t best_dist;
 	mdk_rdev_t *rdev;
 
 	rcu_read_lock();
@@ -433,100 +436,73 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 	 * We take the first readable disk when above the resync window.
 	 */
  retry:
+	disk = -1;
+	best_disk = -1;
+	best_dist = MaxSector;
 	if (conf->mddev->recovery_cp < MaxSector &&
 	    (this_sector + sectors >= conf->next_resync)) {
-		/* Choose the first operational device, for consistancy */
-		new_disk = 0;
-
-		for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-		     r1_bio->bios[new_disk] == IO_BLOCKED ||
-		     !rdev || !test_bit(In_sync, &rdev->flags)
-			     || test_bit(WriteMostly, &rdev->flags);
-		     rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
-
-			if (rdev && test_bit(In_sync, &rdev->flags) &&
-				r1_bio->bios[new_disk] != IO_BLOCKED)
-				wonly_disk = new_disk;
-
-			if (new_disk == conf->raid_disks - 1) {
-				new_disk = wonly_disk;
-				break;
-			}
-		}
-		goto rb_out;
+		/* just choose the first */
+		start_disk = 0;
+		do_balance = 0;
+	} else {
+		/* Else start from last used */
+		start_disk = conf->last_used;
+		do_balance = 1;
 	}
+	for (i = 0; i < conf->raid_disks; i++) {
+		sector_t dist;
 
+		disk = (start_disk + i) % conf->raid_disks;
+		if (r1_bio->bios[disk] == IO_BLOCKED)
+			continue;
+		rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		if (!rdev)
+			continue;
+		if (test_bit(Faulty, &rdev->flags))
+			continue;
+		if (!test_bit(In_sync, &rdev->flags) &&
+		    rdev->recovery_offset < this_sector + sectors)
+			continue;
 
-	/* make sure the disk is operational */
-	for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-	     r1_bio->bios[new_disk] == IO_BLOCKED ||
-	     !rdev || !test_bit(In_sync, &rdev->flags) ||
-		     test_bit(WriteMostly, &rdev->flags);
-	     rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
+		if (test_bit(WriteMostly, &rdev->flags)) {
+			/* don't balance among write-mostly, just
+			 * use first as a last resort */
+			if (best_disk < 0)
+				best_disk = disk;
+			continue;
+		}
+		/* This is a reasonable device to use.  It might
+		 * even be best.
+		 */
+		if (!do_balance)
+			break;
 
-		if (rdev && test_bit(In_sync, &rdev->flags) &&
-		    r1_bio->bios[new_disk] != IO_BLOCKED)
-			wonly_disk = new_disk;
+		/*
+		 * Don't change to another disk for sequential reads:
+		 */
+		if (conf->next_seq_sect == this_sector)
+			break;
 
-		if (new_disk <= 0)
-			new_disk = conf->raid_disks;
-		new_disk--;
-		if (new_disk == disk) {
-			new_disk = wonly_disk;
+		dist = abs(this_sector - conf->mirrors[disk].head_position);
+		if (dist == 0)
+			break;
+		if (!atomic_read(&rdev->nr_pending))
+			/* Device is idle, so use it */
 			break;
+		if (dist < best_dist) {
+			best_dist = dist;
+			best_disk = disk;
 		}
 	}
+	if (i == conf->raid_disks)
+		disk = best_disk;
 
-	if (new_disk < 0)
-		goto rb_out;
-
-	disk = new_disk;
-	/* now disk == new_disk == starting point for search */
-
-	/*
-	 * Don't change to another disk for sequential reads:
-	 */
-	if (conf->next_seq_sect == this_sector)
-		goto rb_out;
-	if (this_sector == conf->mirrors[new_disk].head_position)
-		goto rb_out;
-
-	current_distance = abs(this_sector - conf->mirrors[disk].head_position);
-
-	/* Find the disk whose head is closest */
-
-	do {
-		if (disk <= 0)
-			disk = conf->raid_disks;
-		disk--;
-
+	if (disk >= 0) {
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
-
-		if (!rdev || r1_bio->bios[disk] == IO_BLOCKED ||
-		    !test_bit(In_sync, &rdev->flags) ||
-		    test_bit(WriteMostly, &rdev->flags))
-			continue;
-
-		if (!atomic_read(&rdev->nr_pending)) {
-			new_disk = disk;
-			break;
-		}
-		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
-		if (new_distance < current_distance) {
-			current_distance = new_distance;
-			new_disk = disk;
-		}
-	} while (disk != conf->last_used);
-
- rb_out:
-
-
-	if (new_disk >= 0) {
-		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
 		if (!rdev)
 			goto retry;
 		atomic_inc(&rdev->nr_pending);
-		if (!test_bit(In_sync, &rdev->flags)) {
+		if (test_bit(Faulty, &rdev->flags)) {
 			/* cannot risk returning a device that failed
 			 * before we inc'ed nr_pending
 			 */
@@ -534,11 +510,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 			goto retry;
 		}
 		conf->next_seq_sect = this_sector + sectors;
-		conf->last_used = new_disk;
+		conf->last_used = disk;
 	}
 	rcu_read_unlock();
 
-	return new_disk;
+	return disk;
 }
 
 static void unplug_slaves(mddev_t *mddev)


--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux