[PATCH 3/3] md/raid10: handle replacement devices in fix_read_error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Li Nan <linan122@xxxxxxxxxx>

In fix_read_error(), the handling of replacement devices is missing. If
read replacement device errors, we will attempt to fix 'mirror->rdev'.
It is wrong. Get rdev from r10bio to ensure that the fixed device is the
one which read error occurred.

Signed-off-by: Li Nan <linan122@xxxxxxxxxx>
---
 drivers/md/raid10.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a36e53fce21f..4a7c8eaf6ea0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2726,15 +2726,10 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 {
 	int sect = 0; /* Offset from r10_bio->sector */
 	int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = r10_bio->devs[slot].rdev;
 	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[slot].devnum;
 
-	/* still own a reference to this rdev, so it cannot
-	 * have been cleared recently.
-	 */
-	rdev = conf->mirrors[d].rdev;
-
 	if (test_bit(Faulty, &rdev->flags))
 		/* drive has already been failed, just ignore any
 		   more fix_read_error() attempts */
@@ -2763,12 +2758,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			s = PAGE_SIZE >> 9;
 
 		rcu_read_lock();
+		rdev = r10_bio->devs[slot].rdev;
 		do {
 			sector_t first_bad;
 			int bad_sectors;
 
-			d = r10_bio->devs[sl].devnum;
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
 			    !test_bit(Faulty, &rdev->flags) &&
@@ -2790,6 +2784,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			sl++;
 			if (sl == conf->copies)
 				sl = 0;
+			d = r10_bio->devs[sl].devnum;
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 		} while (sl != slot);
 		rcu_read_unlock();
 
@@ -2798,9 +2794,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			 * as bad on the first device to discourage future
 			 * reads.
 			 */
-			int dn = r10_bio->devs[slot].devnum;
-			rdev = conf->mirrors[dn].rdev;
-
+			rdev = r10_bio->devs[slot].rdev;
 			if (!rdev_set_badblocks(
 				    rdev,
 				    r10_bio->devs[slot].addr
@@ -2820,8 +2814,12 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			if (sl==0)
 				sl = conf->copies;
 			sl--;
-			d = r10_bio->devs[sl].devnum;
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			if (sl == slot) {
+				rdev = r10_bio->devs[slot].rdev;
+			} else {
+				d = r10_bio->devs[sl].devnum;
+				rdev = rcu_dereference(conf->mirrors[d].rdev);
+			}
 			if (!rdev ||
 			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
@@ -2854,8 +2852,12 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			if (sl==0)
 				sl = conf->copies;
 			sl--;
-			d = r10_bio->devs[sl].devnum;
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			if (sl == slot) {
+				rdev = r10_bio->devs[slot].rdev;
+			} else {
+				d = r10_bio->devs[sl].devnum;
+				rdev = rcu_dereference(conf->mirrors[d].rdev);
+			}
 			if (!rdev ||
 			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
-- 
2.39.2




[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux