Re: [PATCH 3/3] md/raid10: factor out a get_error_dev helper

Song Liu <song@xxxxxxxxxx> · Mon, 18 Oct 2021 23:59:03 -0700

On Sun, Oct 17, 2021 at 6:50 AM Guoqing Jiang <guoqing.jiang@xxxxxxxxx> wrote:
>
> Add a helper to find error_dev in case handle_read_err is true.
>
> Signed-off-by: Guoqing Jiang <guoqing.jiang@xxxxxxxxx>

For 2/3 and 3/3, I was thinking about something like below (only
compile tested).
Would this work?

Thanks,
Song

diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c
index dde98f65bd04f..c2387f55343dd 100644
--- i/drivers/md/raid10.c
+++ w/drivers/md/raid10.c
@@ -1116,7 +1116,7 @@ static void regular_request_wait(struct mddev
*mddev, struct r10conf *conf,
 }

 static void raid10_read_request(struct mddev *mddev, struct bio *bio,
-                               struct r10bio *r10_bio)
+                               struct r10bio *r10_bio, struct md_rdev
*err_rdev)
 {
        struct r10conf *conf = mddev->private;
        struct bio *read_bio;
@@ -1126,36 +1126,17 @@ static void raid10_read_request(struct mddev
*mddev, struct bio *bio,
        struct md_rdev *rdev;
        char b[BDEVNAME_SIZE];
        int slot = r10_bio->read_slot;
-       struct md_rdev *err_rdev = NULL;
        gfp_t gfp = GFP_NOIO;

-       if (slot >= 0 && r10_bio->devs[slot].rdev) {
-               /*
-                * This is an error retry, but we cannot
-                * safely dereference the rdev in the r10_bio,
-                * we must use the one in conf.
-                * If it has already been disconnected (unlikely)
-                * we lose the device name in error messages.
-                */
-               int disk;
-               /*
-                * As we are blocking raid10, it is a little safer to
-                * use __GFP_HIGH.
-                */
+       /*
+        * As we are blocking raid10, it is a little safer to
+        * use __GFP_HIGH.
+        */
+       if (err_rdev) {
                gfp = GFP_NOIO | __GFP_HIGH;
-
-               rcu_read_lock();
-               disk = r10_bio->devs[slot].devnum;
-               err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
-               if (err_rdev)
-                       bdevname(err_rdev->bdev, b);
-               else {
-                       strcpy(b, "???");
-                       /* This never gets dereferenced */
-                       err_rdev = r10_bio->devs[slot].rdev;
-               }
-               rcu_read_unlock();
-       }
+               bdevname(err_rdev->bdev, b);
+       } else
+               strcpy(b, "???");

        regular_request_wait(mddev, conf, bio, r10_bio->sectors);
        rdev = read_balance(conf, r10_bio, &max_sectors);
@@ -1519,7 +1500,7 @@ static void __make_request(struct mddev *mddev,
struct bio *bio, int sectors)
                        conf->geo.raid_disks);

        if (bio_data_dir(bio) == READ)
-               raid10_read_request(mddev, bio, r10_bio);
+               raid10_read_request(mddev, bio, r10_bio, NULL);
        else
                raid10_write_request(mddev, bio, r10_bio);
 }
@@ -2887,6 +2868,31 @@ static int narrow_write_error(struct r10bio
*r10_bio, int i)
        return ok;
 }

+static struct md_rdev *get_error_dev(struct mddev *mddev, struct
r10conf *conf, int slot,
+                                    struct r10bio *r10_bio)
+{
+       struct md_rdev *err_rdev = NULL;
+
+       /*
+        * This is an error retry, but we cannot
+        * safely dereference the rdev in the r10_bio,
+        * we must use the one in conf.
+        * If it has already been disconnected (unlikely)
+        * we lose the device name in error messages.
+        */
+       int disk;
+
+       rcu_read_lock();
+       disk = r10_bio->devs[slot].devnum;
+       err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
+       if (!err_rdev) {
+               /* This never gets dereferenced */
+               err_rdev = r10_bio->devs[slot].rdev;
+       }
+       rcu_read_unlock();
+       return err_rdev;
+}
+
 static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
 {
        int slot = r10_bio->read_slot;
@@ -2918,7 +2924,8 @@ static void handle_read_error(struct mddev
*mddev, struct r10bio *r10_bio)
        rdev_dec_pending(rdev, mddev);
        allow_barrier(conf);
        r10_bio->state = 0;
-       raid10_read_request(mddev, r10_bio->master_bio, r10_bio);
+       raid10_read_request(mddev, r10_bio->master_bio, r10_bio,
+                           get_error_dev(mddev, conf, slot, r10_bio));
 }

 static void handle_write_completed(struct r10conf *conf, struct
r10bio *r10_bio)