Re: [PATCH 3/3] md/raid10: factor out a get_error_dev helper

Guoqing Jiang <guoqing.jiang@xxxxxxxxx> · Tue, 19 Oct 2021 16:00:30 +0800

On 10/19/21 2:59 PM, Song Liu wrote:
On Sun, Oct 17, 2021 at 6:50 AM Guoqing Jiang<guoqing.jiang@xxxxxxxxx>  wrote:
Add a helper to find error_dev in case handle_read_err is true.

Signed-off-by: Guoqing Jiang<guoqing.jiang@xxxxxxxxx>
For 2/3 and 3/3, I was thinking about something like below (only
compile tested).
Would this work?

Thanks for clarification, I guess it works.

Thanks,
Song

diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c
index dde98f65bd04f..c2387f55343dd 100644
--- i/drivers/md/raid10.c
+++ w/drivers/md/raid10.c
@@ -1116,7 +1116,7 @@ static void regular_request_wait(struct mddev
*mddev, struct r10conf *conf,
  }

  static void raid10_read_request(struct mddev *mddev, struct bio *bio,
-                               struct r10bio *r10_bio)
+                               struct r10bio *r10_bio, struct md_rdev
*err_rdev)
  {
         struct r10conf *conf = mddev->private;
         struct bio *read_bio;
@@ -1126,36 +1126,17 @@ static void raid10_read_request(struct mddev
*mddev, struct bio *bio,
         struct md_rdev *rdev;
         char b[BDEVNAME_SIZE];
         int slot = r10_bio->read_slot;
-       struct md_rdev *err_rdev = NULL;
         gfp_t gfp = GFP_NOIO;

-       if (slot >= 0 && r10_bio->devs[slot].rdev) {
-               /*
-                * This is an error retry, but we cannot
-                * safely dereference the rdev in the r10_bio,
-                * we must use the one in conf.
-                * If it has already been disconnected (unlikely)
-                * we lose the device name in error messages.
-                */
-               int disk;
-               /*
-                * As we are blocking raid10, it is a little safer to
-                * use __GFP_HIGH.
-                */
+       /*
+        * As we are blocking raid10, it is a little safer to
+        * use __GFP_HIGH.
+        */
+       if (err_rdev) {
                 gfp = GFP_NOIO | __GFP_HIGH;
-
-               rcu_read_lock();
-               disk = r10_bio->devs[slot].devnum;
-               err_rdev = rcu_dereference(conf->mirrors[disk].rdev);
-               if (err_rdev)
-                       bdevname(err_rdev->bdev, b);
-               else {
-                       strcpy(b, "???");
-                       /* This never gets dereferenced */
-                       err_rdev = r10_bio->devs[slot].rdev;
-               }
-               rcu_read_unlock();
-       }
+               bdevname(err_rdev->bdev, b);
+       } else
+               strcpy(b, "???");

         regular_request_wait(mddev, conf, bio, r10_bio->sectors);
         rdev = read_balance(conf, r10_bio, &max_sectors);
@@ -1519,7 +1500,7 @@ static void __make_request(struct mddev *mddev,
struct bio *bio, int sectors)
                         conf->geo.raid_disks);

         if (bio_data_dir(bio) == READ)
-               raid10_read_request(mddev, bio, r10_bio);
+               raid10_read_request(mddev, bio, r10_bio, NULL);
         else
                 raid10_write_request(mddev, bio, r10_bio);
  }
@@ -2887,6 +2868,31 @@ static int narrow_write_error(struct r10bio
*r10_bio, int i)
         return ok;
  }

+static struct md_rdev *get_error_dev(struct mddev *mddev, struct
r10conf *conf, int slot,
+                                    struct r10bio *r10_bio)
+{
+       struct md_rdev *err_rdev = NULL;
+

We need the original check ("slot >= 0 && r10_bio->devs[slot].rdev ")
in the function I think. Will take a closer look and send a new version.

Thanks,
Guoqing