On Sun, Oct 17, 2021 at 6:50 AM Guoqing Jiang <guoqing.jiang@xxxxxxxxx> wrote: > > Add a helper to find error_dev in case handle_read_err is true. > > Signed-off-by: Guoqing Jiang <guoqing.jiang@xxxxxxxxx> For 2/3 and 3/3, I was thinking about something like below (only compile tested). Would this work? Thanks, Song diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c index dde98f65bd04f..c2387f55343dd 100644 --- i/drivers/md/raid10.c +++ w/drivers/md/raid10.c @@ -1116,7 +1116,7 @@ static void regular_request_wait(struct mddev *mddev, struct r10conf *conf, } static void raid10_read_request(struct mddev *mddev, struct bio *bio, - struct r10bio *r10_bio) + struct r10bio *r10_bio, struct md_rdev *err_rdev) { struct r10conf *conf = mddev->private; struct bio *read_bio; @@ -1126,36 +1126,17 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio, struct md_rdev *rdev; char b[BDEVNAME_SIZE]; int slot = r10_bio->read_slot; - struct md_rdev *err_rdev = NULL; gfp_t gfp = GFP_NOIO; - if (slot >= 0 && r10_bio->devs[slot].rdev) { - /* - * This is an error retry, but we cannot - * safely dereference the rdev in the r10_bio, - * we must use the one in conf. - * If it has already been disconnected (unlikely) - * we lose the device name in error messages. - */ - int disk; - /* - * As we are blocking raid10, it is a little safer to - * use __GFP_HIGH. - */ + /* + * As we are blocking raid10, it is a little safer to + * use __GFP_HIGH. + */ + if (err_rdev) { gfp = GFP_NOIO | __GFP_HIGH; - - rcu_read_lock(); - disk = r10_bio->devs[slot].devnum; - err_rdev = rcu_dereference(conf->mirrors[disk].rdev); - if (err_rdev) - bdevname(err_rdev->bdev, b); - else { - strcpy(b, "???"); - /* This never gets dereferenced */ - err_rdev = r10_bio->devs[slot].rdev; - } - rcu_read_unlock(); - } + bdevname(err_rdev->bdev, b); + } else + strcpy(b, "???"); regular_request_wait(mddev, conf, bio, r10_bio->sectors); rdev = read_balance(conf, r10_bio, &max_sectors); @@ -1519,7 +1500,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio, int sectors) conf->geo.raid_disks); if (bio_data_dir(bio) == READ) - raid10_read_request(mddev, bio, r10_bio); + raid10_read_request(mddev, bio, r10_bio, NULL); else raid10_write_request(mddev, bio, r10_bio); } @@ -2887,6 +2868,31 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) return ok; } +static struct md_rdev *get_error_dev(struct mddev *mddev, struct r10conf *conf, int slot, + struct r10bio *r10_bio) +{ + struct md_rdev *err_rdev = NULL; + + /* + * This is an error retry, but we cannot + * safely dereference the rdev in the r10_bio, + * we must use the one in conf. + * If it has already been disconnected (unlikely) + * we lose the device name in error messages. + */ + int disk; + + rcu_read_lock(); + disk = r10_bio->devs[slot].devnum; + err_rdev = rcu_dereference(conf->mirrors[disk].rdev); + if (!err_rdev) { + /* This never gets dereferenced */ + err_rdev = r10_bio->devs[slot].rdev; + } + rcu_read_unlock(); + return err_rdev; +} + static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) { int slot = r10_bio->read_slot; @@ -2918,7 +2924,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio) rdev_dec_pending(rdev, mddev); allow_barrier(conf); r10_bio->state = 0; - raid10_read_request(mddev, r10_bio->master_bio, r10_bio); + raid10_read_request(mddev, r10_bio->master_bio, r10_bio, + get_error_dev(mddev, conf, slot, r10_bio)); } static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)