The MD driver for RAID levels 4/5/6 should avoid re-reading after a read error. For redundant RAID it makes no sense to retry the operation: when one of the disks in the array hits a read error, the reading process stalls until - either the read succeeds (e.g. after 4 seconds the HDD's error-recovery strategy manages to read the sector) - or it fails after the HDD-imposed timeout (with TLER, e.g. after 7 seconds; it might be even longer). The user can enable or disable this functionality with the following commands. To enable: echo 1 > /proc/sys/dev/raid/raid456_retry_read_error To disable, type the following at any time: echo 0 > /proc/sys/dev/raid/raid456_retry_read_error Signed-off-by: Nigel Croxon <ncroxon@xxxxxxxxxx> --- drivers/md/md.c | 43 +++++++++++++++++++++++++++++++++++++++++++ drivers/md/md.h | 3 +++ drivers/md/raid5.c | 3 ++- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 6f0ecfe8eab2..75b8b0615328 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -125,6 +125,12 @@ static inline int speed_max(struct mddev *mddev) mddev->sync_speed_max : sysctl_speed_limit_max; } +static int sysctl_raid456_retry_read_error = 0; +static inline void set_raid456_retry_re(struct mddev *mddev, int re) +{ + (re ? 
set_bit : clear_bit)(MD_RAID456_RETRY_RE, &mddev->flags); +} + static int rdev_init_wb(struct md_rdev *rdev) { if (rdev->bdev->bd_queue->nr_hw_queues == 1) @@ -213,6 +219,13 @@ static struct ctl_table raid_table[] = { .mode = S_IRUGO|S_IWUSR, .proc_handler = proc_dointvec, }, + { + .procname = "raid456_retry_read_error", + .data = &sysctl_raid456_retry_read_error, + .maxlen = sizeof(int), + .mode = S_IRUGO|S_IWUSR, + .proc_handler = proc_dointvec, + }, { } }; @@ -4771,6 +4784,32 @@ mismatch_cnt_show(struct mddev *mddev, char *page) static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt); +static ssize_t +raid456_retry_re_show(struct mddev *mddev, char *page) +{ + /* one bare value per sysfs file: 0 or 1, matching what store accepts */ + return sprintf(page, "%d\n", test_bit(MD_RAID456_RETRY_RE, &mddev->flags)); +} + +static ssize_t raid456_retry_re_store(struct mddev *mddev, const char *buf, size_t len) +{ + int retry; + + if (!mddev->private) + return -ENODEV; + + /* no length check: kstrtoint() tolerates the newline "echo" appends */ + if (kstrtoint(buf, 10, &retry) || + retry < 0 || retry > 1) + return -EINVAL; + + set_raid456_retry_re(mddev, retry); + return len; +} + +static struct md_sysfs_entry md_raid456_retry_read_error = +__ATTR(raid456_retry_read_error, S_IRUGO|S_IWUSR, raid456_retry_re_show, raid456_retry_re_store); + static ssize_t sync_min_show(struct mddev *mddev, char *page) { @@ -5322,6 +5361,7 @@ static struct attribute *md_redundancy_attrs[] = { &md_suspend_hi.attr, &md_bitmap.attr, &md_degraded.attr, + &md_raid456_retry_read_error.attr, NULL, }; static struct attribute_group md_redundancy_group = { @@ -5885,6 +5925,8 @@ static int do_md_run(struct mddev *mddev) if (mddev_is_clustered(mddev)) md_allow_write(mddev); + set_raid456_retry_re(mddev, sysctl_raid456_retry_read_error); + /* run start up tasks that require md_thread */ md_start(mddev); @@ -8463,6 +8505,7 @@ void md_do_sync(struct md_thread *thread) else desc = "recovery"; + set_raid456_retry_re(mddev, sysctl_raid456_retry_read_error); mddev->last_sync_action = action ?: desc; /* 
we overload curr_resync somewhat here. diff --git a/drivers/md/md.h b/drivers/md/md.h index 5f86f8adb0a4..1e3e3d5eb859 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -254,6 +254,9 @@ enum mddev_flags { MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop * I/O in case an array member is gone/failed. */ + MD_RAID456_RETRY_RE, /* allow user-space to request RAID456 + * retry read errors + */ }; enum mddev_sb_flags { diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 12a8ce83786e..63c616b996b6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2567,7 +2567,8 @@ static void raid5_end_read_request(struct bio * bi) if (retry) if (sh->qd_idx >= 0 && sh->pd_idx == i) set_bit(R5_ReadError, &sh->dev[i].flags); - else if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) { + else if ((test_bit(R5_ReadNoMerge, &sh->dev[i].flags)) || + (test_bit(MD_RAID456_RETRY_RE, &conf->mddev->flags))) { set_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReadNoMerge, &sh->dev[i].flags); } else -- 2.20.1