On Mon, Mar 6, 2023 at 5:03 AM Mariusz Tkaczyk <mariusz.tkaczyk@xxxxxxxxxxxxxxx> wrote: > > After the commit 9631abdbf406c ("md: Set MD_BROKEN for RAID1 and RAID10") > MD_BROKEN must be set if the array has failed, because state_store() checks it. > If it is set then -EBUSY is returned to userspace. > > For raid0 and linear, MD_BROKEN is not set by error_handler(). As a result > mdadm is unable to trigger clean-up actions. It is a regression. > > This patch adds an appropriate error_handler for raid0 and linear. The > error handler sets MD_BROKEN for this device. > > Reviewed-by: Xiao Ni <xni@xxxxxxxxxx> > Signed-off-by: Mariusz Tkaczyk <mariusz.tkaczyk@xxxxxxxxxxxxxxx> Applied to md-next. Thanks, Song > --- > > We decided to drop this patch. Xiao determined that there is a regression > so I am bringing it back. I can implement it differently to avoid > error_handlers() if you still see them as overhead. > > https://lore.kernel.org/linux-raid/CAPhsuW4ZkqRQpW7UA45m_EB_sGcxL84RAg2JS5ZcZ8seGwMj+g@xxxxxxxxxxxxxx/ > > drivers/md/md-linear.c | 14 +++++++++++++- > drivers/md/md.c | 3 +++ > drivers/md/md.h | 10 ++-------- > drivers/md/raid0.c | 14 +++++++++++++- > 4 files changed, 31 insertions(+), 10 deletions(-) > > diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c > index 6e7797b4e738..4eb72b9dd933 100644 > --- a/drivers/md/md-linear.c > +++ b/drivers/md/md-linear.c > @@ -223,7 +223,8 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) > bio_sector < start_sector)) > goto out_of_bounds; > > - if (unlikely(is_mddev_broken(tmp_dev->rdev, "linear"))) { > + if (unlikely(is_rdev_broken(tmp_dev->rdev))) { > + md_error(mddev, tmp_dev->rdev); > bio_io_error(bio); > return true; > } > @@ -270,6 +271,16 @@ static void linear_status (struct seq_file *seq, struct mddev *mddev) > seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); > } > > +static void linear_error(struct mddev *mddev, struct md_rdev *rdev) > +{ > + if (!test_and_set_bit(MD_BROKEN, 
&mddev->flags)) { > + char *md_name = mdname(mddev); > + > + pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n", > + md_name, rdev->bdev); > + } > +} > + > static void linear_quiesce(struct mddev *mddev, int state) > { > } > @@ -286,6 +297,7 @@ static struct md_personality linear_personality = > .hot_add_disk = linear_add, > .size = linear_size, > .quiesce = linear_quiesce, > + .error_handler = linear_error, > }; > > static int __init linear_init (void) > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 927a43db5dfb..d95cf47ff924 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -7974,6 +7974,9 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) > return; > mddev->pers->error_handler(mddev, rdev); > > + if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR) > + return; > + > if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags)) > set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); > sysfs_notify_dirent_safe(rdev->sysfs_state); > diff --git a/drivers/md/md.h b/drivers/md/md.h > index e148e3c83b0d..fd8f260ed5f8 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -790,15 +790,9 @@ extern void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev, > struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr); > struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev); > > -static inline bool is_mddev_broken(struct md_rdev *rdev, const char *md_type) > +static inline bool is_rdev_broken(struct md_rdev *rdev) > { > - if (!disk_live(rdev->bdev->bd_disk)) { > - if (!test_and_set_bit(MD_BROKEN, &rdev->mddev->flags)) > - pr_warn("md: %s: %s array has a missing/failed member\n", > - mdname(rdev->mddev), md_type); > - return true; > - } > - return false; > + return !disk_live(rdev->bdev->bd_disk); > } > > static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev) > diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c > index b536befd8898..f8ee9a95e25d 100644 > --- 
a/drivers/md/raid0.c > +++ b/drivers/md/raid0.c > @@ -569,8 +569,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) > return true; > } > > - if (unlikely(is_mddev_broken(tmp_dev, "raid0"))) { > + if (unlikely(is_rdev_broken(tmp_dev))) { > bio_io_error(bio); > + md_error(mddev, tmp_dev); > return true; > } > > @@ -592,6 +593,16 @@ static void raid0_status(struct seq_file *seq, struct mddev *mddev) > return; > } > > +static void raid0_error(struct mddev *mddev, struct md_rdev *rdev) > +{ > + if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { > + char *md_name = mdname(mddev); > + > + pr_crit("md/raid0%s: Disk failure on %pg detected, failing array.\n", > + md_name, rdev->bdev); > + } > +} > + > static void *raid0_takeover_raid45(struct mddev *mddev) > { > struct md_rdev *rdev; > @@ -767,6 +778,7 @@ static struct md_personality raid0_personality= > .size = raid0_size, > .takeover = raid0_takeover, > .quiesce = raid0_quiesce, > + .error_handler = raid0_error, > }; > > static int __init raid0_init (void) > -- > 2.26.2 >