Hi Guoqing, Thanks for review. Song already merged that, so I will adress issues in the new patch. On Sat, 12 Feb 2022 09:17:57 +0800 Guoqing Jiang <guoqing.jiang@xxxxxxxxx> wrote: > On 1/27/22 11:39 PM, Mariusz Tkaczyk wrote: > > diff --git a/drivers/md/md.c b/drivers/md/md.c > > index f888ef197765..fda8473f96b8 100644 > > --- a/drivers/md/md.c > > +++ b/drivers/md/md.c > > @@ -2983,10 +2983,11 @@ state_store(struct md_rdev *rdev, const > > char *buf, size_t len) > > if (cmd_match(buf, "faulty") && rdev->mddev->pers) { > > md_error(rdev->mddev, rdev); > > - if (test_bit(Faulty, &rdev->flags)) > > - err = 0; > > - else > > + > > + if (test_bit(MD_BROKEN, &rdev->mddev->flags)) > > err = -EBUSY; > > + else > > + err = 0; > > Again, it is weird to check MD_BROKEN which is for mddev in rdev > specific function from > my understanding. As I wrote in description: "-EBUSY expectation cannot be removed without breaking compatibility with userspace." I agree with your opinion but I would like to not introduce regression here. > > > } else if (cmd_match(buf, "remove")) { > > if (rdev->mddev->pers) { > > clear_bit(Blocked, &rdev->flags); > > @@ -7441,7 +7442,7 @@ static int set_disk_faulty(struct mddev > > *mddev, dev_t dev) err = -ENODEV; > > else { > > md_error(mddev, rdev); > > - if (!test_bit(Faulty, &rdev->flags)) > > + if (test_bit(MD_BROKEN, &mddev->flags)) > > err = -EBUSY; > > Ditto. > > > } > > rcu_read_unlock(); > > @@ -7987,12 +7988,14 @@ void md_error(struct mddev *mddev, struct > > md_rdev *rdev) if (!mddev->pers->sync_request) > > return; > > > > - if (mddev->degraded) > > + if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags)) > > set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); > > sysfs_notify_dirent_safe(rdev->sysfs_state); > > set_bit(MD_RECOVERY_INTR, &mddev->recovery); > > - set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > > - md_wakeup_thread(mddev->thread); > > + if (!test_bit(MD_BROKEN, &mddev->flags)) { > > + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > > + md_wakeup_thread(mddev->thread); > > + } > > if (mddev->event_work.func) > > queue_work(md_misc_wq, &mddev->event_work); > > md_new_event(); > > diff --git a/drivers/md/md.h b/drivers/md/md.h > > index bc3f2094d0b6..1eb7d0e88cb2 100644 > > --- a/drivers/md/md.h > > +++ b/drivers/md/md.h > > @@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct > > md_rdev *rdev, sector_t s, int sectors, int is_new); > > struct md_cluster_info; > > > > -/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag > > is added */ +/** > > + * enum mddev_flags - md device flags. > > + * @MD_ARRAY_FIRST_USE: First use of array, needs initialization. > > + * @MD_CLOSING: If set, we are closing the array, do not open it > > then. > > + * @MD_JOURNAL_CLEAN: A raid with journal is already clean. > > + * @MD_HAS_JOURNAL: The raid array has journal feature set. > > + * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, > > already took > > + * resync lock, need to release the > > lock. > > + * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is > > supported as > > + * calls to md_error() will never cause > > the array to > > + * become failed. > > + * @MD_HAS_PPL: The raid array has PPL feature set. > > + * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature > > set. > > + * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the > > metadata > > + * without taking reconfig_mutex. > > + * @MD_UPDATING_SB: md_check_recovery is updating the metadata > > without > > + * explicitly holding reconfig_mutex. > > + * @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not > > report that > > + * array is ready yet. > > + * @MD_BROKEN: This is used to stop writes and mark array as > > failed. > > + * > > + * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag > > is added > > + */ > > enum mddev_flags { > > - MD_ARRAY_FIRST_USE, /* First use of array, needs > > initialization */ > > - MD_CLOSING, /* If set, we are closing the > > array, do not open > > - * it then */ > > - MD_JOURNAL_CLEAN, /* A raid with journal is already > > clean */ > > - MD_HAS_JOURNAL, /* The raid array has > > journal feature set */ > > - MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which > > means node > > - * already took resync lock, > > need to > > - * release the lock */ > > - MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on > > metadata writes is > > - * supported as calls to > > md_error() will > > - * never cause the array to become > > failed. > > - */ > > - MD_HAS_PPL, /* The raid array has PPL > > feature set */ > > - MD_HAS_MULTIPLE_PPLS, /* The raid array has > > multiple PPLs feature set */ > > - MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed > > to update > > - * the metadata without taking > > reconfig_mutex. > > - */ > > - MD_UPDATING_SB, /* md_check_recovery is > > updating the metadata > > - * without explicitly holding > > reconfig_mutex. > > - */ > > - MD_NOT_READY, /* do_md_run() is active, so > > 'array_state' > > - * must not report that array is > > ready yet > > - */ > > - MD_BROKEN, /* This is used in RAID-0/LINEAR > > only, to stop > > - * I/O in case an array member is > > gone/failed. > > - */ > > People have to scroll up to see the meaning of each flag with the > change, I would > suggest move these comments on top of each flag if you really hate > previous style, > but just my personal flavor. Kernel has well-defined definition and comment rules so I followed them: https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html Personally, I think that we should follow. It gives as a change to generate documentation. > > > + MD_ARRAY_FIRST_USE, > > + MD_CLOSING, > > + MD_JOURNAL_CLEAN, > > + MD_HAS_JOURNAL, > > + MD_CLUSTER_RESYNC_LOCKED, > > + MD_FAILFAST_SUPPORTED, > > + MD_HAS_PPL, > > + MD_HAS_MULTIPLE_PPLS, > > + MD_ALLOW_SB_UPDATE, > > + MD_UPDATING_SB, > > + MD_NOT_READY, > > + MD_BROKEN, > > }; > > > > enum mddev_sb_flags { > > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > > index 7dc8026cf6ee..b222bafa1196 100644 > > --- a/drivers/md/raid1.c > > +++ b/drivers/md/raid1.c > > @@ -1615,30 +1615,37 @@ static void raid1_status(struct seq_file > > *seq, struct mddev *mddev) seq_printf(seq, "]"); > > } > > > > +/** > > + * raid1_error() - RAID1 error handler. > > + * @mddev: affected md device. > > + **@rdev: member device to remove.* > > It is confusing, rdev is removed in raid1_remove_disk only. > Yes, that make sense. I will address it. > > + * > > + * Error on @rdev is raised and it is needed to be removed from > > @mddev. > > + * If there are more than one active member, @rdev is always > > removed. > > + * > > + * If it is the last active member, it depends on > > &mddev->fail_last_dev: > > + * - if is on @rdev is removed. > > + * - if is off, @rdev is not removed, but recovery from it is > > disabled (@rdev is > > + * very likely to fail). > > + * In both cases, &MD_BROKEN will be set in &mddev->flags. > > + */ > > static void raid1_error(struct mddev *mddev, struct md_rdev *rdev) > > { > > char b[BDEVNAME_SIZE]; > > struct r1conf *conf = mddev->private; > > unsigned long flags; > > > > - /* > > - * If it is not operational, then we have already marked > > it as dead > > - * else if it is the last working disks with > > "fail_last_dev == false", > > - * ignore the error, let the next level up know. > > - * else mark the drive as failed > > - */ > > spin_lock_irqsave(&conf->device_lock, flags); > > - if (test_bit(In_sync, &rdev->flags) && > > !mddev->fail_last_dev > > - && (conf->raid_disks - mddev->degraded) == 1) { > > - /* > > - * Don't fail the drive, act as though we were > > just a > > - * normal single drive. > > - * However don't try a recovery from this drive as > > - * it is very likely to fail. > > - */ > > - conf->recovery_disabled = mddev->recovery_disabled; > > - spin_unlock_irqrestore(&conf->device_lock, flags); > > - return; > > + > > + if (test_bit(In_sync, &rdev->flags) && > > + (conf->raid_disks - mddev->degraded) == 1) { > > + set_bit(MD_BROKEN, &mddev->flags); > > + > > + if (!mddev->fail_last_dev) { > > + conf->recovery_disabled = > > mddev->recovery_disabled; > > + spin_unlock_irqrestore(&conf->device_lock, > > flags); > > + return; > > + } > > } > > set_bit(Blocked, &rdev->flags); > > if (test_and_clear_bit(In_sync, &rdev->flags)) > > diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c > > index dde98f65bd04..7471e20d7cd6 100644 > > --- a/drivers/md/raid10.c > > +++ b/drivers/md/raid10.c > > @@ -1945,26 +1945,37 @@ static int enough(struct r10conf *conf, int > > ignore) _enough(conf, 1, ignore); > > } > > > > +/** > > + * raid10_error() - RAID10 error handler. > > + * @mddev: affected md device. > > + * @rdev: member device to remove. > > Ditto. > Noted. Thanks, Mariusz