On 1/27/22 11:39 PM, Mariusz Tkaczyk wrote:
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f888ef197765..fda8473f96b8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2983,10 +2983,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
- if (test_bit(Faulty, &rdev->flags))
- err = 0;
- else
+
+ if (test_bit(MD_BROKEN, &rdev->mddev->flags))
err = -EBUSY;
+ else
+ err = 0;
Again, it is weird to check MD_BROKEN which is for mddev in rdev
specific function from
my understanding.
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
@@ -7441,7 +7442,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
err = -ENODEV;
else {
md_error(mddev, rdev);
- if (!test_bit(Faulty, &rdev->flags))
+ if (test_bit(MD_BROKEN, &mddev->flags))
err = -EBUSY;
Ditto.
}
rcu_read_unlock();
@@ -7987,12 +7988,14 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
if (!mddev->pers->sync_request)
return;
- if (mddev->degraded)
+ if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ if (!test_bit(MD_BROKEN, &mddev->flags)) {
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
+ }
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
diff --git a/drivers/md/md.h b/drivers/md/md.h
index bc3f2094d0b6..1eb7d0e88cb2 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;
-/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
+/**
+ * enum mddev_flags - md device flags.
+ * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
+ * @MD_CLOSING: If set, we are closing the array, do not open it then.
+ * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
+ * @MD_HAS_JOURNAL: The raid array has journal feature set.
+ * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, already took
+ * resync lock, need to release the lock.
+ * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
+ * calls to md_error() will never cause the array to
+ * become failed.
+ * @MD_HAS_PPL: The raid array has PPL feature set.
+ * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
+ * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
+ * without taking reconfig_mutex.
+ * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
+ * explicitly holding reconfig_mutex.
+ * @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not report that
+ * array is ready yet.
+ * @MD_BROKEN: This is used to stop writes and mark array as failed.
+ *
+ * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
+ */
enum mddev_flags {
- MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
- MD_CLOSING, /* If set, we are closing the array, do not open
- * it then */
- MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
- MD_HAS_JOURNAL, /* The raid array has journal feature set */
- MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
- * already took resync lock, need to
- * release the lock */
- MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
- * supported as calls to md_error() will
- * never cause the array to become failed.
- */
- MD_HAS_PPL, /* The raid array has PPL feature set */
- MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
- MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
- * the metadata without taking reconfig_mutex.
- */
- MD_UPDATING_SB, /* md_check_recovery is updating the metadata
- * without explicitly holding reconfig_mutex.
- */
- MD_NOT_READY, /* do_md_run() is active, so 'array_state'
- * must not report that array is ready yet
- */
- MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
- * I/O in case an array member is gone/failed.
- */
People have to scroll up to see the meaning of each flag with the
change, I would
suggest move these comments on top of each flag if you really hate
previous style,
but just my personal flavor.
+ MD_ARRAY_FIRST_USE,
+ MD_CLOSING,
+ MD_JOURNAL_CLEAN,
+ MD_HAS_JOURNAL,
+ MD_CLUSTER_RESYNC_LOCKED,
+ MD_FAILFAST_SUPPORTED,
+ MD_HAS_PPL,
+ MD_HAS_MULTIPLE_PPLS,
+ MD_ALLOW_SB_UPDATE,
+ MD_UPDATING_SB,
+ MD_NOT_READY,
+ MD_BROKEN,
};
enum mddev_sb_flags {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7dc8026cf6ee..b222bafa1196 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1615,30 +1615,37 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, "]");
}
+/**
+ * raid1_error() - RAID1 error handler.
+ * @mddev: affected md device.
+ **@rdev: member device to remove.*
It is confusing, rdev is removed in raid1_remove_disk only.
+ *
+ * Error on @rdev is raised and it is needed to be removed from @mddev.
+ * If there are more than one active member, @rdev is always removed.
+ *
+ * If it is the last active member, it depends on &mddev->fail_last_dev:
+ * - if is on @rdev is removed.
+ * - if is off, @rdev is not removed, but recovery from it is disabled (@rdev is
+ * very likely to fail).
+ * In both cases, &MD_BROKEN will be set in &mddev->flags.
+ */
static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r1conf *conf = mddev->private;
unsigned long flags;
- /*
- * If it is not operational, then we have already marked it as dead
- * else if it is the last working disks with "fail_last_dev == false",
- * ignore the error, let the next level up know.
- * else mark the drive as failed
- */
spin_lock_irqsave(&conf->device_lock, flags);
- if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
- && (conf->raid_disks - mddev->degraded) == 1) {
- /*
- * Don't fail the drive, act as though we were just a
- * normal single drive.
- * However don't try a recovery from this drive as
- * it is very likely to fail.
- */
- conf->recovery_disabled = mddev->recovery_disabled;
- spin_unlock_irqrestore(&conf->device_lock, flags);
- return;
+
+ if (test_bit(In_sync, &rdev->flags) &&
+ (conf->raid_disks - mddev->degraded) == 1) {
+ set_bit(MD_BROKEN, &mddev->flags);
+
+ if (!mddev->fail_last_dev) {
+ conf->recovery_disabled = mddev->recovery_disabled;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ return;
+ }
}
set_bit(Blocked, &rdev->flags);
if (test_and_clear_bit(In_sync, &rdev->flags))
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index dde98f65bd04..7471e20d7cd6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1945,26 +1945,37 @@ static int enough(struct r10conf *conf, int ignore)
_enough(conf, 1, ignore);
}
+/**
+ * raid10_error() - RAID10 error handler.
+ * @mddev: affected md device.
+ * @rdev: member device to remove.
Ditto.
Thanks,
Guoqing