From: Yu Kuai <yukuai3@xxxxxxxxxx> md_kick_rdev_from_array() can be called from md_check_recovery() and md_reload_sb() for md-cluster, and it is very complicated to use the new APIs to suspend the array before holding 'reconfig_mutex' in this case. Fortunately, md-cluster is only supported for raid1 and raid10, and they both implement a quiesce() callback that is safe to call from the daemon thread. Hence use the quiesce() callback to prevent IO from running concurrently with the removal of an rdev from the array. Signed-off-by: Yu Kuai <yukuai3@xxxxxxxxxx> --- drivers/md/md.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index a3bc4968fa0f..3343767882bb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9609,6 +9609,21 @@ void md_check_recovery(struct mddev *mddev) if (mddev_is_clustered(mddev)) { struct md_rdev *rdev, *tmp; + bool suspended = false; + + /* + * md-cluster is used for raid1/raid10, and they both + * implement quiesce() callback that is safe to be + * called from daemon thread. + */ + rdev_for_each(rdev, mddev) + if (test_bit(ClusterRemove, &rdev->flags) && + rdev->raid_disk < 0) { + mddev->pers->quiesce(mddev, true); + suspended = true; + break; + } + /* kick the device if another node issued a * remove disk. */ @@ -9617,6 +9632,9 @@ void md_check_recovery(struct mddev *mddev) rdev->raid_disk < 0) md_kick_rdev_from_array(rdev); } + + if (suspended) + mddev->pers->quiesce(mddev, false); } if (try_set_sync && !mddev->external && !mddev->in_sync) { @@ -9904,6 +9922,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) { struct mdp_superblock_1 *sb = page_address(rdev->sb_page); struct md_rdev *rdev2, *tmp; + bool suspended = false; int role, ret; /* @@ -9918,6 +9937,22 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) md_bitmap_update_sb(mddev->bitmap); } + /* + * md-cluster is used for raid1/raid10, and they both + * implement quiesce() callback. 
+ */ + rdev_for_each(rdev2, mddev) { + if (test_bit(Faulty, &rdev2->flags)) + continue; + role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]); + if (test_bit(Candidate, &rdev2->flags) && + role == MD_DISK_ROLE_FAULTY) { + mddev->pers->quiesce(mddev, true); + suspended = true; + break; + } + } + /* Check for change of roles in the active devices */ rdev_for_each_safe(rdev2, tmp, mddev) { if (test_bit(Faulty, &rdev2->flags)) @@ -9966,6 +10001,9 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev) } } + if (suspended) + mddev->pers->quiesce(mddev, false); + if (mddev->raid_disks != le32_to_cpu(sb->raid_disks)) { ret = update_raid_disks(mddev, le32_to_cpu(sb->raid_disks)); if (ret) -- 2.39.2