On 07/10/2015 03:54 AM, Guoqing Jiang wrote:
When node A stops an array while the array is doing a resync, we need to let another node B take over the resync task. To achieve this, node A needs to send an explicit BITMAP_NEEDS_SYNC message to the cluster, and node B, upon receiving that message, will invoke __recover_slot to do the resync. Signed-off-by: Guoqing Jiang <gqjiang@xxxxxxxx>
Reviewed-by: Goldwyn Rodrigues <rgoldwyn@xxxxxxxx>
--- drivers/md/md-cluster.c | 15 +++++++++++++++ drivers/md/md.c | 6 +++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 24caabe..47199ad 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -75,6 +75,7 @@ enum msg_type { NEWDISK, REMOVE, RE_ADD, + BITMAP_NEEDS_SYNC, }; struct cluster_msg { @@ -454,6 +455,11 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) __func__, __LINE__, msg->slot); process_readd_disk(mddev, msg); break; + case BITMAP_NEEDS_SYNC: + pr_info("%s: %d Received BITMAP_NEEDS_SYNC from %d\n", + __func__, __LINE__, msg->slot); + __recover_slot(mddev, msg->slot); + break; default: pr_warn("%s:%d Received unknown message from %d\n", __func__, __LINE__, msg->slot); @@ -814,8 +820,17 @@ static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi) static void resync_finish(struct mddev *mddev) { + struct md_cluster_info *cinfo = mddev->cluster_info; + struct cluster_msg cmsg; + int slot = cinfo->slot_number - 1; + pr_info("%s:%d\n", __func__, __LINE__); resync_send(mddev, RESYNCING, 0, 0); + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { + cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC); + cmsg.slot = cpu_to_le32(slot); + sendmsg(cinfo, &cmsg); + } } static int area_resyncing(struct mddev *mddev, int direction, diff --git a/drivers/md/md.c b/drivers/md/md.c index df92d30..7d05dff 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7931,9 +7931,6 @@ void md_do_sync(struct md_thread *thread) /* tell personality that we are finished */ mddev->pers->sync_request(mddev, max_sectors, &skipped); - if (mddev_is_clustered(mddev)) - md_cluster_ops->resync_finish(mddev); - if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && mddev->curr_resync > 2) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { @@ -7967,6 +7964,9 @@ void md_do_sync(struct md_thread *thread) } } skip: + if (mddev_is_clustered(mddev)) + 
md_cluster_ops->resync_finish(mddev); + set_bit(MD_CHANGE_DEVS, &mddev->flags); spin_lock(&mddev->lock);
-- Goldwyn -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html