[PATCH 2/3] md-cluster: show array's status more accurate

Guoqing Jiang <gqjiang@xxxxxxxx> · Mon, 2 Jul 2018 16:26:25 +0800

When resync or recovery is happening in one node,
other nodes don't show the appropriate info now.

For example, when create an array in master node
without "--assume-clean", then assemble the array
in slave nodes, you can see "resync=PENDING" when
read /proc/mdstat in slave nodes. However, the info
is confusing since "PENDING" status is introduced
for start array in read-only mode.

We introduce RESYNCING_REMOTE flag to indicate that
resync thread is running in remote node. The flags
is set when node receive RESYNCING msg. And we clear
the REMOTE flag in following cases:

1. resync or recover is finished in master node,
   which means slaves receive msg with both lo
   and hi are set to 0.
2. node continues resync/recovery in recover_bitmaps.
3. when resync_finish is called.

Then we show accurate information in status_resync
by check REMOTE flags and with other conditions.

Signed-off-by: Guoqing Jiang <gqjiang@xxxxxxxx>
Reviewed-by: NeilBrown <neilb@xxxxxxxx>
---
 drivers/md/md-cluster.c | 18 ++++++++++++++++--
 drivers/md/md.c         | 17 +++++++++++++++++
 drivers/md/md.h         |  1 +
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 3894836816d1..8d752279fa7c 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -338,8 +338,14 @@ static void recover_bitmaps(struct md_thread *thread)
 			/* wake up thread to continue resync in case resync
 			 * is not finished */
 			if (mddev->recovery_cp != MaxSector) {
-			    set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-			    md_wakeup_thread(mddev->thread);
+				/*
+				 * clear the REMOTE flag since we will launch
+				 * resync thread in current node.
+				 */
+				clear_bit(MD_RESYNCING_REMOTE,
+					  &mddev->recovery);
+				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+				md_wakeup_thread(mddev->thread);
 			}
 		}
 clear_bit:
@@ -458,6 +464,11 @@ static void process_suspend_info(struct mddev *mddev,
 	struct suspend_info *s;
 
 	if (!hi) {
+		/*
+		 * clear the REMOTE flag since resync or recovery is finished
+		 * in remote node.
+		 */
+		clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
 		remove_suspend_info(mddev, slot);
 		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 		md_wakeup_thread(mddev->thread);
@@ -586,6 +597,7 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 		revalidate_disk(mddev->gendisk);
 		break;
 	case RESYNCING:
+		set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
 		process_suspend_info(mddev, le32_to_cpu(msg->slot),
 				     le64_to_cpu(msg->low),
 				     le64_to_cpu(msg->high));
@@ -1266,6 +1278,8 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 static int resync_finish(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
 	dlm_unlock_sync(cinfo->resync_lockres);
 	return resync_info_update(mddev, 0, 0);
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6b4e2f29fe4e..5e16fb5e2d4a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7704,6 +7704,23 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev)
 		resync -= atomic_read(&mddev->recovery_active);
 
 	if (resync == 0) {
+		if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) {
+			struct md_rdev *rdev;
+
+			rdev_for_each(rdev, mddev)
+				if (rdev->raid_disk >= 0 &&
+				    !test_bit(Faulty, &rdev->flags) &&
+				    rdev->recovery_offset != MaxSector &&
+				    rdev->recovery_offset) {
+					seq_printf(seq, "\trecover=REMOTE");
+					return 1;
+				}
+			if (mddev->reshape_position != MaxSector)
+				seq_printf(seq, "\treshape=REMOTE");
+			else
+				seq_printf(seq, "\tresync=REMOTE");
+			return 1;
+		}
 		if (mddev->recovery_cp < MaxSector) {
 			seq_printf(seq, "\tresync=PENDING");
 			return 1;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ffcb1ae217fe..176165cd2d57 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -496,6 +496,7 @@ enum recovery_flags {
 	MD_RECOVERY_FROZEN,	/* User request to abort, and not restart, any action */
 	MD_RECOVERY_ERROR,	/* sync-action interrupted because io-error */
 	MD_RECOVERY_WAIT,	/* waiting for pers->start() to finish */
+	MD_RESYNCING_REMOTE,	/* remote node is running resync thread */
 };
 
 static inline int __must_check mddev_lock(struct mddev *mddev)
-- 
2.13.1

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html